Skip to content

Commit 32063b0

Browse files
author
Siyuan Feng
authored
[Doc] Quick Start (#17289)
This PR introduces a new quick start tutorial to the documentation.
1 parent b76ebad commit 32063b0

File tree

6 files changed

+204
-1
lines changed

6 files changed

+204
-1
lines changed

docs/.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
doxygen
22
modules
3-
tutorials

docs/conf.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
408408
from sphinx_gallery.sorting import ExplicitOrder
409409

410410
examples_dirs = [
411+
# legacy tutorial structure under gallery folder
411412
tvm_path.joinpath("gallery", "tutorial"),
412413
tvm_path.joinpath("gallery", "how_to", "compile_models"),
413414
tvm_path.joinpath("gallery", "how_to", "deploy_models"),
@@ -419,9 +420,12 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
419420
tvm_path.joinpath("gallery", "how_to", "work_with_microtvm"),
420421
tvm_path.joinpath("gallery", "how_to", "extend_tvm"),
421422
tvm_path.joinpath("vta", "tutorials"),
423+
# New tutorial structure under docs folder
424+
tvm_path.joinpath("docs", "get_started", "tutorials"),
422425
]
423426

424427
gallery_dirs = [
428+
# legacy tutorial structure under gallery folder
425429
"tutorial",
426430
"how_to/compile_models",
427431
"how_to/deploy_models",
@@ -433,6 +437,8 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
433437
"how_to/work_with_microtvm",
434438
"how_to/extend_tvm",
435439
"topic/vta/tutorials",
440+
# New tutorial structure under docs folder
441+
"get_started/tutorials/",
436442
]
437443

438444

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Get Started
2+
-----------
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
"""
19+
.. _quick_start:
20+
21+
Quick Start
22+
===========
23+
24+
This tutorial is for people who are new to Apache TVM. It takes a simple example
25+
to show how to use Apache TVM to compile a simple neural network.
26+
27+
.. contents:: Table of Contents
28+
:local:
29+
:depth: 2
30+
31+
"""
32+
33+
################################################################################
34+
# Overview
35+
# --------
36+
# Apache TVM is a machine learning compilation framework, following the principle of
37+
# **Python-first development** and **universal deployment**. It takes in pre-trained
38+
# machine learning models, compiles and generates deployable modules that can be embedded
39+
# and run everywhere.
40+
# Apache TVM also enables customizing optimization processes to introduce new optimizations,
41+
# libraries, codegen and more.
42+
#
43+
# Apache TVM can help to:
44+
#
45+
# - **Optimize** performance of ML workloads, composing libraries and codegen.
46+
# - **Deploy** ML workloads to a diverse set of new environments, including new runtime and new
47+
# hardware.
48+
# - **Continuously improve and customize** ML deployment pipeline in Python by quickly customizing
49+
# library dispatching, bringing in customized operators and code generation.
50+
51+
################################################################################
52+
# Overall Flow
53+
# ------------
54+
# In this section, we show the overall flow of using Apache TVM to compile a neural network model,
55+
# showing how to optimize, deploy and run the model.
56+
# The overall flow is illustrated as the figure:
57+
#
58+
# .. figure:: https://raw.githubusercontent.com/tlc-pack/web-data/main/images/design/tvm_overall_flow.svg
59+
# :align: center
60+
# :width: 80%
61+
#
62+
# The overall flow consists of the following steps:
63+
#
64+
# - **Construct or Import a Model**: Construct a neural network model or import a pre-trained
65+
# model from other frameworks (e.g. PyTorch, ONNX), and create the TVM IRModule, which contains
66+
# all the information needed for compilation, including high-level Relax functions for
67+
# computational graph, and low-level TensorIR functions for tensor program.
68+
# - **Perform Composable Optimizations**: Perform a series of optimization transformations,
69+
# such as graph optimizations, tensor program optimizations, and library dispatching.
70+
# - **Build and Universal Deployment**: Build the optimized model to a deployable module to the
71+
# universal runtime, and execute it on different devices, such as CPU, GPU, or other accelerators.
72+
73+
################################################################################
74+
# Construct or Import a Model
75+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
76+
# Before we get started, let's construct a neural network model first.
77+
# In this tutorial, to make things simple, we will define a two-layer MLP network
78+
# directly in this script using the TVM Relax frontend, which provides an API similar to PyTorch's.
79+
#
80+
81+
import tvm
82+
from tvm import relax
83+
from tvm.relax.frontend import nn
84+
85+
86+
class MLPModel(nn.Module):
87+
def __init__(self):
88+
super(MLPModel, self).__init__()
89+
self.fc1 = nn.Linear(784, 256)
90+
self.relu1 = nn.ReLU()
91+
self.fc2 = nn.Linear(256, 10)
92+
93+
def forward(self, x):
94+
x = self.fc1(x)
95+
x = self.relu1(x)
96+
x = self.fc2(x)
97+
return x
98+
99+
100+
################################################################################
101+
# Then we can export the model to TVM IRModule, which is the central intermediate representation
102+
# in TVM.
103+
104+
mod, param_spec = MLPModel().export_tvm(
105+
spec={"forward": {"x": nn.spec.Tensor((1, 784), "float32")}}
106+
)
107+
mod.show()
108+
109+
################################################################################
110+
# Perform Optimization Transformations
111+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
112+
# Apache TVM leverages ``pipeline`` to transform and optimize programs.
113+
# The pipeline encapsulates a collection of transformations that achieve two goals (at the same level):
114+
#
115+
# - **Model optimizations**: such as operator fusion, layout rewrites.
116+
# - **Tensor program optimization**: Map the operators to low-level implementations
117+
# (both library or codegen)
118+
#
119+
# .. note::
120+
# The two are goals of the pipeline, not its stages. The two optimizations are performed
121+
# **at the same level**, or separately in two stages.
122+
#
123+
# .. note::
124+
# In this tutorial we only demonstrate the overall flow, leveraging the ``zero`` optimization
125+
# pipeline, instead of optimizing for any specific target.
126+
127+
mod = relax.get_pipeline("zero")(mod)
128+
129+
130+
################################################################################
131+
# Build and Universal Deployment
132+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
133+
# After the optimization, we can build the model to a deployable module and run it on
134+
# different devices.
135+
136+
137+
import numpy as np
138+
139+
target = tvm.target.Target("llvm")
140+
ex = relax.build(mod, target)
141+
device = tvm.cpu()
142+
vm = relax.VirtualMachine(ex, device)
143+
data = np.random.rand(1, 784).astype("float32")
144+
tvm_data = tvm.nd.array(data, device=device)
145+
params = [np.random.rand(*param.shape).astype("float32") for _, param in param_spec]
146+
params = [tvm.nd.array(param, device=device) for param in params]
147+
print(vm["forward"](tvm_data, *params).numpy())
148+
149+
################################################################################
150+
# Our goal is to bring machine learning to the application with any language of interest,
151+
# with the minimum runtime support.
152+
#
153+
# - Each function in IRModule becomes a runnable function in the runtime. For example in LLM
154+
# cases, we can call ``prefill`` and ``decode`` functions directly.
155+
#
156+
# .. code-block:: Python
157+
#
158+
# prefill_logits = vm["prefill"](inputs, weight, kv_cache)
159+
# decoded_logits = vm["decode"](inputs, weight, kv_cache)
160+
#
161+
# - TVM runtime comes with native data structures, such as NDArray, which support zero
162+
# copy exchange with existing ecosystem (DLPack exchange with PyTorch)
163+
#
164+
# .. code-block:: Python
165+
#
166+
# # Convert PyTorch tensor to TVM NDArray
167+
# x_tvm = tvm.nd.from_dlpack(x_torch.to_dlpack())
168+
# # Convert TVM NDArray to PyTorch tensor
169+
# x_torch = torch.from_dlpack(x_tvm.to_dlpack())
170+
#
171+
# - TVM runtime works in non-python environments, so it works on settings such as mobile
172+
#
173+
# .. code-block:: C++
174+
#
175+
# // C++ snippet
176+
# runtime::Module vm = ex.GetFunction("load_executable")();
177+
# vm.GetFunction("init")(...);
178+
# NDArray out = vm.GetFunction("prefill")(data, weight, kv_cache);
179+
#
180+
# .. code-block:: Java
181+
#
182+
# // Java snippet
183+
# Module vm = ex.getFunction("load_executable").invoke();
184+
# vm.getFunction("init").pushArg(...).invoke();
185+
# NDArray out = vm.getFunction("prefill").pushArg(data).pushArg(weight).pushArg(kv_cache).invoke();
186+
#
187+
188+
################################################################################
189+
# Read next
190+
# ---------
191+
# This tutorial demonstrates the overall flow of using Apache TVM to compile a neural network model.
192+
# For more advanced or specific topics, please refer to the following tutorials.
193+
#

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ driving its costs down.
3333
:caption: Getting Started
3434

3535
install/index
36+
get_started/tutorials/quick_start
3637
contribute/index
3738

3839
.. toctree::

tests/scripts/task_python_docs.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ IGNORED_WARNINGS=(
9090
'absl:For model inputs containing unsupported operations which cannot be quantized, the `inference_input_type` attribute will default to the original type.'
9191
'absl:Found untraced functions such as _jit_compiled_convolution_op'
9292
'You are using pip version'
93+
# Tutorial READMEs can be ignored, but other docs should be included
94+
"tutorials/README.rst: WARNING: document isn't included in any toctree"
9395
)
9496

9597
JOINED_WARNINGS=$(join_by '|' "${IGNORED_WARNINGS[@]}")

0 commit comments

Comments
 (0)