apache · tmoreau89 · Nov 18, 2021 · Nov 8, 2021 · Nov 8, 2021 · Nov 8, 2021
diff --git a/tests/python/contrib/test_hexagon/conftest.py b/tests/python/contrib/test_hexagon/conftest.py
@@ -19,7 +19,7 @@
     values from testing parameters """
 
 import tvm
-from .infrastructure import get_packed_filter_layout
+from .infrastructure import get_packed_filter_shape
 
 
 @tvm.testing.fixture
@@ -34,4 +34,4 @@ def shape_oihw(out_channel, in_channel, kernel):
 
 @tvm.testing.fixture
 def shape_oihw8i32o4i(out_channel, in_channel, kernel):
-    return get_packed_filter_layout(out_channel, in_channel, kernel, kernel)
+    return get_packed_filter_shape(out_channel, in_channel, kernel, kernel)
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -18,36 +18,43 @@
 """ Hexagon testing infrastructure """
 
 import tvm
+from tvm import te
 import numpy
 
 
 def ceildiv(o, d):
     return tvm.tir.floordiv(o + d - 1, d)
 
 
-def get_packed_activation_layout(shape_nhwc, block_shape, packed_C=True):
+def get_block_shape():
+    return 8, 8, 32
+
+
+def get_filter_block_shape():
+    return 8, 32, 4
+
+
+def get_packed_shape(shape_nhwc):
     assert len(shape_nhwc) == 4
     shape = [shape_nhwc[0]]
+    block_shape = get_block_shape()
     off_h, off_w, off_c = block_shape
     shape.append(ceildiv(shape_nhwc[1], off_h))
     shape.append(ceildiv(shape_nhwc[2], off_w))
-    if packed_C:
-        shape.append(ceildiv(shape_nhwc[3], off_c))
-        shape.extend(block_shape)
-    else:
-        shape.extend([off_h, off_w, shape_nhwc[3]])
+    shape.append(ceildiv(shape_nhwc[3], off_c))
+    shape.extend(block_shape)
     return shape
 
 
-def get_block_shape():
-    return 8, 8, 32
-
-
-def get_filter_block_shape():
-    return 8, 32, 4
+def get_logical_shape(shape_nhwc8h8w32c):
+    shape = [shape_nhwc8h8w32c[0]]
+    shape.append(shape_nhwc8h8w32c[1] * shape_nhwc8h8w32c[4])
+    shape.append(shape_nhwc8h8w32c[2] * shape_nhwc8h8w32c[5])
+    shape.append(shape_nhwc8h8w32c[3] * shape_nhwc8h8w32c[6])
+    return shape
 
 
-def get_packed_filter_layout(out_channel, in_channel, kernel_h, kernel_w):
+def get_packed_filter_shape(out_channel, in_channel, kernel_h, kernel_w):
     filter_Cio, filter_Ki, filter_Cii = get_filter_block_shape()
     filter_Ci = filter_Cio * filter_Cii
     return (
@@ -93,26 +100,10 @@ def get_conv2d_nhwc_shape(shape_nhwc, kernel_size, strides, padding, dilation, o
     )
 
 
-def verify_conv2d(output, ref_output, dtype):
-    # nhwc8h8w32c
-    if len(output.shape) == 7:
-        # nhwc8h8w32c -> nhwc
-        output = output.transpose(0, 1, 4, 2, 5, 3, 6).reshape(
-            output.shape[0],
-            output.shape[1] * output.shape[4],
-            output.shape[2] * output.shape[5],
-            output.shape[3] * output.shape[6],
-        )
-
-    # nhwhwc
-    else:
-        # nhwhwc -> nhwc
-        output = output.transpose(0, 1, 3, 2, 4, 5).reshape(
-            output.shape[0],
-            output.shape[1] * output.shape[3],
-            output.shape[2] * output.shape[4],
-            output.shape[5],
-        )
+def conv2d_verify(output, ref_output, dtype):
+    # nhwc8h8w32c -> nhwc
+    logical_output_shape = get_logical_shape(output.shape)
+    output = output.transpose(0, 1, 4, 2, 5, 3, 6).reshape(logical_output_shape)
 
     # slice output to match ref_output shape
     # e.g. 8x8 spatial 3x3 filter = 6x6 ref output
@@ -129,3 +120,64 @@ def verify_conv2d(output, ref_output, dtype):
     elif dtype == "float32":
         tol = {"rtol": 1e-4, "atol": 2e-4}
     tvm.testing.assert_allclose(output, ref_output, **tol)
+
+
+def conv2d_compute(X, filt, pad, stride, dilation):
+    block_shape = get_block_shape()
+    block_H, block_W, block_C = block_shape
+    filter_Cio, filter_Ki, filter_Cii = get_filter_block_shape()
+    filter_Ci = filter_Cio * filter_Cii
+
+    shape_filter = filt.shape
+    kernel_size = tuple(shape_filter[2:4])
+    out_channels = shape_filter[0] * shape_filter[5]
+
+    logical_input_shape = get_logical_shape(X.shape)
+    logical_output_shape = get_conv2d_nhwc_shape(
+        logical_input_shape,
+        kernel_size,
+        stride,
+        pad,
+        dilation,
+        out_channels,
+    )
+
+    output_shape = get_packed_shape(logical_output_shape)
+    n, ho, wo, ko, hi, wi, ki = output_shape
+    rh = te.reduce_axis((0, kernel_size[0]), name="rh")
+    rw = te.reduce_axis((0, kernel_size[1]), name="rw")
+    rc = te.reduce_axis((0, logical_input_shape[3]), name="rc")
+
+    def compute(n, ho, wo, ko, hi, wi, ki):
+        h = ho * block_H + hi
+        h_contig = h * stride[0] + rh
+        h_block_id = h_contig // block_H
+        h_block_offset = h_contig % block_H
+
+        w = wo * block_W + wi
+        w_contig = w * stride[1] + rw
+        w_block_id = w_contig // block_W
+        w_block_offset = w_contig % block_W
+
+        c_block_id = rc // block_C
+        c_block_offset = rc % block_C
+
+        rco = rc // filter_Ci
+        rcio = (rc % filter_Ci) // filter_Cii
+        rcii = rc % filter_Cii
+
+        return te.sum(
+            X[
+                n,
+                h_block_id,
+                w_block_id,
+                c_block_id,
+                h_block_offset,
+                w_block_offset,
+                c_block_offset,
+            ]
+            * filt[ko, rco, rh, rw, rcio, ki, rcii],
+            axis=[rh, rw, rc],
+        )
+
+    return output_shape, compute
diff --git a/tests/python/contrib/test_hexagon/test_conv2d_blocked.md b/tests/python/contrib/test_hexagon/test_conv2d_blocked.md
@@ -23,24 +23,23 @@ This is a baseline 1x1 conv2d schedule for Hexagon.
 
 ## Command
 
-pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[conv2d_nhwc8h8w32c-1-1-0-float32-1-1-1-64-64-128-llvm]"
+pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[1-64-64-0-1-1-128-1-1-float32-llvm]"
 
 ## Parameters
 
-| Parameter | Value       |
-| --------- | ----------- |
-| Batch     | 1           |
-| Filter    | 1x1         |
-| Spatial   | 64x64       |
-| Input Ch  | 64          |
-| Output Ch | 128         |
-| Stride    | 1           |
-| Padding   | 0           |
-| Layout    | NHWC8h8w32c |
+| Parameter | Value |
+| --------- | ----- |
+| Batch     | 1     |
+| Spatial   | 64x64 |
+| Input Ch  | 64    |
+| Padding   | 0     |
+| Stride    | 1     |
+| Filter    | 1x1   |
+| Output Ch | 128   |
 
 ## Assumptions
 
-* Pattern matching for microkernels is not senstive to cache reads and writes between the outer height (ho) and outer width (wo) loops.
+* n/a
 
 ## To Do
 
@@ -174,26 +173,25 @@ The key changes in TIR versus the above are...
 
 ## Command
 
-pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[conv2d_nhwc8h8w32c-1-1-0-float32-2-2-1-64-64-128-llvm]"
+pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[1-64-64-0-1-1-128-2-2-float32-llvm]"
 
 ## Parameters
 
-| Parameter | Value       |
-| --------- | ----------- |
-| Batch     | 1           |
-| Filter    | 1x1         |
-| Spatial   | 64x64       |
-| Input Ch  | 64          |
-| Output Ch | 128         |
-| Stride    | 1           |
-| Padding   | 0           |
-| Layout    | NHWC8h8w32c |
-| k_split   | 2           |
-| h_split   | 2           |
+| Parameter | Value |
+| --------- | ----- |
+| Batch     | 1     |
+| Spatial   | 64x64 |
+| Input Ch  | 64    |
+| Padding   | 0     |
+| Stride    | 1     |
+| Filter    | 1x1   |
+| Output Ch | 128   |
+| k_split   | 2     |
+| h_split   | 2     |
 
 ## Assumptions
 
-* n/a - With the loop splits on `ko` and `ho` the compute schedule is now over `ko.inner` `ho.inner` `wo` etc. This should fit the pattern matching for microkernels.
+* n/a
 
 ## To Do
 
@@ -350,21 +348,21 @@ The `if` statement above indicates NOT to prefetch the vertically adjacent slice
 
 ## Command
 
-pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[conv2d_nhwc8h8w32c-3-1-0-float32-2-2-1-64-64-128-llvm]"
+pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[1-64-64-0-1-3-128-2-2-float32-llvm]"
 
 ## Parameters
 
-| Parameter | Value       |
-| --------- | ----------- |
-| Batch     | 1           |
-| Filter    | 3x3         |
-| Spatial   | 64x64       |
-| Input Ch  | 64          |
-| Output Ch | 128         |
-| Stride    | 1           |
-| Padding   | 0           |
-| Layout    | NHWC8h8w32c |
-| h_split   | 2           |
+| Parameter | Value |
+| --------- | ----- |
+| Batch     | 1     |
+| Spatial   | 64x64 |
+| Input Ch  | 64    |
+| Padding   | 0     |
+| Stride    | 1     |
+| Filter    | 1x1   |
+| Output Ch | 128   |
+| k_split   | 2     |
+| h_split   | 2     |
 
 ## Assumptions