Skip to content

Commit 559e0c2

Browse files
asparkhipfk-beta
authored and committed
[CMSIS-NN] Aligned scale computation with TFLM to fix numerical mismatch (apache#10817)
Fixes numerical mismatch in Conv2D layers by aligning order of output scale computation with TFLM. Correct output scale is needed to calculate quantization parameters needed by CMSIS-NN.
1 parent fd1ba74 commit 559e0c2

3 files changed

Lines changed: 175 additions & 7 deletions

File tree

src/relay/backend/contrib/cmsisnn/generate_constants.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ class GenerateConstantsMutator : public MixedModeMutator {
123123
// Obtain input and output scales from Relay's Requantization
124124
int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
125125
float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
126-
auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
127-
ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
126+
auto input_scale = GetScalarFromConstant<float>(conv2d_call->args[4]);
127+
auto filter_scales = tvm::relay::qnn::GetFloatVectorFromConstant(conv2d_call->args[5]);
128128

129129
// Calculate requantization multiplier and shift
130130
Device dev{DLDeviceType::kDLCPU, 0};
@@ -134,10 +134,10 @@ class GenerateConstantsMutator : public MixedModeMutator {
134134
int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
135135
int32_t* shift = static_cast<int32_t*>(shift_nda->data);
136136
for (int i = 0; i < out_channels; ++i) {
137-
double quantized_multiplier =
138-
static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
137+
double effective_output_scale =
138+
static_cast<double>(input_scale) * filter_scales[i] / static_cast<double>(output_scale);
139139
std::tie(*(multiplier + i), *(shift + i)) =
140-
tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
140+
tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
141141
}
142142

143143
// Create constants from requantization multiplier and shift

tests/python/contrib/test_cmsisnn/test_conv2d.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,14 @@
3535
from utils import (
3636
skip_if_no_reference_system,
3737
make_module,
38+
create_conv2d_tflite_relay_models,
3839
get_range_for_dtype_str,
3940
get_same_padding,
4041
get_conv2d_qnn_params,
4142
make_qnn_relu,
4243
assert_partitioned_function,
4344
assert_no_external_function,
45+
generate_ref_data_tflite,
4446
)
4547

4648

@@ -282,7 +284,6 @@ def test_conv2d_asymmetric_padding_int8(
282284
)
283285
orig_mod = make_module(model)
284286
cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
285-
286287
# validate pattern matching
287288
assert_partitioned_function(orig_mod, cmsisnn_mod)
288289

@@ -304,6 +305,43 @@ def test_conv2d_asymmetric_padding_int8(
304305
)
305306

306307

308+
@tvm.testing.requires_cmsisnn
@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3)])
@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)])
@pytest.mark.parametrize("strides, dilation", [((3, 2), (1, 1))])
@pytest.mark.parametrize("padding", ["SAME", "VALID"])
@pytest.mark.parametrize("activation", ["NONE", "RELU"])
def test_conv2d_int8_tflite(ifm_shape, kernel_shape, strides, dilation, padding, activation):
    """Compares CMSIS-NN int8 Conv2D output against the TFLite reference kernels."""
    dtype = "int8"
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    tflite_model, relay_mod, params = create_conv2d_tflite_relay_models(
        ifm_shape, kernel_shape, strides, dilation, padding, activation, dtype
    )
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(relay_mod, params)

    # validate pattern matching
    assert_partitioned_function(relay_mod, cmsisnn_mod)

    # validate CMSIS-NN output against TFLite output
    in_map, out_map, tolerance = generate_ref_data_tflite(tflite_model)
    aot_model = AOTTestModel(
        module=cmsisnn_mod,
        inputs=in_map,
        outputs=out_map,
        params=params,
        output_tolerance=tolerance,
    )
    compile_and_run(aot_model, test_runner, interface_api, use_unpacked_api)
343+
344+
307345
@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/10314")
308346
@tvm.testing.requires_cmsisnn
309347
@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])

tests/python/contrib/test_cmsisnn/utils.py

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
"""CMSIS-NN functions for testing networks"""
1919

2020
import platform
21-
2221
import math
2322
import numpy as np
2423
import pytest
@@ -226,3 +225,134 @@ def make_qnn_relu(expr, fused_activation_fn, scale, zero_point, dtype):
226225
)
227226
if fused_activation_fn == "RELU":
228227
return tvm.relay.op.clip(expr, a_min=max(qmin, quantize(0.0)), a_max=qmax)
228+
229+
230+
def generate_random_input_data(seed, shape, dtype):
    """
    Generates a reproducible randomized numpy array based on shape and dtype.

    Parameters
    ----------
    seed : int
        Seed for the random generator, making the data deterministic.
    shape : tuple
        Shape of the generated array.
    dtype : numpy dtype
        float32 yields uniform samples in [-1, 1); integer dtypes span
        their full representable range.
    """
    random_state = np.random.RandomState(seed)
    if dtype == np.float32:
        # Bug fix: the original referenced an undefined name `size` here,
        # raising NameError for float32 inputs; use the `shape` parameter.
        return random_state.uniform(-1, 1, shape).astype(dtype)
    else:
        low = np.iinfo(dtype).min
        # randint's upper bound is exclusive, so +1 includes dtype's max.
        high = np.iinfo(dtype).max + 1
        return random_state.randint(low, high, shape, dtype)
241+
242+
243+
def generate_ref_data_tflite(model):
    """
    This method uses TFLite reference kernels to generate reference output.
    Random input generator is used to get the input data.
    It returns randomized inputs and reference outputs.

    Parameters
    ----------
    model : bytes
        Serialized TFLite flatbuffer model.

    Returns
    -------
    (input_data, expected_output_data, output_tolerance)
        Input and output dicts keyed by tensor name, plus the tolerance to
        use when comparing against this reference (TF < 2.5.0 lacks the
        reference op resolver, so a tolerance of 1 is allowed there).
    """
    import tensorflow as tf
    from distutils.version import LooseVersion

    output_tolerance = None
    # Bug fix: compare LooseVersion to LooseVersion. The original compared a
    # raw str against a LooseVersion, which only works through the reflected
    # comparison operators and is fragile.
    if LooseVersion(tf.__version__) < LooseVersion("2.5.0"):
        output_tolerance = 1
        interpreter = tf.lite.Interpreter(model_content=model)
    else:
        from tensorflow.lite.python.interpreter import OpResolverType

        output_tolerance = 0
        # BUILTIN_REF selects TFLite's reference (bit-exact) kernels.
        interpreter = tf.lite.Interpreter(
            model_content=model,
            experimental_op_resolver_type=OpResolverType.BUILTIN_REF,
            experimental_preserve_all_tensors=False,
        )

    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Generate predictable randomized input
    seed = 0
    input_data = {}
    for input_detail in input_details:
        input_values = generate_random_input_data(
            seed, input_detail["shape"], input_detail["dtype"]
        )
        interpreter.set_tensor(input_detail["index"], input_values)
        input_data.update({input_detail["name"]: input_values})

    interpreter.invoke()

    # Obtain the expected output from interpreter
    expected_output_data = {}
    for output_detail in output_details:
        expected_output_data.update(
            {output_detail["name"]: interpreter.get_tensor(output_detail["index"])}
        )

    return input_data, expected_output_data, output_tolerance
290+
291+
292+
def create_conv2d_tflite_model(ifm_shape, kernel_shape, strides, dilation, padding, activation):
    """This method prepares TFlite graph with a single Conv2d layer.

    The graph is post-training quantized to int8 using a randomized
    representative dataset and returned as a serialized flatbuffer.

    Parameters
    ----------
    ifm_shape : tuple
        NHWC input feature-map shape.
    kernel_shape : tuple
        (kernel_h, kernel_w) of the convolution filter.
    strides, dilation : tuple
        Conv2D strides and dilations.
    padding : str
        "SAME" or "VALID".
    activation : str
        "RELU" to append a ReLU, anything else for no activation.
    """
    import tensorflow as tf

    # Generalization: derive input channels from the NHWC input shape
    # instead of hard-coding 3. Output channels remain 3 as before.
    in_channels = ifm_shape[-1]
    out_channels = 3

    class Model(tf.Module):
        @tf.function
        def tf_function(self, x):
            # Use tf.nn API to create the model
            tf_strides = [1, strides[0], strides[1], 1]
            op = tf.nn.conv2d(
                x,
                filters=tf.constant(
                    np.random.uniform(
                        size=[kernel_shape[0], kernel_shape[1], in_channels, out_channels]
                    ),
                    dtype=tf.float32,
                ),
                strides=tf_strides,
                padding=padding,
                dilations=dilation,
            )
            # Bug fix: the original tested `if activation:`, which is truthy
            # for the string "NONE" and therefore always applied ReLU.
            # Compare against "RELU" explicitly, matching make_qnn_relu.
            if activation == "RELU":
                op = tf.nn.relu(op)
            return op

    model = Model()
    concrete_func = model.tf_function.get_concrete_function(
        tf.TensorSpec(ifm_shape, dtype=tf.float32)
    )

    # Representative data drives TFLite's post-training int8 quantization.
    def representative_dataset():
        for _ in range(100):
            data = np.random.rand(*tuple(ifm_shape))
            yield [data.astype(np.float32)]

    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
    tflite_model = converter.convert()
    return tflite_model
333+
334+
335+
def create_conv2d_tflite_relay_models(
    ifm_shape, kernel_shape, strides, dilation, padding, activation, dtype
):
    """
    Builds a single-Conv2D TFLite model and imports it into Relay.

    Returns the serialized TFLite model together with the resulting Relay
    module and its params.
    """
    pytest.importorskip("tflite")
    import tflite.Model

    serialized_tflite_model = create_conv2d_tflite_model(
        ifm_shape, kernel_shape, strides, dilation, padding, activation
    )

    # Parse the flatbuffer so the Relay frontend can consume it.
    parsed_model = tflite.Model.Model.GetRootAsModel(serialized_tflite_model, 0)

    relay_module, params = relay.frontend.from_tflite(
        parsed_model,
        shape_dict={"input": ifm_shape},
        dtype_dict={"input": dtype},
    )

    return serialized_tflite_model, relay_module, params

0 commit comments

Comments
 (0)