Tensor extensions (#4260)

KsenijaS · codemzs · commit d2908813638e · 2019-10-01T21:23:29.000-07:00
* Add TensorTypeExtensions

* Upgrade performance by replacing ToArray method

* Fix failing tests

* Remove unused code

* Remove duplicate file TensorTypeExtensions

* Remove commented code

* Add exceptions

* Add copyright header

* Rename ToSpan to CopyTo

* Use Utils.EnsureSize method

* Use CopyTo method to copy the values to span

* Fix Destination too short error

* Call CopyTo inside ToArray method to ensure code reuse

* No need to assign values span to dst

* Use checked keyword around cast to len

* Set keepOld to false instead of true in Utils.EnsureSize

* Add assert to ensure that imageArray and featurizedImage are the same size

* Cast tensor.size to int instead of long

* Remove FetchData since it's no longer used

* Add checked keyword around cast
diff --git a/src/Microsoft.ML.Dnn/DnnRetrainTransform.cs b/src/Microsoft.ML.Dnn/DnnRetrainTransform.cs
@@ -379,14 +379,18 @@ private void TrainCore(DnnRetrainEstimator.Options options, IDataView input, IDa
             ITensorValueGetter[] srcTensorGetters,
             Runner runner)
         {
-            float loss = 0;
-            float metric = 0;
+            float loss = 0.0f;
+            float metric = 0.0f;
             for (int i = 0; i < inputs.Length; i++)
                 runner.AddInput(inputs[i], srcTensorGetters[i].GetBufferedBatchTensor());
 
             Tensor[] tensor = runner.Run();
-            loss = tensor.Length > 0 && tensor[0] != IntPtr.Zero ? (float)tensor[0].ToArray<float>()[0] : 0.0f;
-            metric = tensor.Length > 1 && tensor[1] != IntPtr.Zero ? (float)tensor[1].ToArray<float>()[0] : 0.0f;
+            if (tensor.Length > 0 && tensor[0] != IntPtr.Zero)
+                tensor[0].ToScalar<float>(ref loss);
+
+            if (tensor.Length > 1 && tensor[1] != IntPtr.Zero)
+                tensor[1].ToScalar<float>(ref metric);
+
             return (loss, metric);
         }
 
@@ -871,7 +875,7 @@ private Delegate MakeGetter<T>(DataViewRow input, int iinfo, ITensorValueGetter[
                         UpdateCacheIfNeeded(input.Position, srcTensorGetters, activeOutputColNames, outputCache);
 
                         var tensor = outputCache.Outputs[_parent._outputs[iinfo]];
-                        dst = tensor.ToArray<T>()[0];
+                        tensor.ToScalar<T>(ref dst);
                     };
                     return valuegetter;
                 }
@@ -903,7 +907,7 @@ private Delegate MakeGetter<T>(DataViewRow input, int iinfo, ITensorValueGetter[
 
                             var editor = VBufferEditor.Create(ref dst, (int)tensorSize);
 
-                            DnnUtils.FetchData<T>(tensor.ToArray<T>(), editor.Values);
+                            tensor.CopyTo<T>(editor.Values);
                             dst = editor.Commit();
                         };
                         return valuegetter;
diff --git a/src/Microsoft.ML.Dnn/DnnUtils.cs b/src/Microsoft.ML.Dnn/DnnUtils.cs
@@ -277,14 +277,6 @@ internal static Session GetSession(IHostEnvironment env, string modelPath, bool
             return LoadTFSessionByModelFilePath(env, modelPath, metaGraph);
         }
 
-        internal static unsafe void FetchData<T>(T[] data, Span<T> result)
-        {
-            var dataCopy = new T[data.Length];
-            Array.Copy(data, dataCopy, data.Length);
-            var dataSpan = new Span<T>(dataCopy, 0, result.Length);
-            dataSpan.CopyTo(result);
-        }
-
         internal static unsafe void FetchStringData<T>(Tensor tensor, Span<T> result)
         {
             if (tensor == null)
diff --git a/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs b/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs
@@ -235,15 +235,18 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
                 ImageClassificationMetrics metrics = new ImageClassificationMetrics();
                 metrics.Bottleneck = new BottleneckMetrics();
                 metrics.Bottleneck.DatasetUsed = dataset;
+                float[] imageArray = null;
                 while (cursor.MoveNext())
                 {
                     labelGetter(ref label);
                     imagePathGetter(ref imagePath);
                     var imagePathStr = imagePath.ToString();
                     var imageTensor = imageProcessor.ProcessImage(imagePathStr);
                     runner.AddInput(imageTensor, 0);
-                    var featurizedImage = runner.Run()[0]; // Reuse memory?
-                    writer.WriteLine(label - 1 + "," + string.Join(",", featurizedImage.ToArray<float>()));
+                    var featurizedImage = runner.Run()[0]; // Reuse memory
+                    featurizedImage.ToArray<float>(ref imageArray);
+                    Host.Assert((int)featurizedImage.size == imageArray.Length);
+                    writer.WriteLine(label - 1 + "," + string.Join(",", imageArray));
                     featurizedImage.Dispose();
                     imageTensor.Dispose();
                     metrics.Bottleneck.Index++;
@@ -338,6 +341,8 @@ private void TrainAndEvaluateClassificationLayer(string trainBottleneckFilePath,
 
             ImageClassificationMetrics metrics = new ImageClassificationMetrics();
             metrics.Train = new TrainMetrics();
+            float accuracy = 0;
+            float crossentropy = 0;
             for (int epoch = 0; epoch < epochs; epoch += 1)
             {
                 metrics.Train.Accuracy = 0;
@@ -378,8 +383,10 @@ private void TrainAndEvaluateClassificationLayer(string trainBottleneckFilePath,
                                     .AddInput(new Tensor(labelBatchPtr, labelTensorShape, TF_DataType.TF_INT64, labelBatchSizeInBytes), 1)
                                     .Run();
 
-                                metrics.Train.Accuracy += outputTensors[0].ToArray<float>()[0];
-                                metrics.Train.CrossEntropy += outputTensors[1].ToArray<float>()[0];
+                                outputTensors[0].ToScalar<float>(ref accuracy);
+                                outputTensors[1].ToScalar<float>(ref crossentropy);
+                                metrics.Train.Accuracy += accuracy;
+                                metrics.Train.CrossEntropy += crossentropy;
 
                                 outputTensors[0].Dispose();
                                 outputTensors[1].Dispose();
@@ -429,7 +436,8 @@ private void TrainAndEvaluateClassificationLayer(string trainBottleneckFilePath,
                                 .AddInput(new Tensor(labelBatchPtr, labelTensorShape, TF_DataType.TF_INT64, labelBatchSizeInBytes), 1)
                                 .Run();
 
-                            metrics.Train.Accuracy += outputTensors[0].ToArray<float>()[0];
+                            outputTensors[0].ToScalar<float>(ref accuracy);
+                            metrics.Train.Accuracy += accuracy;
                             metrics.Train.BatchProcessedCount += 1;
                             batchIndex = 0;
 
@@ -799,8 +807,10 @@ private class OutputCache
                 private ReadOnlyMemory<char> _imagePath;
                 private Runner _runner;
                 private ImageProcessor _imageProcessor;
-                public UInt32 PredictedLabel { get; set; }
-                public float[] ClassProbabilities { get; set; }
+                private long _predictedLabel;
+                public UInt32 PredictedLabel => (uint)_predictedLabel;
+                private float[] _classProbability;
+                public float[] ClassProbabilities => _classProbability;
                 private DataViewRow _inputRow;
 
                 public OutputCache(DataViewRow input, ImageClassificationTransformer transformer)
@@ -826,8 +836,8 @@ public void UpdateCacheIfNeeded()
                             _imagePathGetter(ref _imagePath);
                             var processedTensor = _imageProcessor.ProcessImage(_imagePath.ToString());
                             var outputTensor = _runner.AddInput(processedTensor, 0).Run();
-                            ClassProbabilities = outputTensor[0].ToArray<float>();
-                            PredictedLabel = (UInt32)outputTensor[1].ToArray<long>()[0];
+                            outputTensor[0].ToArray<float>(ref _classProbability);
+                            outputTensor[1].ToScalar<long>(ref _predictedLabel);
                             outputTensor[0].Dispose();
                             outputTensor[1].Dispose();
                             processedTensor.Dispose();
diff --git a/src/Microsoft.ML.Dnn/TensorTypeExtensions.cs b/src/Microsoft.ML.Dnn/TensorTypeExtensions.cs
@@ -0,0 +1,88 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using Microsoft.ML.Internal.Utilities;
+using NumSharp.Backends;
+using NumSharp.Backends.Unmanaged;
+using NumSharp.Utilities;
+using Tensorflow;
+
+namespace Microsoft.ML.Transforms
+{
+    /// <summary>
+    /// Extension methods that copy the contents of a <see cref="Tensor"/> into managed
+    /// destinations: a single value, a span, or a caller-owned array.
+    /// </summary>
+    [BestFriend]
+    internal static class TensorTypeExtensions
+    {
+        /// <summary>
+        /// Reads the value at the start of the tensor's buffer into <paramref name="dst"/>.
+        /// </summary>
+        /// <typeparam name="T">Unmanaged element type; must map to the tensor's dtype.</typeparam>
+        /// <param name="tensor">Tensor to read from.</param>
+        /// <param name="dst">Receives the value read from the buffer.</param>
+        /// <exception cref="NotSupportedException">
+        /// <typeparamref name="T"/> does not map to the tensor's dtype.
+        /// </exception>
+        public static void ToScalar<T>(this Tensor tensor, ref T dst) where T : unmanaged
+        {
+            if (typeof(T).as_dtype() != tensor.dtype)
+                throw new NotSupportedException();
+
+            unsafe
+            {
+                // NOTE(review): presumes the buffer holds at least one element; confirm at call sites.
+                dst = *(T*)tensor.buffer;
+            }
+        }
+
+        /// <summary>
+        /// Copies all elements of the tensor's buffer into <paramref name="values"/>.
+        /// </summary>
+        /// <typeparam name="T">Unmanaged element type; must map to the tensor's dtype.</typeparam>
+        /// <param name="tensor">Tensor to read from.</param>
+        /// <param name="values">Destination span; it must be able to hold <c>tensor.size</c> elements.</param>
+        /// <exception cref="NotSupportedException">
+        /// <typeparamref name="T"/> does not map to the tensor's dtype.
+        /// </exception>
+        /// <exception cref="OverflowException"><c>tensor.size</c> does not fit in an <see cref="int"/>.</exception>
+        public static void CopyTo<T>(this Tensor tensor, Span<T> values) where T : unmanaged
+        {
+            if (typeof(T).as_dtype() != tensor.dtype)
+                throw new NotSupportedException();
+
+            unsafe
+            {
+                // Checked cast: an element count that does not fit in an int throws instead of wrapping.
+                var len = checked((int)tensor.size);
+                var src = (T*)tensor.buffer;
+                var span = new Span<T>(src, len);
+                span.CopyTo(values);
+            }
+        }
+
+        /// <summary>
+        /// Copies the tensor's elements into <paramref name="array"/>, first asking
+        /// <c>Utils.EnsureSize</c> to make the array large enough for <c>tensor.size</c> elements.
+        /// </summary>
+        /// <typeparam name="T">Unmanaged element type; must map to the tensor's dtype.</typeparam>
+        /// <param name="tensor">Tensor to read from.</param>
+        /// <param name="array">Caller-owned buffer, passed by reference so <c>Utils.EnsureSize</c> can replace it.</param>
+        /// <exception cref="NotSupportedException"><typeparamref name="T"/> does not map to the tensor's dtype.</exception>
+        /// <exception cref="OverflowException"><c>tensor.size</c> does not fit in an <see cref="int"/>.</exception>
+        public static void ToArray<T>(this Tensor tensor, ref T[] array) where T : unmanaged
+        {
+            // Convert the element count once, with the same checked cast CopyTo uses, so an
+            // oversized tensor fails here instead of passing a wrapped length to EnsureSize.
+            var len = checked((int)tensor.size);
+            Utils.EnsureSize(ref array, len, len, false);
+
+            // NOTE(review): if EnsureSize leaves an already-larger array in place, elements past
+            // len keep their previous contents; confirm against Utils.EnsureSize.
+            CopyTo(tensor, new Span<T>(array));
+        }
+    }
+}
diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs
@@ -593,15 +593,13 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func<int, b
             private Delegate MakeGetter<T>(DataViewRow input, int iinfo, ITensorValueGetter[] srcTensorGetters, string[] activeOutputColNames, OutputCache outputCache) where T : unmanaged
             {
                 Host.AssertValue(input);
-
                 if (_parent.OutputTypes[iinfo].IsStandardScalar())
                 {
                     ValueGetter<T> valuegetter = (ref T dst) =>
                     {
                         UpdateCacheIfNeeded(input.Position, srcTensorGetters, activeOutputColNames, outputCache);
-
                         var tensor = outputCache.Outputs[_parent.Outputs[iinfo]];
-                        dst = tensor.ToArray<T>()[0];
+                        tensor.ToScalar<T>(ref dst);
                     };
                     return valuegetter;
                 }
@@ -633,7 +631,7 @@ private Delegate MakeGetter<T>(DataViewRow input, int iinfo, ITensorValueGetter[
 
                             var editor = VBufferEditor.Create(ref dst, (int)tensorSize);
 
-                            DnnUtils.FetchData<T>(tensor.ToArray<T>(), editor.Values);
+                            tensor.CopyTo<T>(editor.Values);
                             dst = editor.Commit();
                         };
                         return valuegetter;

Original file line number	Diff line number	Diff line change
`@@ -277,14 +277,6 @@ internal static Session GetSession(IHostEnvironment env, string modelPath, bool`
`277`	`277`	`return LoadTFSessionByModelFilePath(env, modelPath, metaGraph);`
`278`	`278`	`}`
`279`	`279`
`280`		`- internal static unsafe void FetchData<T>(T[] data, Span<T> result)`
`281`		`- {`
`282`		`- var dataCopy = new T[data.Length];`
`283`		`- Array.Copy(data, dataCopy, data.Length);`
`284`		`- var dataSpan = new Span<T>(dataCopy, 0, result.Length);`
`285`		`- dataSpan.CopyTo(result);`
`286`		`- }`
`287`		`-`
`288`	`280`	`internal static unsafe void FetchStringData<T>(Tensor tensor, Span<T> result)`
`289`	`281`	`{`
`290`	`282`	`if (tensor == null)`
Original file line number	Diff line number	Diff line change
`@@ -593,15 +593,13 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func<int, b`
`593`	`593`	`private Delegate MakeGetter<T>(DataViewRow input, int iinfo, ITensorValueGetter[] srcTensorGetters, string[] activeOutputColNames, OutputCache outputCache) where T : unmanaged`
`594`	`594`	`{`
`595`	`595`	`Host.AssertValue(input);`
`596`		`-`
`597`	`596`	`if (_parent.OutputTypes[iinfo].IsStandardScalar())`
`598`	`597`	`{`
`599`	`598`	`ValueGetter<T> valuegetter = (ref T dst) =>`
`600`	`599`	`{`
`601`	`600`	`UpdateCacheIfNeeded(input.Position, srcTensorGetters, activeOutputColNames, outputCache);`
`602`		`-`
`603`	`601`	`var tensor = outputCache.Outputs[_parent.Outputs[iinfo]];`
`604`		`- dst = tensor.ToArray<T>()[0];`
	`602`	`+ tensor.ToScalar<T>(ref dst);`
`605`	`603`	`};`
`606`	`604`	`return valuegetter;`
`607`	`605`	`}`
`@@ -633,7 +631,7 @@ private Delegate MakeGetter<T>(DataViewRow input, int iinfo, ITensorValueGetter[`
`633`	`631`
`634`	`632`	`var editor = VBufferEditor.Create(ref dst, (int)tensorSize);`
`635`	`633`
`636`		`- DnnUtils.FetchData<T>(tensor.ToArray<T>(), editor.Values);`
	`634`	`+ tensor.CopyTo<T>(editor.Values);`
`637`	`635`	`dst = editor.Commit();`
`638`	`636`	`};`
`639`	`637`	`return valuegetter;`