From ad6ad8493c0e66d664330ec2b4233c7469168856 Mon Sep 17 00:00:00 2001 From: Oceania2018 Date: Sat, 13 Feb 2021 08:10:39 -0600 Subject: [PATCH] Align with tensorflow v2.4.1 --- src/TensorFlowNET.Console/MemoryBasicTest.cs | 10 +- src/TensorFlowNET.Console/Program.cs | 2 +- src/TensorFlowNET.Core/APIs/c_api.cs | 2 +- .../Tensorflow.Binding.csproj | 10 +- .../Tensors/Tensor.String.cs | 107 ++++-------------- src/TensorFlowNET.Core/Tensors/Tensor.cs | 18 ++- .../Tensors/c_api.tensor.cs | 5 +- .../Tensors/TensorTest.cs | 9 +- 8 files changed, 62 insertions(+), 101 deletions(-) diff --git a/src/TensorFlowNET.Console/MemoryBasicTest.cs b/src/TensorFlowNET.Console/MemoryBasicTest.cs index d61cca69..bbb23391 100644 --- a/src/TensorFlowNET.Console/MemoryBasicTest.cs +++ b/src/TensorFlowNET.Console/MemoryBasicTest.cs @@ -4,6 +4,8 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine.DataAdapters; using static Tensorflow.Binding; using static Tensorflow.KerasApi; +using System.Linq; +using System.Collections.Generic; namespace Tensorflow { @@ -35,13 +37,15 @@ namespace Tensorflow public Action ConstantString => (epoch, iterate) => { - var tensor = tf.constant(new string[] + var strList = new string[] { "Biden immigration bill would put millions of illegal immigrants on 8-year fast-track to citizenship", "The Associated Press, which also reported that the eight-year path is in the bill.", "The bill would also include provisions to stem the flow of migration by addressing root causes of migration from south of the border." - }); - var data = tensor.numpy(); + }; + + var tensor = tf.constant(strList, TF_DataType.TF_STRING); + var data = tensor.StringData(); }; public Action Variable diff --git a/src/TensorFlowNET.Console/Program.cs b/src/TensorFlowNET.Console/Program.cs index 38b878af..4b7f52de 100644 --- a/src/TensorFlowNET.Console/Program.cs +++ b/src/TensorFlowNET.Console/Program.cs @@ -47,7 +47,7 @@ namespace Tensorflow // explaination of constant mm.Execute(10, 100 * batchSize, basic.Constant2x3); - mm.Execute(10, 100 * batchSize, basic.ConstantString); + mm.Execute(10, batchSize, basic.ConstantString); // 100K float variable. mm.Execute(10, batchSize, basic.Variable); diff --git a/src/TensorFlowNET.Core/APIs/c_api.cs b/src/TensorFlowNET.Core/APIs/c_api.cs index 10f678e0..11c17abd 100644 --- a/src/TensorFlowNET.Core/APIs/c_api.cs +++ b/src/TensorFlowNET.Core/APIs/c_api.cs @@ -43,7 +43,7 @@ namespace Tensorflow /// public partial class c_api { - public const string TensorFlowLibName = "tensorflow"; + public const string TensorFlowLibName = @"D:\Projects\tensorflow-haiping\bazel-bin\tensorflow\tensorflow"; public static string StringPiece(IntPtr handle) { diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index 6017d510..139f98dc 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -5,7 +5,7 @@ TensorFlow.NET Tensorflow 2.2.0 - 0.33.0 + 0.40.0 8.0 Haiping Chen, Meinrad Recheis, Eli Belash SciSharp STACK @@ -19,7 +19,7 @@ Google's TensorFlow full binding in .NET Standard. Building, training and infering deep learning models. https://tensorflownet.readthedocs.io - 0.33.0.0 + 0.40.0.0 tf.net 0.20.x and above are based on tensorflow native 2.x. * Eager Mode is added finally. @@ -29,8 +29,10 @@ https://tensorflownet.readthedocs.io * Improve memory usage. TensorFlow .NET v0.3x is focused on making more Keras API works. -Keras API is a separate package released as TensorFlow.Keras. - 0.33.0.0 +Keras API is a separate package released as TensorFlow.Keras. + +tf.net 0.4x.x aligns with TensorFlow v2.4.1 native library. + 0.40.0.0 LICENSE true true diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.String.cs b/src/TensorFlowNET.Core/Tensors/Tensor.String.cs index abe07c75..e9780836 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.String.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.String.cs @@ -8,27 +8,7 @@ namespace Tensorflow { public partial class Tensor { - const ulong TF_TSRING_SIZE = 24; - - public IntPtr StringTensor25(string[] strings, TensorShape shape) - { - var handle = c_api.TF_AllocateTensor(TF_DataType.TF_STRING, - shape.dims.Select(x => (long)x).ToArray(), - shape.ndim, - (ulong)shape.size * TF_TSRING_SIZE); - - var data = c_api.TF_TensorData(handle); - var tstr = c_api.TF_StringInit(handle); - // AllocationHandle = tstr; - // AllocationType = AllocationType.Tensorflow; - for (int i = 0; i< strings.Length; i++) - { - c_api.TF_StringCopy(tstr, strings[i], strings[i].Length); - tstr += (int)TF_TSRING_SIZE; - } - // c_api.TF_StringDealloc(tstr); - return handle; - } + const int TF_TSRING_SIZE = 24; public IntPtr StringTensor(string[] strings, TensorShape shape) { @@ -40,69 +20,28 @@ namespace Tensorflow return StringTensor(buffer, shape); } - public unsafe IntPtr StringTensor(byte[][] buffer, TensorShape shape) + public IntPtr StringTensor(byte[][] buffer, TensorShape shape) { - ulong size = 0; - foreach (var b in buffer) - size += c_api.TF_StringEncodedSize((ulong)b.Length); - - var src_size = size + (ulong)buffer.Length * sizeof(ulong); var handle = c_api.TF_AllocateTensor(TF_DataType.TF_STRING, - shape.dims.Select(x => (long)x).ToArray(), + shape.ndim == 0 ? null : shape.dims.Select(x => (long)x).ToArray(), shape.ndim, - src_size); - AllocationType = AllocationType.Tensorflow; + (ulong)shape.size * TF_TSRING_SIZE); - IntPtr data_start = c_api.TF_TensorData(handle); - IntPtr string_start = data_start + buffer.Length * sizeof(ulong); - IntPtr limit = data_start + (int)src_size; - ulong offset = 0; + var tstr = c_api.TF_TensorData(handle); +#if TRACK_TENSOR_LIFE + print($"New TString 0x{handle.ToString("x16")} {AllocationType} Data: 0x{tstr.ToString("x16")}"); +#endif for (int i = 0; i < buffer.Length; i++) { - Marshal.WriteInt64(data_start, i * sizeof(ulong), (long)offset); - if (buffer[i].Length == 0) - { - Marshal.WriteByte(string_start, 0); - break; - } - - fixed (byte* src = &buffer[i][0]) - { - /*Marshal.WriteByte(string_start, Convert.ToByte(buffer[i].Length)); - tf.memcpy((string_start + 1).ToPointer(), src, (ulong)buffer[i].Length); - string_start += buffer[i].Length + 1; - offset += buffer[i].Length + 1;*/ - - var written = c_api.TF_StringEncode(src, (ulong)buffer[i].Length, (byte*)string_start, (ulong)(limit.ToInt64() - string_start.ToInt64()), tf.Status.Handle); - tf.Status.Check(true); - string_start += (int)written; - offset += written; - } + c_api.TF_StringInit(tstr); + c_api.TF_StringCopy(tstr, buffer[i], buffer[i].Length); + var data = c_api.TF_StringGetDataPointer(tstr); + tstr += TF_TSRING_SIZE; } return handle; } - public string[] StringData25() - { - string[] strings = new string[c_api.TF_Dim(_handle, 0)]; - var tstrings = TensorDataPointer; - for (int i = 0; i< strings.Length; i++) - { - var tstringData = c_api.TF_StringGetDataPointer(tstrings); - /*var size = c_api.TF_StringGetSize(tstrings); - var capacity = c_api.TF_StringGetCapacity(tstrings); - var type = c_api.TF_StringGetType(tstrings);*/ - strings[i] = c_api.StringPiece(tstringData); - tstrings += (int)TF_TSRING_SIZE; - } - return strings; - } - - /// - /// Extracts string array from current Tensor. - /// - /// When != TF_DataType.TF_STRING public string[] StringData() { var buffer = StringBytes(); @@ -114,7 +53,7 @@ namespace Tensorflow return _str; } - public unsafe byte[][] StringBytes() + public byte[][] StringBytes() { if (dtype != TF_DataType.TF_STRING) throw new InvalidOperationException($"Unable to call StringData when dtype != TF_DataType.TF_STRING (dtype is {dtype})"); @@ -123,24 +62,22 @@ namespace Tensorflow // TF_STRING tensors are encoded with a table of 8-byte offsets followed by TF_StringEncode-encoded bytes. // [offset1, offset2,...,offsetn, s1size, s1bytes, s2size, s2bytes,...,snsize,snbytes] // - long size = 1; + int size = 1; foreach (var s in TensorShape.dims) size *= s; var buffer = new byte[size][]; - var data_start = c_api.TF_TensorData(_handle); - data_start += (int)(size * sizeof(ulong)); + var tstrings = TensorDataPointer; for (int i = 0; i < buffer.Length; i++) { - IntPtr dst = IntPtr.Zero; - ulong dstLen = 0; - var read = c_api.TF_StringDecode((byte*)data_start, bytesize, (byte**)&dst, ref dstLen, tf.Status.Handle); - tf.Status.Check(true); - buffer[i] = new byte[(int)dstLen]; - Marshal.Copy(dst, buffer[i], 0, buffer[i].Length); - data_start += (int)read; + var data = c_api.TF_StringGetDataPointer(tstrings); + var len = c_api.TF_StringGetSize(tstrings); + buffer[i] = new byte[len]; + // var capacity = c_api.TF_StringGetCapacity(tstrings); + // var type = c_api.TF_StringGetType(tstrings); + Marshal.Copy(data, buffer[i], 0, Convert.ToInt32(len)); + tstrings += TF_TSRING_SIZE; } - return buffer; } } diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs index cfc60b1f..037a370a 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.cs @@ -15,7 +15,6 @@ ******************************************************************************/ using NumSharp; -using NumSharp.Backends.Unmanaged; using System; using System.Diagnostics.CodeAnalysis; using System.Globalization; @@ -24,7 +23,6 @@ using System.Runtime.InteropServices; using Tensorflow.Eager; using Tensorflow.Framework; using Tensorflow.Keras.Engine; -using Tensorflow.Variables; using static Tensorflow.Binding; namespace Tensorflow @@ -287,6 +285,22 @@ namespace Tensorflow throw new InvalidOperationException($"Tensor.AllocationHandle is not null ({AllocationHandle}) but AllocationType is not matched to a C# allocation type ({AllocationType})."); } + if (dtype == TF_DataType.TF_STRING) + { + int size = 1; + foreach (var s in TensorShape.dims) + size *= s; + var tstr = TensorDataPointer; +#if TRACK_TENSOR_LIFE + print($"Delete TString 0x{handle.ToString("x16")} {AllocationType} Data: 0x{tstrings.ToString("x16")}"); +#endif + for (int i = 0; i < size; i++) + { + c_api.TF_StringDealloc(tstr); + tstr += TF_TSRING_SIZE; + } + } + c_api.TF_DeleteTensor(handle); } diff --git a/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs b/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs index 0fd2527e..4b3601b0 100644 --- a/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs @@ -182,7 +182,10 @@ namespace Tensorflow public static extern unsafe ulong TF_StringEncode(byte* src, ulong src_len, byte* dst, ulong dst_len, SafeStatusHandle status); [DllImport(TensorFlowLibName)] - public static extern IntPtr TF_StringInit(IntPtr t); + public static extern void TF_StringInit(IntPtr t); + + [DllImport(TensorFlowLibName)] + public static extern void TF_StringCopy(IntPtr dst, byte[] text, long size); [DllImport(TensorFlowLibName)] public static extern void TF_StringCopy(IntPtr dst, string text, long size); diff --git a/test/TensorFlowNET.Native.UnitTest/Tensors/TensorTest.cs b/test/TensorFlowNET.Native.UnitTest/Tensors/TensorTest.cs index 7f1591e9..65404089 100644 --- a/test/TensorFlowNET.Native.UnitTest/Tensors/TensorTest.cs +++ b/test/TensorFlowNET.Native.UnitTest/Tensors/TensorTest.cs @@ -111,7 +111,7 @@ namespace Tensorflow.Native.UnitTest.Tensors /// Port from c_api_test.cc /// `TEST_F(CApiAttributesTest, StringTensor)` /// - [TestMethod, Ignore("Waiting for PR https://github.com/tensorflow/tensorflow/pull/46804")] + [TestMethod] public void StringTensor() { string text = "Hello world!."; @@ -120,13 +120,14 @@ namespace Tensorflow.Native.UnitTest.Tensors null, 0, 1 * 24); - var tstr = c_api.TF_StringInit(tensor); - var data = c_api.TF_StringGetDataPointer(tstr); + var tstr = c_api.TF_TensorData(tensor); + c_api.TF_StringInit(tstr); c_api.TF_StringCopy(tstr, text, text.Length); + var data = c_api.TF_StringGetDataPointer(tstr); Assert.AreEqual((ulong)text.Length, c_api.TF_StringGetSize(tstr)); Assert.AreEqual(text, c_api.StringPiece(data)); - Assert.AreEqual((ulong)text.Length, c_api.TF_TensorByteSize(tensor)); + Assert.AreEqual(TF_TString_Type.TF_TSTR_SMALL, c_api.TF_StringGetType(tensor)); Assert.AreEqual(0, c_api.TF_NumDims(tensor)); TF_DeleteTensor(tensor);