From 03a9e95d344af87378b27b85042fcede4e4fc6f0 Mon Sep 17 00:00:00 2001 From: Oceania2018 Date: Fri, 24 Jul 2020 06:34:14 -0500 Subject: [PATCH] Create tensor from string array. --- .../Eager/EagerTensor.Creation.cs | 6 ++ .../Tensors/Tensor.Creation.cs | 78 ++++++++++++++----- .../Tensors/c_api.tensor.cs | 8 +- src/TensorFlowNET.Core/Tensors/constant_op.cs | 2 + src/TensorFlowNET.Core/tensorflow.memory.cs | 4 + .../TF_API/StringsApiTest.cs | 14 ++++ 6 files changed, 84 insertions(+), 28 deletions(-) diff --git a/src/TensorFlowNET.Core/Eager/EagerTensor.Creation.cs b/src/TensorFlowNET.Core/Eager/EagerTensor.Creation.cs index 75677e22..ab558f5d 100644 --- a/src/TensorFlowNET.Core/Eager/EagerTensor.Creation.cs +++ b/src/TensorFlowNET.Core/Eager/EagerTensor.Creation.cs @@ -32,6 +32,12 @@ namespace Tensorflow.Eager Resolve(); } + public EagerTensor(string[] value, string device_name) : base(value) + { + EagerTensorHandle = c_api.TFE_NewTensorHandle(_handle, tf.status.Handle); + Resolve(); + } + public EagerTensor(NDArray value, string device_name) : base(value) { EagerTensorHandle = c_api.TFE_NewTensorHandle(_handle, tf.status.Handle); diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs index 9507f0c0..eca91570 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs @@ -154,7 +154,7 @@ namespace Tensorflow /// public unsafe Tensor(sbyte value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(sbyte)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(sbyte)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(sbyte)), dims: new long[0], num_dims: 0, len: sizeof(sbyte)); *(sbyte*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -180,7 +180,7 @@ namespace Tensorflow /// public unsafe Tensor(bool value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(bool)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(bool)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(bool)), dims: new long[0], num_dims: 0, len: sizeof(bool)); *(bool*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -206,7 +206,7 @@ namespace Tensorflow /// public unsafe Tensor(byte value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(byte)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(byte)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(byte)), dims: new long[0], num_dims: 0, len: sizeof(byte)); *(byte*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -232,7 +232,7 @@ namespace Tensorflow /// public unsafe Tensor(short value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(short)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(short)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(short)), dims: new long[0], num_dims: 0, len: sizeof(short)); *(short*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -258,7 +258,7 @@ namespace Tensorflow /// public unsafe Tensor(ushort value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(ushort)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(ushort)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(ushort)), dims: new long[0], num_dims: 0, len: sizeof(ushort)); *(ushort*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -284,7 +284,7 @@ namespace Tensorflow /// public unsafe Tensor(int value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(int)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(int)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(int)), dims: new long[0], num_dims: 0, len: sizeof(int)); *(int*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -310,7 +310,7 @@ namespace Tensorflow /// public unsafe Tensor(uint value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(uint)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(uint)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(uint)), dims: new long[0], num_dims: 0, len: sizeof(uint)); *(uint*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -336,7 +336,7 @@ namespace Tensorflow /// public unsafe Tensor(long value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(long)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(long)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(long)), dims: new long[0], num_dims: 0, len: sizeof(long)); *(long*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -362,7 +362,7 @@ namespace Tensorflow /// public unsafe Tensor(ulong value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(ulong)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(ulong)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(ulong)), dims: new long[0], num_dims: 0, len: sizeof(ulong)); *(ulong*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -388,7 +388,7 @@ namespace Tensorflow /// public unsafe Tensor(float value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(float)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(float)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(float)), dims: new long[0], num_dims: 0, len: sizeof(float)); *(float*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -414,7 +414,7 @@ namespace Tensorflow /// public unsafe Tensor(double value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(double)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(double)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(double)), dims: new long[0], num_dims: 0, len: sizeof(double)); *(double*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -440,7 +440,7 @@ namespace Tensorflow /// public unsafe Tensor(Complex value, TF_DataType? dType = null) { - _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(Complex)), dims: new long[0], num_dims: 0, len: (UIntPtr) sizeof(Complex)); + _handle = TF_AllocateTensor(dType ?? dtypes.as_dtype(typeof(Complex)), dims: new long[0], num_dims: 0, len: (ulong)sizeof(Complex)); *(Complex*) TF_TensorData(_handle) = value; AllocationType = AllocationType.Tensorflow; } @@ -453,17 +453,53 @@ namespace Tensorflow { var buffer = Encoding.UTF8.GetBytes(str); var size = c_api.TF_StringEncodedSize((ulong)buffer.Length); - var handle = TF_AllocateTensor(TF_DataType.TF_STRING, IntPtr.Zero, 0, (UIntPtr)(size + sizeof(ulong))); + var handle = TF_AllocateTensor(TF_DataType.TF_STRING, null, 0, size + sizeof(ulong)); AllocationType = AllocationType.Tensorflow; IntPtr tensor = c_api.TF_TensorData(handle); Marshal.WriteInt64(tensor, 0); fixed (byte* src = buffer) - c_api.TF_StringEncode(src, (ulong)buffer.Length, (sbyte*)(tensor + sizeof(long)), size, tf.status.Handle); + c_api.TF_StringEncode(src, (ulong)buffer.Length, (byte*)(tensor + sizeof(long)), size, tf.status.Handle); _handle = handle; tf.status.Check(true); } + public unsafe Tensor(string[] strings) + { + var num_elements = (ulong)strings.Length; + var string_length = new ulong[num_elements]; + ulong size = 0; + + for (ulong i = 0; i < num_elements; i++) + { + var buffer = Encoding.UTF8.GetBytes(strings[i]); + string_length[i] = c_api.TF_StringEncodedSize((ulong)buffer.Length); + size += string_length[i]; + } + + size = size + num_elements * sizeof(ulong); + var handle = TF_AllocateTensor(TF_DataType.TF_STRING, new long[] { (long)num_elements }, 1, size); + AllocationType = AllocationType.Tensorflow; + + IntPtr tensor = c_api.TF_TensorData(handle); + tf.memcpy(tensor, string_length, num_elements); + + IntPtr data_start = tensor + sizeof(ulong) * (int)num_elements; + for (var i = 0; i < strings.Length; i++) + { + var buffer = Encoding.UTF8.GetBytes(strings[i]); + fixed (byte* src = buffer) + { + var encoded_size = c_api.TF_StringEncode(src, (ulong)buffer.Length, (byte*)data_start, string_length[i], tf.status.Handle); + data_start += (int)encoded_size; + } + + tf.status.Check(true); + } + + _handle = handle; + } + public unsafe Tensor(NDArray nd, TF_DataType? tensorDType = null) { if (tensorDType == null) @@ -476,27 +512,27 @@ namespace Tensorflow { var bytesLength = (ulong)nd.size; var size = c_api.TF_StringEncodedSize(bytesLength); - var handle = TF_AllocateTensor(TF_DataType.TF_STRING, IntPtr.Zero, 0, (UIntPtr) ((ulong) size + 8)); + var handle = TF_AllocateTensor(TF_DataType.TF_STRING, null, 0, size + 8); AllocationType = AllocationType.Tensorflow; IntPtr tensor = c_api.TF_TensorData(handle); Marshal.WriteInt64(tensor, 0); - c_api.TF_StringEncode((byte*) nd.Unsafe.Address, bytesLength, (sbyte*) (tensor + sizeof(long)), size, tf.status.Handle); + c_api.TF_StringEncode((byte*) nd.Unsafe.Address, bytesLength, (byte*) (tensor + sizeof(long)), size, tf.status.Handle); tf.status.Check(true); _handle = handle; } else { var buffer = nd.ToArray(); var size = c_api.TF_StringEncodedSize((ulong)buffer.Length); - var handle = TF_AllocateTensor(TF_DataType.TF_STRING, IntPtr.Zero, 0, (UIntPtr) ((ulong) size + 8)); + var handle = TF_AllocateTensor(TF_DataType.TF_STRING, null, 0, size + 8); AllocationType = AllocationType.Tensorflow; IntPtr tensor = c_api.TF_TensorData(handle); Marshal.WriteInt64(tensor, 0); fixed (byte* src = buffer) - c_api.TF_StringEncode(src, (ulong)buffer.Length, (sbyte*) (tensor + sizeof(Int64)), size, tf.status.Handle); + c_api.TF_StringEncode(src, (ulong)buffer.Length, (byte*) (tensor + sizeof(Int64)), size, tf.status.Handle); tf.status.Check(true); _handle = handle; @@ -543,7 +579,7 @@ namespace Tensorflow int totalSize = size + buffer.Length * 8; ulong offset = 0; - IntPtr handle = TF_AllocateTensor(TF_DataType.TF_STRING, shape, shape.Length, (UIntPtr) totalSize); + IntPtr handle = TF_AllocateTensor(TF_DataType.TF_STRING, shape, shape.Length, (ulong)totalSize); AllocationType = AllocationType.Tensorflow; // Clear offset table @@ -557,7 +593,7 @@ namespace Tensorflow { fixed (byte* src = &buffer[i][0]) { - var written = TF_StringEncode(src, (ulong)buffer[i].Length, (sbyte*)dst, (ulong)(dstLimit.ToInt64() - dst.ToInt64()), status.Handle); + var written = TF_StringEncode(src, (ulong)buffer[i].Length, (byte*)dst, (ulong)(dstLimit.ToInt64() - dst.ToInt64()), status.Handle); status.Check(true); pOffset += 8; dst += (int) written; @@ -609,7 +645,7 @@ namespace Tensorflow Marshal.WriteInt64(tensor, 0); fixed (byte* src = buffer) - c_api.TF_StringEncode(src, (ulong)buffer.Length, (sbyte*)(tensor + sizeof(long)), size, tf.status.Handle); + c_api.TF_StringEncode(src, (ulong)buffer.Length, (byte*)(tensor + sizeof(long)), size, tf.status.Handle); tf.status.Check(true); return handle; diff --git a/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs b/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs index ee249de1..d5efb75d 100644 --- a/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs @@ -30,15 +30,9 @@ namespace Tensorflow /// int /// size_t /// - [DllImport(TensorFlowLibName)] - public static extern IntPtr TF_AllocateTensor(TF_DataType dtype, IntPtr dims, int num_dims, UIntPtr len); - [DllImport(TensorFlowLibName)] public static extern IntPtr TF_AllocateTensor(TF_DataType dtype, long[] dims, int num_dims, ulong len); - [DllImport(TensorFlowLibName)] - public static extern IntPtr TF_AllocateTensor(TF_DataType dtype, long[] dims, int num_dims, UIntPtr len); - /// /// returns the sizeof() for the underlying type corresponding to the given TF_DataType enum value. /// @@ -185,7 +179,7 @@ namespace Tensorflow /// TF_Status* /// On success returns the size in bytes of the encoded string. [DllImport(TensorFlowLibName)] - public static extern unsafe ulong TF_StringEncode(byte* src, ulong src_len, sbyte* dst, ulong dst_len, SafeStatusHandle status); + public static extern unsafe ulong TF_StringEncode(byte* src, ulong src_len, byte* dst, ulong dst_len, SafeStatusHandle status); /// /// Decode a string encoded using TF_StringEncode. diff --git a/src/TensorFlowNET.Core/Tensors/constant_op.cs b/src/TensorFlowNET.Core/Tensors/constant_op.cs index 0149ae1b..2ed21e65 100644 --- a/src/TensorFlowNET.Core/Tensors/constant_op.cs +++ b/src/TensorFlowNET.Core/Tensors/constant_op.cs @@ -146,6 +146,8 @@ namespace Tensorflow return new EagerTensor(val, ctx.device_name); case string val: return new EagerTensor(val, ctx.device_name); + case string[] val: + return new EagerTensor(val, ctx.device_name); case bool val: return new EagerTensor(val, ctx.device_name); case byte val: diff --git a/src/TensorFlowNET.Core/tensorflow.memory.cs b/src/TensorFlowNET.Core/tensorflow.memory.cs index c442c1c0..764a38ed 100644 --- a/src/TensorFlowNET.Core/tensorflow.memory.cs +++ b/src/TensorFlowNET.Core/tensorflow.memory.cs @@ -40,6 +40,10 @@ namespace Tensorflow public unsafe void memcpy(IntPtr dst, T[] src, ulong size) where T : unmanaged { + if (src.Length == 0) return; + + size = size * (ulong)sizeof(T); + fixed (void* p = &src[0]) System.Buffer.MemoryCopy(p, dst.ToPointer(), size, size); } diff --git a/test/TensorFlowNET.UnitTest/TF_API/StringsApiTest.cs b/test/TensorFlowNET.UnitTest/TF_API/StringsApiTest.cs index 314e57fb..538936b3 100644 --- a/test/TensorFlowNET.UnitTest/TF_API/StringsApiTest.cs +++ b/test/TensorFlowNET.UnitTest/TF_API/StringsApiTest.cs @@ -47,5 +47,19 @@ namespace Tensorflow.UnitTest.TF_API var result = math_ops.equal(substr, jpg_tensor); } + + [TestMethod] + public void StringArray() + { + var strings = new[] { "map_and_batch_fusion", "noop_elimination", "shuffle_and_repeat_fusion" }; + var tensor = tf.constant(strings, dtype: tf.@string, name: "optimizations"); + tensor.ToString(); + var stringData = tensor.StringData(); + + Assert.AreEqual(3, tensor.shape[0]); + Assert.AreEqual(strings[0], stringData[0]); + Assert.AreEqual(strings[1], stringData[1]); + Assert.AreEqual(strings[2], stringData[2]); + } } }