From 031670b168ad13f66054d692f91c2d698dc8c0dc Mon Sep 17 00:00:00 2001 From: haiping008 Date: Wed, 27 Feb 2019 14:41:47 -0600 Subject: [PATCH] fix keras.preprocessing.sequence --- TensorFlow.NET.sln | 10 +-- .../APIs/keras.preprocessing.cs | 15 ++++ src/TensorFlowNET.Core/Keras/Engine/Layer.cs | 17 ++++ src/TensorFlowNET.Core/Keras/Engine/Model.cs | 10 +++ .../Keras/Engine/Network.cs | 10 +++ .../Keras/Engine/Sequential.cs | 24 ++++++ src/TensorFlowNET.Core/Keras/Preprocessing.cs | 11 +++ src/TensorFlowNET.Core/Keras/Sequence.cs | 77 +++++++++++++++++++ .../TensorFlowNET.Core.csproj | 2 +- .../Checkpointable/CheckpointableBase.cs | 10 +++ .../TensorFlowNET.Examples.csproj | 2 +- .../TextClassificationWithMovieReviews.cs | 56 +++++++++----- 12 files changed, 216 insertions(+), 28 deletions(-) create mode 100644 src/TensorFlowNET.Core/APIs/keras.preprocessing.cs create mode 100644 src/TensorFlowNET.Core/Keras/Engine/Layer.cs create mode 100644 src/TensorFlowNET.Core/Keras/Engine/Model.cs create mode 100644 src/TensorFlowNET.Core/Keras/Engine/Network.cs create mode 100644 src/TensorFlowNET.Core/Keras/Engine/Sequential.cs create mode 100644 src/TensorFlowNET.Core/Keras/Preprocessing.cs create mode 100644 src/TensorFlowNET.Core/Keras/Sequence.cs create mode 100644 src/TensorFlowNET.Core/Train/Checkpointable/CheckpointableBase.cs diff --git a/TensorFlow.NET.sln b/TensorFlow.NET.sln index 7f124824..4aae6e0d 100644 --- a/TensorFlow.NET.sln +++ b/TensorFlow.NET.sln @@ -13,7 +13,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Utility", "sr EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Visualization", "TensorFlowNET.Visualization\TensorFlowNET.Visualization.csproj", "{4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "KerasNET.Core", "..\Keras.NET\src\KerasNET.Core\KerasNET.Core.csproj", "{E2F0C39C-D706-4CF5-AE00-81FB447F949D}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NumSharp.Core", "..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj", "{0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -41,10 +41,10 @@ Global {4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}.Debug|Any CPU.Build.0 = Debug|Any CPU {4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}.Release|Any CPU.ActiveCfg = Release|Any CPU {4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}.Release|Any CPU.Build.0 = Release|Any CPU - {E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Debug|Any CPU.Build.0 = Debug|Any CPU - {E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Release|Any CPU.ActiveCfg = Release|Any CPU - {E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Release|Any CPU.Build.0 = Release|Any CPU + {0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/TensorFlowNET.Core/APIs/keras.preprocessing.cs b/src/TensorFlowNET.Core/APIs/keras.preprocessing.cs new file mode 100644 index 00000000..97363bfb --- /dev/null +++ b/src/TensorFlowNET.Core/APIs/keras.preprocessing.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras; +using Tensorflow.Keras.Engine; + +namespace Tensorflow +{ + public static partial class keras + { + public static Preprocessing preprocessing => new Preprocessing(); + public static Sequence sequence = new Sequence(); + public static Sequential Sequential() => new Sequential(); + } +} diff --git a/src/TensorFlowNET.Core/Keras/Engine/Layer.cs b/src/TensorFlowNET.Core/Keras/Engine/Layer.cs new file mode 100644 index 00000000..aa53c0ae --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Engine/Layer.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Engine +{ + /// + /// Base layer class. + /// A layer is a class implementing common neural networks operations, such + /// as convolution, batch norm, etc. These operations require managing weights, + /// losses, updates, and inter-layer connectivity. + /// + public class Layer : CheckpointableBase + { + + } +} diff --git a/src/TensorFlowNET.Core/Keras/Engine/Model.cs b/src/TensorFlowNET.Core/Keras/Engine/Model.cs new file mode 100644 index 00000000..a0ad4a53 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Engine/Model.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Engine +{ + internal class Model : Network + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/Engine/Network.cs b/src/TensorFlowNET.Core/Keras/Engine/Network.cs new file mode 100644 index 00000000..6eff46c4 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Engine/Network.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Engine +{ + public class Network : Layer + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/Engine/Sequential.cs b/src/TensorFlowNET.Core/Keras/Engine/Sequential.cs new file mode 100644 index 00000000..d3762bfb --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Engine/Sequential.cs @@ -0,0 +1,24 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Engine +{ + public class Sequential : Network, IPython + { + public void Dispose() + { + throw new NotImplementedException(); + } + + public void __enter__() + { + throw new NotImplementedException(); + } + + public void __exit__() + { + throw new NotImplementedException(); + } + } +} diff --git a/src/TensorFlowNET.Core/Keras/Preprocessing.cs b/src/TensorFlowNET.Core/Keras/Preprocessing.cs new file mode 100644 index 00000000..81148f58 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Preprocessing.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras +{ + public class Preprocessing + { + public Sequence sequence => new Sequence(); + } +} diff --git a/src/TensorFlowNET.Core/Keras/Sequence.cs b/src/TensorFlowNET.Core/Keras/Sequence.cs new file mode 100644 index 00000000..02638035 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Sequence.cs @@ -0,0 +1,77 @@ +using NumSharp.Core; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; + +namespace Tensorflow.Keras +{ + public class Sequence + { + /// + /// Pads sequences to the same length. + /// https://keras.io/preprocessing/sequence/ + /// https://faroit.github.io/keras-docs/1.2.0/preprocessing/sequence/ + /// + /// List of lists, where each element is a sequence. + /// Int, maximum length of all sequences. + /// Type of the output sequences. + /// String, 'pre' or 'post': + /// String, 'pre' or 'post' + /// Float or String, padding value. + /// + public NDArray pad_sequences(NDArray sequences, + int? maxlen = null, + string dtype = "int32", + string padding = "pre", + string truncating = "pre", + object value = null) + { + int[] length = new int[sequences.size]; + switch (sequences.dtype.Name) + { + case "Object": + for (int i = 0; i < sequences.size; i++) + { + switch (sequences.Data(i)) + { + case string data: + length[i] = Regex.Matches(data, ",").Count; + break; + } + } + break; + case "Int32": + for (int i = 0; i < sequences.size; i++) + length[i] = Regex.Matches(sequences.Data(i).ToString(), ",").Count; + break; + default: + throw new NotImplementedException($"pad_sequences: {sequences.dtype.Name}"); + } + + if (maxlen == null) + maxlen = length.Max(); + + if (value == null) + value = 0f; + + var nd = new NDArray(np.int32, new Shape(sequences.size, maxlen.Value)); + for (int i = 0; i < nd.shape[0]; i++) + { + switch(sequences[i]) + { + case int[] data: + for (int j = 0; j < nd.shape[1]; j++) + nd[i, j] = j < data.Length ? data[j] : value; + break; + default: + throw new NotImplementedException("pad_sequences"); + } + } + + return nd; + } + } +} diff --git a/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj b/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj index ae1c5b8b..01448769 100644 --- a/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj +++ b/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj @@ -52,7 +52,7 @@ Docs: https://tensorflownet.readthedocs.io - + diff --git a/src/TensorFlowNET.Core/Train/Checkpointable/CheckpointableBase.cs b/src/TensorFlowNET.Core/Train/Checkpointable/CheckpointableBase.cs new file mode 100644 index 00000000..632278fe --- /dev/null +++ b/src/TensorFlowNET.Core/Train/Checkpointable/CheckpointableBase.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow +{ + public class CheckpointableBase + { + } +} diff --git a/test/TensorFlowNET.Examples/TensorFlowNET.Examples.csproj b/test/TensorFlowNET.Examples/TensorFlowNET.Examples.csproj index d5986423..34a29361 100644 --- a/test/TensorFlowNET.Examples/TensorFlowNET.Examples.csproj +++ b/test/TensorFlowNET.Examples/TensorFlowNET.Examples.csproj @@ -11,7 +11,7 @@ - + diff --git a/test/TensorFlowNET.Examples/TextClassificationWithMovieReviews.cs b/test/TensorFlowNET.Examples/TextClassificationWithMovieReviews.cs index 3b17be10..b57da319 100644 --- a/test/TensorFlowNET.Examples/TextClassificationWithMovieReviews.cs +++ b/test/TensorFlowNET.Examples/TextClassificationWithMovieReviews.cs @@ -1,12 +1,10 @@ using System; using System.Collections.Generic; using System.IO; -using System.Text; using Tensorflow; using NumSharp.Core; using Newtonsoft.Json; using System.Linq; -using Keras; using System.Text.RegularExpressions; namespace TensorFlowNET.Examples @@ -24,11 +22,21 @@ namespace TensorFlowNET.Examples // A dictionary mapping words to an integer index var word_index = GetWordIndex(); - + train_data = keras.preprocessing.sequence.pad_sequences(train_data, value: word_index[""], padding: "post", maxlen: 256); + + test_data = keras.preprocessing.sequence.pad_sequences(test_data, + value: word_index[""], + padding: "post", + maxlen: 256); + + // input shape is the vocabulary count used for the movie reviews (10,000 words) + int vocab_size = 10000; + + var model = keras.Sequential(); } private ((NDArray, NDArray), (NDArray, NDArray)) PrepareData() @@ -46,42 +54,48 @@ namespace TensorFlowNET.Examples var x_train = ReadData(Path.Join(dir, "x_train.txt")); var labels_train = ReadData(Path.Join(dir, "y_train.txt")); var indices_train = ReadData(Path.Join(dir, "indices_train.txt")); - // x_train = x_train[indices_train]; - // labels_train = labels_train[indices_train]; + x_train = x_train[indices_train]; + labels_train = labels_train[indices_train]; var x_test = ReadData(Path.Join(dir, "x_test.txt")); var labels_test = ReadData(Path.Join(dir, "y_test.txt")); var indices_test = ReadData(Path.Join(dir, "indices_test.txt")); - // x_test = x_test[indices_test]; - // labels_test = labels_test[indices_test]; + x_test = x_test[indices_test]; + labels_test = labels_test[indices_test]; // not completed - /*var xs = x_train.hstack(x_test); + var xs = x_train.hstack(x_test); var labels = labels_train.hstack(labels_test); var idx = x_train.size; var y_train = labels_train; var y_test = labels_test; - return ((x_train, y_train), (x_test, y_test));*/ - - throw new NotImplementedException(); + return ((x_train, y_train), (x_test, y_test)); } - private int[][] ReadData(string file) + private NDArray ReadData(string file) { - var lines = new List(); + var lines = File.ReadAllLines(file); + var nd = new NDArray(lines[0].StartsWith("[") ? typeof(object) : np.int32, new Shape(lines.Length)); - foreach(var line in File.ReadAllLines(file)) + if (lines[0].StartsWith("[")) { - var matches = Regex.Matches(line, @"\d+,*"); - var data = new int[matches.Count]; - for (int i = 0; i < data.Length; i++) - data[i] = Convert.ToInt32(matches[i].Value.Trim(',')); - lines.Add(data.ToArray()); + for (int i = 0; i < lines.Length; i++) + { + var matches = Regex.Matches(lines[i], @"\d+\s*"); + var data = new int[matches.Count]; + for (int j = 0; j < data.Length; j++) + data[j] = Convert.ToInt32(matches[j].Value); + nd[i] = data.ToArray(); + } } - - return lines.ToArray(); + else + { + for (int i = 0; i < lines.Length; i++) + nd[i] = Convert.ToInt32(lines[i]); + } + return nd; } private Dictionary GetWordIndex()