diff --git a/src/TensorFlowNET.Core/APIs/tf.strings.cs b/src/TensorFlowNET.Core/APIs/tf.strings.cs index f580a67d..59b89832 100644 --- a/src/TensorFlowNET.Core/APIs/tf.strings.cs +++ b/src/TensorFlowNET.Core/APIs/tf.strings.cs @@ -64,6 +64,9 @@ namespace Tensorflow public Tensor substr(string input, int pos, int len, string name = null, string @uint = "BYTE") => ops.substr(input, pos, len, @uint: @uint, name: name); + + public Tensor split(Tensor input, string sep = "", int maxsplit = -1, string name = null) + => ops.string_split_v2(input, sep: sep, maxsplit : maxsplit, name : name); } } } diff --git a/src/TensorFlowNET.Core/Data/DatasetV2.cs b/src/TensorFlowNET.Core/Data/DatasetV2.cs index c4298bc2..a8033802 100644 --- a/src/TensorFlowNET.Core/Data/DatasetV2.cs +++ b/src/TensorFlowNET.Core/Data/DatasetV2.cs @@ -68,6 +68,17 @@ namespace Tensorflow public IDatasetV2 map(Func map_func, int num_parallel_calls) => new ParallelMapDataset(this, map_func, num_parallel_calls: num_parallel_calls); + public OwnedIterator make_one_shot_iterator() + { + if (tf.Context.executing_eagerly()) + { + // with ops.colocate_with(self._variant_tensor) + return new OwnedIterator(this); + } + + throw new NotImplementedException(""); + } + public IDatasetV2 flat_map(Func map_func) => new FlatMapDataset(this, map_func); diff --git a/src/TensorFlowNET.Core/Data/IDatasetV2.cs b/src/TensorFlowNET.Core/Data/IDatasetV2.cs index d0e372dc..9ce392d9 100644 --- a/src/TensorFlowNET.Core/Data/IDatasetV2.cs +++ b/src/TensorFlowNET.Core/Data/IDatasetV2.cs @@ -72,6 +72,8 @@ namespace Tensorflow IDatasetV2 map(Func map_func, int num_parallel_calls); + OwnedIterator make_one_shot_iterator(); + IDatasetV2 flat_map(Func map_func); IDatasetV2 model(AutotuneAlgorithm algorithm, long cpu_budget); diff --git a/src/TensorFlowNET.Core/Data/OwnedIterator.cs b/src/TensorFlowNET.Core/Data/OwnedIterator.cs index 571e79a6..0a955929 100644 --- a/src/TensorFlowNET.Core/Data/OwnedIterator.cs +++ b/src/TensorFlowNET.Core/Data/OwnedIterator.cs @@ -26,6 +26,7 @@ namespace Tensorflow dataset = dataset.apply_options(); _dataset = dataset; _element_spec = dataset.element_spec; + // _flat_output_types = (_iterator_resource, _deleter) = ops.anonymous_iterator_v2(_dataset.output_types, _dataset.output_shapes); ops.make_iterator(dataset.variant_tensor, _iterator_resource); } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/TextVectorizationArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/TextVectorizationArgs.cs index ab55da4e..ddeadc00 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/TextVectorizationArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/TextVectorizationArgs.cs @@ -11,5 +11,6 @@ namespace Tensorflow.Keras.ArgsDefinition public int MaxTokens { get; set; } = -1; public string OutputMode { get; set; } = "int"; public int OutputSequenceLength { get; set; } = -1; + public string[] Vocabulary { get; set; } } } diff --git a/src/TensorFlowNET.Core/Operations/string_ops.cs b/src/TensorFlowNET.Core/Operations/string_ops.cs index 0bd32e7f..efeb0997 100644 --- a/src/TensorFlowNET.Core/Operations/string_ops.cs +++ b/src/TensorFlowNET.Core/Operations/string_ops.cs @@ -41,5 +41,10 @@ namespace Tensorflow string @uint = "BYTE", string name = null) => tf.Context.ExecuteOp("Substr", name, new ExecuteOpArgs(input, pos, len) .SetAttributes(new { unit = @uint })); + + public Tensor string_split_v2(Tensor input, string sep = "", int maxsplit = -1, string name = null) + { + return null; + } } } diff --git a/src/TensorFlowNET.Keras/Engine/CombinerPreprocessingLayer.cs b/src/TensorFlowNET.Keras/Engine/CombinerPreprocessingLayer.cs index 11adfe9f..2e564480 100644 --- a/src/TensorFlowNET.Keras/Engine/CombinerPreprocessingLayer.cs +++ b/src/TensorFlowNET.Keras/Engine/CombinerPreprocessingLayer.cs @@ -8,11 +8,23 @@ namespace Tensorflow.Keras.Engine public class CombinerPreprocessingLayer : Layer { PreprocessingLayerArgs args; + protected ICombiner combiner; + protected bool _previously_updated; public CombinerPreprocessingLayer(PreprocessingLayerArgs args) : base(args) { - + _previously_updated = false; + } + + public virtual void adapt(IDatasetV2 data, bool reset_state = true) + { + IAccumulator accumulator; + if (!reset_state) + accumulator = combiner.Restore(); + + var next_data = data.make_one_shot_iterator(); + var data_element = next_data.next(); } } } diff --git a/src/TensorFlowNET.Keras/Engine/Interfaces/IAccumulator.cs b/src/TensorFlowNET.Keras/Engine/Interfaces/IAccumulator.cs new file mode 100644 index 00000000..df819839 --- /dev/null +++ b/src/TensorFlowNET.Keras/Engine/Interfaces/IAccumulator.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Engine +{ + public interface IAccumulator + { + } +} diff --git a/src/TensorFlowNET.Keras/Engine/Interfaces/ICombiner.cs b/src/TensorFlowNET.Keras/Engine/Interfaces/ICombiner.cs new file mode 100644 index 00000000..8fe1764d --- /dev/null +++ b/src/TensorFlowNET.Keras/Engine/Interfaces/ICombiner.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Engine +{ + /// + /// Functional object that defines a shardable computation. + /// + public interface ICombiner + { + void Compute(Tensor values, IAccumulator accumulator = null); + void Merge(); + void Extract(); + IAccumulator Restore(); + void Serialize(); + void Deserialize(); + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookup.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookup.cs new file mode 100644 index 00000000..5e02f562 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookup.cs @@ -0,0 +1,30 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Engine; + +namespace Tensorflow.Keras.Layers +{ + public class IndexLookup : CombinerPreprocessingLayer + { + public IndexLookup(int max_tokens = -1, + int num_oov_indices = 1, + string mask_token = "", + string oov_token = "[UNK]", + string encoding = "utf-8", + bool invert = false) : base(new PreprocessingLayerArgs()) + { + var num_mask_tokens = mask_token == null ? 0 : 1; + var vocab_size = max_tokens - (num_oov_indices + num_mask_tokens); + combiner = new IndexLookupCombiner(vocab_size, mask_token); + } + + public override void adapt(IDatasetV2 data, bool reset_state = true) + { + if (!reset_state) + throw new ValueError("IndexLookup does not support streaming adapts."); + base.adapt(data, reset_state); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookupAccumulator.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookupAccumulator.cs new file mode 100644 index 00000000..e2de669d --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookupAccumulator.cs @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.Engine; + +namespace Tensorflow.Keras.Layers +{ + public class IndexLookupAccumulator : IAccumulator + { + public Dictionary CountDict { get; set; } + public IndexLookupAccumulator() + { + CountDict = new Dictionary(); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookupCombiner.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookupCombiner.cs new file mode 100644 index 00000000..ac4c5dc9 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/IndexLookupCombiner.cs @@ -0,0 +1,55 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.Engine; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Combiner for the IndexLookup preprocessing layer. + /// + public class IndexLookupCombiner : ICombiner + { + int _vocab_size; + string _mask_value; + + public IndexLookupCombiner(int vocab_size = -1, string mask_value = null) + { + _vocab_size = vocab_size; + _mask_value = mask_value; + } + + public void Compute(Tensor values, IAccumulator accumulator = null) + { + if(accumulator == null) + { + accumulator = new IndexLookupAccumulator(); + } + } + + public void Deserialize() + { + throw new NotImplementedException(); + } + + public void Extract() + { + throw new NotImplementedException(); + } + + public void Merge() + { + throw new NotImplementedException(); + } + + public IAccumulator Restore() + { + throw new NotImplementedException(); + } + + public void Serialize() + { + throw new NotImplementedException(); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/StringLookup.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/StringLookup.cs new file mode 100644 index 00000000..616af1c6 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/StringLookup.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Maps strings from a vocabulary to integer indices. + /// + class StringLookup : IndexLookup + { + public StringLookup(int max_tokens = -1, + int num_oov_indices = 1, + string mask_token = "", + string[] vocabulary = null, + string oov_token = "[UNK]", + string encoding = "utf-8", + bool invert = false) + { + + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/TextVectorization.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/TextVectorization.cs index c72860a6..d2966213 100644 --- a/src/TensorFlowNET.Keras/Layers/Preprocessing/TextVectorization.cs +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/TextVectorization.cs @@ -3,12 +3,14 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; +using static Tensorflow.Binding; namespace Tensorflow.Keras.Layers { public class TextVectorization : CombinerPreprocessingLayer { TextVectorizationArgs args; + IndexLookup _index_lookup_layer; public TextVectorization(TextVectorizationArgs args) : base(args) @@ -16,6 +18,11 @@ namespace Tensorflow.Keras.Layers this.args = args; args.DType = TF_DataType.TF_STRING; // string standardize = "lower_and_strip_punctuation", + + var mask_token = args.OutputMode == "int" ? "" : null; + _index_lookup_layer = new StringLookup(max_tokens: args.MaxTokens, + mask_token: mask_token, + vocabulary: args.Vocabulary); } /// @@ -23,13 +30,14 @@ namespace Tensorflow.Keras.Layers /// /// /// - public void adapt(IDatasetV2 data, bool reset_state = true) + public override void adapt(IDatasetV2 data, bool reset_state = true) { var shape = data.output_shapes[0]; if (shape.rank == 1) data = data.map(tensor => array_ops.expand_dims(tensor, -1)); build(data.variant_tensor); var preprocessed_inputs = data.map(_preprocess); + _index_lookup_layer.adapt(preprocessed_inputs); } protected override void build(Tensors inputs) @@ -45,6 +53,8 @@ namespace Tensorflow.Keras.Layers { if (inputs.shape.ndim > 1) inputs = array_ops.squeeze(inputs, axis: new[] { -1 }); + if (args.Split == "whitespace") + inputs = tf.strings.split(inputs); } return inputs; } diff --git a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.index_directory.cs b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.index_directory.cs index 6b62b9b2..03c9f8d1 100644 --- a/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.index_directory.cs +++ b/src/TensorFlowNET.Keras/Preprocessings/DatasetUtils.index_directory.cs @@ -1,4 +1,5 @@ using NumSharp; +using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -60,6 +61,7 @@ namespace Tensorflow.Keras.Preprocessings } } + Console.WriteLine($"Found {return_file_paths.Length} files belonging to {class_names.Length} classes."); return (return_file_paths, return_labels, class_names); } } diff --git a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj index f8ee6b79..6325707d 100644 --- a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj +++ b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj @@ -63,10 +63,6 @@ Keras is an API designed for human beings, not machines. Keras follows best prac - - - -