diff --git a/src/TensorFlowNET.Core/Extensions/DictionaryExtension.cs b/src/TensorFlowNET.Core/Common/Extensions/DictionaryExtension.cs similarity index 100% rename from src/TensorFlowNET.Core/Extensions/DictionaryExtension.cs rename to src/TensorFlowNET.Core/Common/Extensions/DictionaryExtension.cs diff --git a/src/TensorFlowNET.Core/Extensions/JObjectExtensions.cs b/src/TensorFlowNET.Core/Common/Extensions/JObjectExtensions.cs similarity index 80% rename from src/TensorFlowNET.Core/Extensions/JObjectExtensions.cs rename to src/TensorFlowNET.Core/Common/Extensions/JObjectExtensions.cs index 2e758dbf..6ceba445 100644 --- a/src/TensorFlowNET.Core/Extensions/JObjectExtensions.cs +++ b/src/TensorFlowNET.Core/Common/Extensions/JObjectExtensions.cs @@ -3,16 +3,16 @@ using System; using System.Collections.Generic; using System.Text; -namespace Tensorflow.Extensions +namespace Tensorflow.Common.Extensions { public static class JObjectExtensions { public static T? TryGetOrReturnNull(this JObject obj, string key) { var res = obj[key]; - if(res is null) + if (res is null) { - return default(T); + return default; } else { diff --git a/src/TensorFlowNET.Core/Common/Extensions/LinqExtensions.cs b/src/TensorFlowNET.Core/Common/Extensions/LinqExtensions.cs new file mode 100644 index 00000000..0402fca0 --- /dev/null +++ b/src/TensorFlowNET.Core/Common/Extensions/LinqExtensions.cs @@ -0,0 +1,26 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Tensorflow.Common.Extensions +{ + public static class LinqExtensions + { +#if NETSTANDARD2_0 + public static IEnumerable TakeLast(this IEnumerable sequence, int count) + { + return sequence.Skip(sequence.Count() - count); + } + + public static IEnumerable SkipLast(this IEnumerable sequence, int count) + { + return sequence.Take(sequence.Count() - count); + } +#endif + public static Tensors ToTensors(this IEnumerable tensors) + { + return new Tensors(tensors); + } + } +} diff --git 
a/src/TensorFlowNET.Core/Extensions/OneofExtension.cs b/src/TensorFlowNET.Core/Common/Extensions/OneofExtension.cs similarity index 100% rename from src/TensorFlowNET.Core/Extensions/OneofExtension.cs rename to src/TensorFlowNET.Core/Common/Extensions/OneofExtension.cs diff --git a/src/TensorFlowNET.Core/Common/Types/GeneralizedTensorShape.cs b/src/TensorFlowNET.Core/Common/Types/GeneralizedTensorShape.cs new file mode 100644 index 00000000..edb9a802 --- /dev/null +++ b/src/TensorFlowNET.Core/Common/Types/GeneralizedTensorShape.cs @@ -0,0 +1,79 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Text; + +namespace Tensorflow.Common.Types +{ + public class GeneralizedTensorShape: IEnumerable + { + public TensorShapeConfig[] Shapes { get; set; } + /// + /// create a single-dim generalized Tensor shape. + /// + /// + public GeneralizedTensorShape(int dim) + { + Shapes = new TensorShapeConfig[] { new TensorShapeConfig() { Items = new long?[] { dim } } }; + } + + public GeneralizedTensorShape(Shape shape) + { + Shapes = new TensorShapeConfig[] { shape }; + } + + public GeneralizedTensorShape(TensorShapeConfig shape) + { + Shapes = new TensorShapeConfig[] { shape }; + } + + public GeneralizedTensorShape(TensorShapeConfig[] shapes) + { + Shapes = shapes; + } + + public GeneralizedTensorShape(IEnumerable shape) + { + Shapes = shape.Select(x => (TensorShapeConfig)x).ToArray(); + } + + public Shape ToSingleShape() + { + if (Shapes.Length != 1) + { + throw new ValueError("The generalized shape contains more than 1 dim."); + } + var shape_config = Shapes[0]; + Debug.Assert(shape_config is not null); + return new Shape(shape_config.Items.Select(x => x is null ? -1 : x.Value).ToArray()); + } + + public long ToNumber() + { + if(Shapes.Length != 1 || Shapes[0].Items.Length != 1) + { + throw new ValueError("The generalized shape contains more than 1 dim."); + } + var res = Shapes[0].Items[0]; + return res is null ? 
-1 : res.Value; + } + + public Shape[] ToShapeArray() + { + return Shapes.Select(x => new Shape(x.Items.Select(y => y is null ? -1 : y.Value).ToArray())).ToArray(); + } + + public IEnumerator GetEnumerator() + { + foreach (var shape in Shapes) + { + yield return shape.Items; + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } +} diff --git a/src/TensorFlowNET.Core/Common/Types/IOptionalArgs.cs b/src/TensorFlowNET.Core/Common/Types/IOptionalArgs.cs new file mode 100644 index 00000000..427e71aa --- /dev/null +++ b/src/TensorFlowNET.Core/Common/Types/IOptionalArgs.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Common.Types +{ + /// + /// This interface is used when some corresponding python methods have optional args. + /// For example, `Keras.Layer.Apply` generally takes three args as the inputs, while + /// `Keras.Layer.RNN` takes more. Then when calling RNN, you should add `RnnOptionalArgs` + /// as the parameter of the method. + /// + public interface IOptionalArgs + { + /// + /// The identifier of the class. It is not an argument but only something to + /// separate different OptionalArgs. 
+ /// + string Identifier { get; } + } +} diff --git a/src/TensorFlowNET.Core/Extensions/NamedTuple.cs b/src/TensorFlowNET.Core/Common/Types/NamedTuple.cs similarity index 100% rename from src/TensorFlowNET.Core/Extensions/NamedTuple.cs rename to src/TensorFlowNET.Core/Common/Types/NamedTuple.cs diff --git a/src/TensorFlowNET.Core/Keras/Saving/TensorShapeConfig.cs b/src/TensorFlowNET.Core/Common/Types/TensorShapeConfig.cs similarity index 95% rename from src/TensorFlowNET.Core/Keras/Saving/TensorShapeConfig.cs rename to src/TensorFlowNET.Core/Common/Types/TensorShapeConfig.cs index 7abcfde2..a36930ec 100644 --- a/src/TensorFlowNET.Core/Keras/Saving/TensorShapeConfig.cs +++ b/src/TensorFlowNET.Core/Common/Types/TensorShapeConfig.cs @@ -3,7 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; -namespace Tensorflow.Keras.Saving +namespace Tensorflow.Common.Types { public class TensorShapeConfig { diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs index 2585592c..ed5a1d6d 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RNNArgs.cs @@ -1,17 +1,15 @@ using Newtonsoft.Json; using System.Collections.Generic; +using Tensorflow.Keras.Layers.Rnn; namespace Tensorflow.Keras.ArgsDefinition.Rnn { + // TODO(Rinne): add regularizers. public class RNNArgs : AutoSerializeLayerArgs { - public interface IRnnArgCell : ILayer - { - object state_size { get; } - } [JsonProperty("cell")] // TODO: the cell should be serialized with `serialize_keras_object`. 
- public IRnnArgCell Cell { get; set; } = null; + public IRnnCell Cell { get; set; } = null; [JsonProperty("return_sequences")] public bool ReturnSequences { get; set; } = false; [JsonProperty("return_state")] @@ -34,6 +32,9 @@ namespace Tensorflow.Keras.ArgsDefinition.Rnn public IInitializer KernelInitializer { get; set; } public IInitializer RecurrentInitializer { get; set; } public IInitializer BiasInitializer { get; set; } + public float Dropout { get; set; } = .0f; + public bool ZeroOutputForMask { get; set; } = false; + public float RecurrentDropout { get; set; } = .0f; // kernel_regularizer=None, // recurrent_regularizer=None, diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs new file mode 100644 index 00000000..64b500bb --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/RnnOptionalArgs.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Common.Types; + +namespace Tensorflow.Keras.ArgsDefinition.Rnn +{ + public class RnnOptionalArgs: IOptionalArgs + { + public string Identifier => "Rnn"; + public Tensor Mask { get; set; } = null; + public Tensors Constants { get; set; } = null; + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs new file mode 100644 index 00000000..1dfcbe9c --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNCellArgs.cs @@ -0,0 +1,29 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition.Rnn +{ + public class SimpleRNNCellArgs: AutoSerializeLayerArgs + { + [JsonProperty("units")] + public int Units { get; set; } + // TODO(Rinne): lack of initialized value of Activation. Merging keras + // into tf.net could resolve it. 
+ [JsonProperty("activation")] + public Activation Activation { get; set; } + [JsonProperty("use_bias")] + public bool UseBias { get; set; } = true; + [JsonProperty("dropout")] + public float Dropout { get; set; } = .0f; + [JsonProperty("recurrent_dropout")] + public float RecurrentDropout { get; set; } = .0f; + [JsonProperty("kernel_initializer")] + public IInitializer KernelInitializer { get; set; } + [JsonProperty("recurrent_initializer")] + public IInitializer RecurrentInitializer { get; set; } + [JsonProperty("bias_initializer")] + public IInitializer BiasInitializer { get; set; } + } +} diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs index f7669394..e94c8bf1 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayer.cs @@ -1,4 +1,5 @@ -using Tensorflow.Keras.Engine; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; using Tensorflow.NumPy; using Tensorflow.Training; @@ -14,7 +15,7 @@ namespace Tensorflow.Keras List Layers { get; } List InboundNodes { get; } List OutboundNodes { get; } - Tensors Apply(Tensors inputs, Tensor state = null, bool training = false); + Tensors Apply(Tensors inputs, Tensors states = null, bool training = false, IOptionalArgs? 
optional_args = null); List TrainableVariables { get; } List TrainableWeights { get; } List NonTrainableWeights { get; } diff --git a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs new file mode 100644 index 00000000..df6222cd --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IRnnCell.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Common.Types; + +namespace Tensorflow.Keras.Layers.Rnn +{ + public interface IRnnCell: ILayer + { + GeneralizedTensorShape StateSize { get; } + GeneralizedTensorShape OutputSize { get; } + /// + /// Whether the optional RNN args are supported when applying the layer. + /// In other words, whether `Apply` is overridden to process `RnnOptionalArgs`. + /// + bool SupportOptionalArgs { get; } + (Tensor, Tensors) Call(Tensors inputs, Tensors states, bool? training = null); + } +} diff --git a/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs new file mode 100644 index 00000000..e73244a5 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Layers/Rnn/IStackedRnnCells.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Layers.Rnn +{ + public interface IStackedRnnCells : IRnnCell + { + int Count { get; } + IRnnCell this[int idx] { get; } + } +} diff --git a/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedKerasShapesWrapperJsonConverter.cs b/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedKerasShapesWrapperJsonConverter.cs index 1a4245bf..3a21db9d 100644 --- a/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedKerasShapesWrapperJsonConverter.cs +++ b/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedKerasShapesWrapperJsonConverter.cs @@ -3,6 +3,7 @@ using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Common.Types; 
namespace Tensorflow.Keras.Saving.Json { diff --git a/src/TensorFlowNET.Core/Keras/Saving/KerasShapesWrapper.cs b/src/TensorFlowNET.Core/Keras/Saving/KerasShapesWrapper.cs index d91d3161..ea6fe976 100644 --- a/src/TensorFlowNET.Core/Keras/Saving/KerasShapesWrapper.cs +++ b/src/TensorFlowNET.Core/Keras/Saving/KerasShapesWrapper.cs @@ -6,6 +6,7 @@ using System.Text; using System.Diagnostics; using OneOf.Types; using Tensorflow.Keras.Saving.Json; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Saving { diff --git a/src/TensorFlowNET.Core/NumPy/Axis.cs b/src/TensorFlowNET.Core/NumPy/Axis.cs index 976c764f..7a3ecbf1 100644 --- a/src/TensorFlowNET.Core/NumPy/Axis.cs +++ b/src/TensorFlowNET.Core/NumPy/Axis.cs @@ -74,8 +74,3 @@ namespace Tensorflow => IsScalar ? $"{axis[0]}" : $"({string.Join(", ", axis)})"; } } - -namespace System.Runtime.CompilerServices -{ - internal static class IsExternalInit { } -} diff --git a/src/TensorFlowNET.Core/Operations/Initializers/Orthogonal.cs b/src/TensorFlowNET.Core/Operations/Initializers/Orthogonal.cs index 492047c9..88673bb5 100644 --- a/src/TensorFlowNET.Core/Operations/Initializers/Orthogonal.cs +++ b/src/TensorFlowNET.Core/Operations/Initializers/Orthogonal.cs @@ -53,7 +53,7 @@ public class Orthogonal : IInitializer // Compute the qr factorization var (q, r) = tf.linalg.qr(a, full_matrices: false); // Make Q uniform - var d = tf.linalg.tensor_diag_part(r); + var d = tf.linalg.tensor_diag_part(r.Single); q *= tf.sign(d); if (num_rows < num_cols) diff --git a/src/TensorFlowNET.Core/Operations/NnOps/BasicLSTMCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/BasicLSTMCell.cs index d3592514..b2cda952 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/BasicLSTMCell.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/BasicLSTMCell.cs @@ -11,6 +11,7 @@ namespace Tensorflow /// Basic LSTM recurrent network cell. /// The implementation is based on: http://arxiv.org/abs/1409.2329. 
/// + [Obsolete("This is an incompleted tf v1 api, pleas use keras RNNs instead.")] public class BasicLstmCell : LayerRnnCell { int _num_units; diff --git a/src/TensorFlowNET.Core/Operations/NnOps/BasicRNNCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/BasicRNNCell.cs index 17d51363..3308aebb 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/BasicRNNCell.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/BasicRNNCell.cs @@ -20,6 +20,7 @@ using static Tensorflow.Binding; namespace Tensorflow { + [Obsolete("This is an incompleted tf v1 api, pleas use keras RNNs instead.")] public class BasicRnnCell : LayerRnnCell { int _num_units; diff --git a/src/TensorFlowNET.Core/Operations/NnOps/LayerRNNCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/LayerRNNCell.cs index 7394cb7f..65de4fe9 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/LayerRNNCell.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/LayerRNNCell.cs @@ -19,6 +19,7 @@ using static Tensorflow.Binding; namespace Tensorflow { + [Obsolete("This is an incompleted tf v1 api, pleas use keras RNNs instead.")] public class LayerRnnCell : RnnCell { protected InputSpec inputSpec; diff --git a/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs index ecc9ca11..71fdc301 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs @@ -16,10 +16,12 @@ using System; using System.Collections.Generic; +using Tensorflow.Common.Types; using Tensorflow.Keras; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; +using Tensorflow.Keras.Layers.Rnn; using Tensorflow.Keras.Saving; using Tensorflow.NumPy; using Tensorflow.Operations; @@ -50,7 +52,8 @@ namespace Tensorflow /// matching structure of Tensors having shape `[batch_size].concatenate(s)` /// for each `s` in `self.batch_size`. 
/// - public abstract class RnnCell : ILayer, RNNArgs.IRnnArgCell + [Obsolete("This is an incompleted tf v1 api, pleas use keras RNNs instead.")] + public abstract class RnnCell : ILayer, IRnnCell { /// /// Attribute that indicates whether the cell is a TF RNN cell, due the slight @@ -142,7 +145,7 @@ namespace Tensorflow throw new NotImplementedException("_zero_state_tensors"); } - public Tensors Apply(Tensors inputs, Tensor state = null, bool is_training = false) + public Tensors Apply(Tensors inputs, Tensors state = null, bool is_training = false, IOptionalArgs? optional_args = null) { throw new NotImplementedException(); } @@ -173,5 +176,13 @@ namespace Tensorflow { throw new NotImplementedException(); } + + public (Tensor, Tensors) Call(Tensors inputs, Tensors states, bool? training = null) + { + throw new NotImplementedException(); + } + public GeneralizedTensorShape StateSize => throw new NotImplementedException(); + public GeneralizedTensorShape OutputSize => throw new NotImplementedException(); + public bool SupportOptionalArgs => throw new NotImplementedException(); } } diff --git a/src/TensorFlowNET.Core/Operations/logging_ops.cs b/src/TensorFlowNET.Core/Operations/logging_ops.cs index e38e60b5..3303cadc 100644 --- a/src/TensorFlowNET.Core/Operations/logging_ops.cs +++ b/src/TensorFlowNET.Core/Operations/logging_ops.cs @@ -30,7 +30,7 @@ namespace Tensorflow name: name); return tf.Context.ExecuteOp("PrintV2", name, new ExecuteOpArgs(formatted_string) - .SetAttributes(new { output_stream, end })); + .SetAttributes(new { output_stream, end })).SingleOrNull; } } } diff --git a/src/TensorFlowNET.Core/Operations/sort_ops.cs b/src/TensorFlowNET.Core/Operations/sort_ops.cs index 34b90323..db38a073 100644 --- a/src/TensorFlowNET.Core/Operations/sort_ops.cs +++ b/src/TensorFlowNET.Core/Operations/sort_ops.cs @@ -44,7 +44,7 @@ namespace Tensorflow { sorted = true })); - return indices; + return indices.Single; } public static Tensor sort(Tensor values, Axis axis, 
string direction = "ASCENDING", string? name = null) diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index 09f5b077..b08b2e2b 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -114,4 +114,9 @@ https://tensorflownet.readthedocs.io + + + + + diff --git a/src/TensorFlowNET.Core/Tensors/Tensors.cs b/src/TensorFlowNET.Core/Tensors/Tensors.cs index d063ee39..caa36b76 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensors.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensors.cs @@ -23,6 +23,38 @@ namespace Tensorflow public Graph graph => items.First().graph; public bool IsList { get; set; } public int Length => items.Count(); + /// + /// Return the tensor if `Tensors` holds exactly one tensor; throw when it is empty or holds several. + /// + public Tensor Single + { + get + { + if (Length != 1) + { + throw new ValueError($"Tensors with {Length} tensors cannot be " + + "converted to a single Tensor."); + } + return items.First(); + } + } + + /// + /// Return a Tensor if `Tensors` has only one tensor, and return null when `Tensors` is empty, + /// otherwise throw an exception. + /// + public Tensor? SingleOrNull + { + get + { + if (Length > 1) + { + throw new ValueError($"Tensors with {Length} tensors cannot be " + + "implicitly converted to Tensor."); + } + return items.FirstOrDefault(); + } + } public Tensor this[int index] { @@ -183,18 +215,18 @@ namespace Tensorflow public static implicit operator Tensors(List tensors) => new Tensors(tensors.ToArray()); - public static implicit operator Tensor(Tensors tensors) - => tensors.FirstOrDefault(); + public static implicit operator Tensor(Tensors? tensors) + => tensors?.SingleOrNull; public static implicit operator Tensor[](Tensors tensors) => tensors.items.ToArray(); #endregion - public void Deconstruct(out Tensor a, out Tensors? 
b) { a = items[0]; - b = items[1]; + b = Length == 1? null : new Tensors(items.Skip(1)); } private static void EnsureSingleTensor(Tensors tensors, string methodnName) diff --git a/src/TensorFlowNET.Core/Util/nest.py.cs b/src/TensorFlowNET.Core/Util/nest.py.cs index eb94f4d0..ab6f56b3 100644 --- a/src/TensorFlowNET.Core/Util/nest.py.cs +++ b/src/TensorFlowNET.Core/Util/nest.py.cs @@ -170,6 +170,39 @@ namespace Tensorflow.Util throw new TypeError("Type of sequence not supported (yet): " + instance.GetType()); } + public static bool is_nested(object obj) + { + // Refer to https://www.tensorflow.org/api_docs/python/tf/nest + //if (obj is IList || obj is IDictionary || obj is ITuple) + // return true; + if (obj is IList || obj is IDictionary) + return true; + + if (obj is NDArray || obj is Tensor || obj is string || obj.GetType().IsGenericType + || obj is ISet || obj is ISet || obj is ISet) + return false; + + if (obj.GetType().IsNested) return true; + // Check if the object is an IEnumerable + if (obj is IEnumerable) + { + // If it is, check if it is a nested structure + foreach (object item in (IEnumerable)obj) + { + if (is_nested(item)) + { + return true; + } + } + return true; + } + else + { + // If it is not, return false + return false; + } + } + /// /// Yields the next value from the given iterable. 
/// diff --git a/src/TensorFlowNET.Keras/BackendImpl.cs b/src/TensorFlowNET.Keras/BackendImpl.cs index 80403ad6..a7c1bcad 100644 --- a/src/TensorFlowNET.Keras/BackendImpl.cs +++ b/src/TensorFlowNET.Keras/BackendImpl.cs @@ -22,6 +22,7 @@ using Tensorflow.Functions; using Tensorflow.Graphs; using static Tensorflow.Binding; using static Tensorflow.Graphs.SubGraphUtility; +using Tensorflow.Util; namespace Tensorflow.Keras { @@ -450,5 +451,514 @@ namespace Tensorflow.Keras return x; } + + public static (Tensors, Tensors, Tensors) rnn( + Func step_function, // args:inputs, states, return:output, new_states + Tensors inputs, // inputs is a tuple of tensors (one per input sequence) + Tensors initial_states, + bool go_backwards = false, + Tensor? mask = null, + Tensors? constants = null, + bool unroll = false, + Tensors? input_length = null, // An integer or a 1-D Tensor,depending on whether the time dimension is fixed-length or not + bool time_major = false, + bool zero_output_for_mask = false, + bool return_all_outputs = true) + { + + Tensors swap_batch_timestep(Tensors input_t) + { + var axes = Enumerable.Range(0, input_t.rank).ToArray(); + axes[0] = 1; + axes[1] = 0; + return tf.transpose(input_t, axes); + } + + if (!time_major) + { + inputs = nest.map_structure(swap_batch_timestep, inputs); + } + + var flatted_inptus = nest.flatten(inputs); + var time_steps = flatted_inptus[0].shape[0]; + var batch = flatted_inptus[0].shape[1]; + var time_step_t = tf.shape(flatted_inptus[0])[0]; + + foreach (var input_ in flatted_inptus) + { + input_.shape.with_rank_at_least(3); + } + + if (mask != null) + { + if (mask.dtype != TF_DataType.TF_BOOL) + { + mask = tf.cast(mask, TF_DataType.TF_BOOL); + } + + if (mask.rank == 2) + { + mask = tf.expand_dims(mask, -1); + } + + if (!time_major) + { + mask = swap_batch_timestep(mask); + } + + } + + if (constants == null) + { + constants = new List(); + } + + // tf.where needs its condition tensor to be the same shape as its two + // result 
tensors, but in our case the condition (mask) tensor is + // (nsamples, 1), and inputs are (nsamples, ndimensions) or even more. + // So we need to broadcast the mask to match the shape of inputs. + // That's what the tile call does, it just repeats the mask along its + // second dimension n times. + + Tensors _expand_mask(Tensors mask_t, Tensors input_t, int fixed_dim = 1) + { + if (nest.is_nested(mask_t)) + { + throw new ValueError($"mask_t is expected to be tensor, but got {mask_t}"); + } + + if (nest.is_nested(input_t)) + { + throw new ValueError($"input_t is expected to be tensor, but got {input_t}"); + } + + var rank_diff = input_t.rank - mask_t.rank; + for (int i = 0; i < rank_diff; i++) + { + mask_t = tf.expand_dims(mask_t, -1); + } + var multiples = Enumerable.Repeat(1, fixed_dim).ToArray().concat(input_t.shape.as_int_list().ToList().GetRange(fixed_dim, input_t.rank - fixed_dim)); + return tf.tile(mask_t, multiples); + } + + Tensors outputs = new Tensors(); + Tensors output_time_zero = new Tensors(); + Tensors last_output = new Tensors(); + Tensors new_states = new Tensors(); + if (unroll) + { + if (time_steps == 0) + { + throw new ValueError("Unrolling requires a fixed number of timesteps."); + } + + // Process the input tensors. The input tensor need to be split on the + // time_step dim, and reverse if go_backwards is True. In the case of + // nested input, the input is flattened and then transformed + // individually. The result of this will be a tuple of lists, each of + // the item in tuple is list of the tensor with shape (batch, feature) + + + // TODO(Wanglongzhi2001): step_func accepts a List as its second argument, but a tuple ends up being used here. + //var states = Tuple.Create(initial_states); + var states = initial_states; + + var successive_states = new Tensors(); + var successive_outputs = new Tensors(); + + // Process the input tensors. The input tensor need to be split on the + // time_step dim, and reverse if go_backwards is True. 
In the case of + // nested input, the input is flattened and then transformed + // individually. The result of this will be a tuple of lists, each of + // the item in tuple is list of the tensor with shape (batch, feature) + + + + + Tensors _process_single_input_t(Tensors input_t) + { + input_t = tf.unstack(input_t); // unstack for time_step dim + if (go_backwards) + { + input_t.Reverse(); + } + return input_t; + } + + // TODO(Wanglongzhi2001) + Tensors processed_input; + if (nest.is_nested(inputs)) + { + processed_input = nest.map_structure(_process_single_input_t, inputs); + } + else + { + processed_input = _process_single_input_t(inputs); + } + + object _get_input_tensor(int time) + { + List inp = new List(); + foreach (var t_ in processed_input) + { + inp.Add(t_[time]); + } + return nest.pack_sequence_as(inputs, inp); + } + + //if (mask != null) + //{ + // var mask_list = tf.unstack(mask); + // if (go_backwards) + // { + // mask_list.Reverse(); + // } + + // for (int i = 0; i < time_steps; i++) + // { + // // TODO(Wanglongzhi2001),deal with _get_input_tensor + // var inp = _get_input_tensor(i); + // var mask_t = mask_list[i]; + // // TODO + // var (output, newStates) = step_function((Tensors)inp, new Tensors { states, constants }); + + // var tiled_mask_t = _expand_mask(mask_t, output); + + // Tensors prev_output; + // if (successive_outputs == null) + // { + // prev_output = tf.zeros_like(output); + // } + // else + // { + // prev_output = successive_outputs[successive_outputs.Length - 1]; + // } + + // output = tf.where(tiled_mask_t, output, prev_output); + + // //var flat_states = nest.flatten(states); + // //var flat_new_states = nest.flatten(newStates); + // var flat_states = states.ToList(); + // var flat_new_states = newStates.ToList(); + + // var tiledMaskT = flat_states + // .Select(s => _expand_mask(mask_t, s)) + // .ToArray(); + // var tuple = Tuple.Create(tiledMaskT); + + // List flat_final_states = new List(); + // foreach (var (m, s, ps) in 
Enumerable.Zip(tiled_mask_t, flat_new_states, flat_states)) + // { + // flat_final_states.Add(tf.where(m, s, ps)); + // } + + // states = (Tensors)nest.pack_sequence_as(states, flat_final_states); + // if (return_all_outputs) + // { + // successive_outputs.Add(output); + // successive_states.Add(states); + // } + // else + // { + // successive_outputs = new Tensors { output }; + // successive_states = new Tensors { states }; + // } + + // } + // last_output = successive_outputs[successive_outputs.Length - 1]; + // new_states = successive_states[successive_states.Length - 1]; + // outputs = tf.stack(successive_outputs); + + // if (zero_output_for_mask) + // { + // last_output = tf.where(_expand_mask(mask_list[mask_list.Length - 1], last_output), last_output, tf.zeros_like(last_output)); + // outputs = tf.where(_expand_mask(mask, outputs, fixed_dim: 2), outputs, tf.zeros_like(outputs)); + // } + // else // mask is null + // { + // for (int i = 0; i < time_steps; i++) + // { + // var inp = _get_input_tensor(i); + // var (output, newStates) = step_function((Tensors)inp, new Tensors { states, constants }); + // states = newStates; + + // if (return_all_outputs) + // { + // successive_outputs.Add(output); + // successive_states.Add(newStates); + // } + // else + // { + // successive_outputs = new Tensors { output }; + // successive_states = new Tensors { newStates }; + // } + // } + // last_output = successive_outputs[successive_outputs.Length - 1]; + // new_states = successive_states[successive_states.Length - 1]; + // outputs = tf.stack(successive_outputs); + // } + //} + } + //else // unroll == false + //{ + // var states = initial_states; + // // Create input tensor array, if the inputs is nested tensors, then it + // // will be flattened first, and tensor array will be created one per + // // flattened tensor. 
+ // var input_ta = new List(); + // for (int i = 0; i < flatted_inptus.Count; i++) + // { + // input_ta.Add(tf.TensorArray(dtype: flatted_inptus[i].dtype, size: time_step_t)); + // } + + // // Get the time(0) input and compute the output for that, the output will + // // be used to determine the dtype of output tensor array. Don't read from + // // input_ta due to TensorArray clear_after_read default to True. + // var inps = new Tensors(); + // foreach (var inp in flatted_inptus) + // { + // inps.Add(inp[0]); + // } + // var input_time_zero = nest.pack_sequence_as(inputs, inps); + + // // output_time_zero is used to determine the cell output shape and its + // // dtype. the value is discarded. + // (output_time_zero, _) = step_function((Tensor)input_time_zero, new Tensors { initial_states, constants }); + + // var output_ta_size = return_all_outputs ? time_step_t : tf.constant(1); + // var output_ta = new List(); + // for (int i = 0; i < output_time_zero.ToList().Count; i++) + // { + // var Out = output_time_zero.ToList()[i]; + // output_ta.Add(tf.TensorArray(dtype: Out.dtype, size: output_ta_size, element_shape: Out.shape)); + // } + + // var time = tf.constant(0, dtype: TF_DataType.TF_INT32, name: "time"); + + + + // Func? masking_fn; + // Func? 
compute_masked_output = null; + // if (mask != null) + // { + // if (go_backwards) + // { + // mask = tf.reverse(mask, axis: new[] { 0 }); + // } + // var mask_ta = tf.TensorArray(dtype: TF_DataType.TF_BOOL, size: time_step_t); + // mask_ta = mask_ta.unstack(mask); + + // masking_fn = (time) => + // { + // return mask_ta.read(time); + // }; + + // compute_masked_output = (mask_t, flat_out, flat_mask) => + // { + // var tiled_mask_t = new Tensors(); + // foreach (var o in flat_out) + // { + // tiled_mask_t.Add(_expand_mask(mask_t, o, fixed_dim: mask_t.rank)); + // } + + // Tensors res = new Tensors(); + // foreach (var (m, o, fm) in Enumerable.Zip(tiled_mask_t, flat_out, flat_mask)) + // { + // res.Add(tf.where(m, o, fm)); + // } + // return res; + // }; + // } + // // TODO(Wanglongzhi2001), what the input_length's type should be(an integer or a single tensor)? + // else if (input_length is Tensor) + // { + // if (go_backwards) + // { + // var max_len = tf.reduce_max(input_length, axis: 0); + // var rev_input_length = tf.subtract(max_len - 1, input_length); + + // masking_fn = (time) => + // { + // return tf.less(rev_input_length, time); + // }; + // } + // else + // { + // masking_fn = (time) => + // { + // return tf.greater(input_length, time); + // }; + // } + + // compute_masked_output = (mask_t, flat_out, flat_mask) => + // { + // var res = new List(); + // foreach (var (o, zo) in zip(flat_out, flat_mask)) + // { + // res.Add(tf.where(mask_t, o, zo)); + // } + // return res; + // }; + // } + // else + // { + // masking_fn = null; + // } + + + // if (masking_fn != null) + // { + // // Mask for the T output will be base on the output of T - 1. In the + // // case T = 0, a zero filled tensor will be used. 
+ // var flat_zero_output = new Tensors(); + // foreach (var o in nest.flatten(output_time_zero)) + // { + // flat_zero_output.Add(tf.zeros_like(o)); + // } + + + // (Tensor, List, Tensors, Tensors) _step(Tensor time, List output_ta_t, Tensors prev_output, Tensors states) + // { + // /* + // RNN step function. + // Args: + // time: Current timestep value. + // output_ta_t: TensorArray. + // prev_output: tuple of outputs from time - 1. + // *states: List of states. + // Returns: + // Tuple(todo): `(time + 1, output_ta_t, output) + tuple(new_states)` + // */ + + // var current_input = input_ta.Select(x => x.read(time)).ToList(); + // // maybe set shape + // // TODO(Wanglongzhi2001),deal with nest.pack_sequence_as's return type + // current_input = (List)nest.pack_sequence_as(inputs, current_input); + // var mask_t = masking_fn(time); + // var (output, new_states) = step_function(current_input, new Tensors { states, constants }); + // // mask output + // //var flat_output = nest.flatten(output); + // var flat_output = output.ToList(); + + // var flat_mask_output = zero_output_for_mask ? flat_zero_output : prev_output.ToList(); + + // // TODO(Wanglongzhi2001),deal with compute_masked_output's third parameter's type + // var flat_new_output = compute_masked_output(mask_t, flat_output, flat_mask_output); + + // // mask states + // var flat_state = states.ToList(); + // var flat_new_state = new_states.ToList(); + + // foreach (var (state, new_state) in zip(flat_state, flat_new_state)) + // { + // if (new_state is Tensor) + // { + // new_state.set_shape(state.shape); + // } + // } + + // var flat_final_state = compute_masked_output(mask_t, flat_new_state, flat_state); + // new_states = (Tensors)nest.pack_sequence_as(new_states, flat_final_state); + + // var ta_index_to_write = return_all_outputs ? 
time : tf.constant(0); + // var Output_ta_t = new List(); + // // TODO(Wanglongzhi2001),deal with zip output_ta_t + // foreach (var (ta, Out) in zip(output_ta_t, flat_new_output)) + // { + // Output_ta_t.Add(ta.write(ta_index_to_write, Out)); + // } + + + + // //new_states = (Tensors)nest.pack_sequence_as(initial_states, flat_new_state); + + + // return (time + 1, Output_ta_t, flat_new_output, new_states); + + // } + // Func cond = (time) => (time < time_step_t); + + // var final_outputs = tf.while_loop(cond: cond, body: _step, loop_vars: (time, output_ta, flat_zero_output, states)); + // new_states = final_outputs.Item4; + // output_ta = final_outputs.Item2; + + // } + // else + // { + // (Tensor, List, Tensors) _step(Tensor time, List output_ta_t, Tensors states) + // { + // var current_input = input_ta.Select(x => x.read(time)).ToList(); + // // maybe set shape + // // TODO(Wanglongzhi2001),deal with nest.pack_sequence_as's return type + // current_input = (List)nest.pack_sequence_as(inputs, current_input); + // var (output, new_states) = step_function(current_input, new Tensors { states, constants }); + // var flat_state = states.ToList(); + // var flat_new_state = new_states.ToList(); + // foreach (var (state, new_state) in zip(flat_state, flat_new_state)) + // { + // if (new_state is Tensor) + // { + // new_state.set_shape(state.shape); + // } + // } + // var flat_output = output.ToList(); + // var ta_index_to_write = return_all_outputs ? 
time : tf.constant(0); + // var Output_ta_t = new List(); + // foreach (var (ta, out_) in zip(output_ta_t, flat_output)) + // { + // Output_ta_t.Add(ta.write(ta_index_to_write, out_)); + // } + + // new_states = (Tensors)nest.pack_sequence_as(initial_states, flat_new_state); + // return (time + 1, Output_ta_t, new_states); + // } + // Func cond = (time) => (time < time_step_t); + // var final_outputs = tf.while_loop(cond: cond, body: _step, loop_vars: (time, output_ta, states)); + // new_states = final_outputs.Item3; + // output_ta = final_outputs.Item2; + + // } + // //Tensors outputs = new Tensors(); + // foreach (var o in output_ta) + // { + // outputs.Add(o.stack()); + // } + // foreach (var o in outputs) + // { + // last_output.Add(o[-1]); + // } + // outputs = (Tensors)nest.pack_sequence_as(output_time_zero, outputs); + // last_output = (Tensors)nest.pack_sequence_as(output_time_zero, last_output); + + //} + + Func set_shape; + set_shape = (output_) => + { + if (output_ is Tensor) + { + var shape = output_.shape.as_int_list(); + if (return_all_outputs) + { + shape[0] = (int)time_steps; + } + else + { + shape[0] = 1; + } + shape[1] = (int)batch; + output_.set_shape(new Tensor(shape)); + } + return output_; + }; + + var Outputs = (Tensors)nest.map_structure(set_shape, outputs); + if (!time_major) + { + Outputs = nest.map_structure(swap_batch_timestep, outputs); + } + return (last_output, Outputs, new_states); + + } } } diff --git a/src/TensorFlowNET.Keras/Engine/Functional.cs b/src/TensorFlowNET.Keras/Engine/Functional.cs index e768bd0b..7347585f 100644 --- a/src/TensorFlowNET.Keras/Engine/Functional.cs +++ b/src/TensorFlowNET.Keras/Engine/Functional.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Saving.SavedModel; using Tensorflow.Keras.Utils; @@ -81,7 +82,7 @@ namespace Tensorflow.Keras.Engine } else { - _buildInputShape = new 
Saving.TensorShapeConfig(); + _buildInputShape = new TensorShapeConfig(); } if (outputs.Any(x => x.KerasHistory == null)) @@ -325,7 +326,7 @@ namespace Tensorflow.Keras.Engine nodes_in_decreasing_depth.append(node); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { var tensor_dict = new Dictionary>(); // map input values diff --git a/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs b/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs index c0430458..a0358f07 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.Apply.cs @@ -1,4 +1,5 @@ using System.Threading; +using Tensorflow.Common.Types; using static Tensorflow.Binding; namespace Tensorflow.Keras.Engine @@ -8,11 +9,11 @@ namespace Tensorflow.Keras.Engine /// /// Wraps `call`, applying pre- and post-processing steps. /// - /// + /// /// /// /// - public Tensors Apply(Tensors inputs, Tensor state = null, bool training = false) + public virtual Tensors Apply(Tensors inputs, Tensors states = null, bool training = false, IOptionalArgs? 
optional_args = null) { if (callContext.Value == null) callContext.Value = new CallContext(); @@ -30,7 +31,7 @@ namespace Tensorflow.Keras.Engine if (!built) MaybeBuild(inputs); - var outputs = Call(inputs, state: state, training: training); + var outputs = Call(inputs, state: states, training: training); // memory leak // _set_connectivity_metadata_(inputs, outputs); diff --git a/src/TensorFlowNET.Keras/Engine/Layer.cs b/src/TensorFlowNET.Keras/Engine/Layer.cs index 5942efd9..2f758a85 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.cs @@ -32,7 +32,7 @@ using Tensorflow.Util; using static Tensorflow.Binding; using Tensorflow.Framework; using Tensorflow.Sessions; - +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Engine { @@ -332,7 +332,7 @@ namespace Tensorflow.Keras.Engine /// /// /// - protected virtual Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected virtual Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { if(ReplacedCall is not null) { diff --git a/src/TensorFlowNET.Keras/Engine/Model.cs b/src/TensorFlowNET.Keras/Engine/Model.cs index 83702b23..7b35d547 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.cs @@ -1,8 +1,8 @@ using System.Diagnostics; +using Tensorflow.Common.Types; using Tensorflow.Framework.Models; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Losses; -using Tensorflow.Keras.Saving; using Tensorflow.Keras.Saving.SavedModel; using Tensorflow.Keras.Utils; using Tensorflow.Train; diff --git a/src/TensorFlowNET.Keras/Engine/Sequential.cs b/src/TensorFlowNET.Keras/Engine/Sequential.cs index 27874751..6a468ad2 100644 --- a/src/TensorFlowNET.Keras/Engine/Sequential.cs +++ b/src/TensorFlowNET.Keras/Engine/Sequential.cs @@ -21,6 +21,7 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Layers; using Tensorflow.Keras.Utils; using static Tensorflow.KerasApi; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Engine { @@ -143,7 +144,7 @@ namespace Tensorflow.Keras.Engine } } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { if (!_has_explicit_input_shape) { diff --git a/src/TensorFlowNET.Keras/Layers/Activation/ELU.cs b/src/TensorFlowNET.Keras/Layers/Activation/ELU.cs index 739c0d56..23f36c86 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/ELU.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/ELU.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; @@ -29,7 +30,7 @@ namespace Tensorflow.Keras.Layers { base.build(input_shape); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? 
training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensor output = inputs; output = tf.where(output > 0f, output, diff --git a/src/TensorFlowNET.Keras/Layers/Activation/Exponential.cs b/src/TensorFlowNET.Keras/Layers/Activation/Exponential.cs index 17636302..81fefb31 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/Exponential.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/Exponential.cs @@ -4,7 +4,7 @@ using System.Text; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; -using static Tensorflow.Binding; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { public class Exponential : Layer @@ -17,7 +17,7 @@ namespace Tensorflow.Keras.Layers { { base.build(input_shape); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensor output = inputs; return tf.exp(output); diff --git a/src/TensorFlowNET.Keras/Layers/Activation/HardSigmoid.cs b/src/TensorFlowNET.Keras/Layers/Activation/HardSigmoid.cs index b498d1b9..e0f91380 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/HardSigmoid.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/HardSigmoid.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; +using Tensorflow.Common.Types; using static Tensorflow.Binding; namespace Tensorflow.Keras.Layers { @@ -10,7 +11,7 @@ namespace Tensorflow.Keras.Layers { public HardSigmoid ( LayerArgs args ) : base(args) { // hard sigmoid has no arguments } - protected override Tensors Call ( Tensors inputs, Tensor state = null, bool? training = null ) { + protected override Tensors Call ( Tensors inputs, Tensors state = null, bool? 
training = null, IOptionalArgs? optional_args = null ) { Tensor x = inputs; return tf.clip_by_value( tf.add(tf.multiply(x, 0.2f), 0.5f), 0f, 1f); diff --git a/src/TensorFlowNET.Keras/Layers/Activation/LeakyReLu.cs b/src/TensorFlowNET.Keras/Layers/Activation/LeakyReLu.cs index 1fbbf4ea..cfbd0186 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/LeakyReLu.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/LeakyReLu.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; +using Tensorflow.Common.Types; using static Tensorflow.Binding; namespace Tensorflow.Keras.Layers @@ -19,7 +20,7 @@ namespace Tensorflow.Keras.Layers this.args = args; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { return tf.nn.leaky_relu(inputs, alpha: alpha); } diff --git a/src/TensorFlowNET.Keras/Layers/Activation/SELU.cs b/src/TensorFlowNET.Keras/Layers/Activation/SELU.cs index 53101fbb..2e943d5f 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/SELU.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/SELU.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; @@ -22,7 +23,7 @@ namespace Tensorflow.Keras.Layers { } base.build(input_shape); } - protected override Tensors Call ( Tensors inputs, Tensor state = null, bool? training = null ) { + protected override Tensors Call ( Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensor output = inputs; return tf.where(output > 0f, tf.multiply(scale, output), diff --git a/src/TensorFlowNET.Keras/Layers/Activation/Softmax.cs b/src/TensorFlowNET.Keras/Layers/Activation/Softmax.cs index 3ffae27f..d018128d 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/Softmax.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/Softmax.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using static Tensorflow.Binding; @@ -11,8 +12,8 @@ namespace Tensorflow.Keras.Layers { public Softmax ( SoftmaxArgs args ) : base(args) { axis = args.axis; } - protected override Tensors Call ( Tensors inputs, Tensor state = null, bool? training = null ) { - Tensor x = inputs.Length == 2 ? inputs + ((1.0 - tf.cast(inputs[1], inputs.dtype)) * 1e-9) + protected override Tensors Call ( Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { + Tensor x = inputs.Length == 2 ? 
inputs[0] + ((1.0 - tf.cast(inputs[1], inputs.dtype)) * 1e-9) : inputs; Tensor e = tf.exp(tf.sub(x, tf.reduce_max(x, axis: this.axis, keepdims: true))); Tensor s = tf.reduce_sum(e, axis: this.axis, keepdims: true); diff --git a/src/TensorFlowNET.Keras/Layers/Activation/Softplus.cs b/src/TensorFlowNET.Keras/Layers/Activation/Softplus.cs index e82b0198..1e6c59b4 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/Softplus.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/Softplus.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using static Tensorflow.Binding; @@ -10,7 +11,7 @@ namespace Tensorflow.Keras.Layers { public Softplus ( LayerArgs args ) : base(args) { // Softplus has no arguments } - protected override Tensors Call ( Tensors inputs, Tensor state = null, bool? training = null ) { + protected override Tensors Call ( Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensor x = inputs; return tf.log( tf.add(tf.exp(x), 1f)); diff --git a/src/TensorFlowNET.Keras/Layers/Activation/Softsign.cs b/src/TensorFlowNET.Keras/Layers/Activation/Softsign.cs index 59329fd4..5ad33e99 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/Softsign.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/Softsign.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using static Tensorflow.Binding; @@ -10,7 +11,7 @@ namespace Tensorflow.Keras.Layers { public Softsign ( LayerArgs args ) : base(args) { // Softsign has no arguments } - protected override Tensors Call ( Tensors inputs, Tensor state = null, bool? training = null ) { + protected override Tensors Call ( Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensor x = inputs; // x / (abs(x) + 1) return tf.div(x, tf.add(1f, tf.abs(x))); diff --git a/src/TensorFlowNET.Keras/Layers/Activation/Swish.cs b/src/TensorFlowNET.Keras/Layers/Activation/Swish.cs index 1dcb92b3..ed0d105a 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/Swish.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/Swish.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using static Tensorflow.Binding; @@ -10,7 +11,7 @@ namespace Tensorflow.Keras.Layers { public Swish ( LayerArgs args ) : base(args) { // Swish has no arguments } - protected override Tensors Call ( Tensors inputs, Tensor state = null, bool? training = null ) { + protected override Tensors Call ( Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensor x = inputs; // x / (1 + exp(-x)) diff --git a/src/TensorFlowNET.Keras/Layers/Activation/Tanh.cs b/src/TensorFlowNET.Keras/Layers/Activation/Tanh.cs index 99b80394..7e90cf9d 100644 --- a/src/TensorFlowNET.Keras/Layers/Activation/Tanh.cs +++ b/src/TensorFlowNET.Keras/Layers/Activation/Tanh.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; +using Tensorflow.Common.Types; using static Tensorflow.Binding; namespace Tensorflow.Keras.Layers @@ -13,7 +14,7 @@ namespace Tensorflow.Keras.Layers { // Tanh has no arguments } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensor x = inputs; diff --git a/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs b/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs index 1348e19c..19b29272 100644 --- a/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs +++ b/src/TensorFlowNET.Keras/Layers/Attention/BaseDenseAttention.cs @@ -6,6 +6,7 @@ using System; using System.Collections.Generic; using System.Linq; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; /// /// Base class for attention layers that can be used in sequence DNN/CNN models. @@ -114,7 +115,7 @@ namespace Tensorflow.Keras.Layers return (tf.linalg.einsum("bij,bjk->bik", (weights, value)), weights); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensors _inp; Tensors _mask = null; diff --git a/src/TensorFlowNET.Keras/Layers/Attention/MultiHeadAttention.cs b/src/TensorFlowNET.Keras/Layers/Attention/MultiHeadAttention.cs index 701724d5..75dd4a41 100644 --- a/src/TensorFlowNET.Keras/Layers/Attention/MultiHeadAttention.cs +++ b/src/TensorFlowNET.Keras/Layers/Attention/MultiHeadAttention.cs @@ -6,6 +6,7 @@ using static Tensorflow.Binding; using static Tensorflow.KerasApi; using System; using System.Linq; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -252,7 +253,7 @@ namespace Tensorflow.Keras.Layers return (attention_output, attention_scores); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensors _inp; Tensor _mask = null; @@ -349,7 +350,7 @@ namespace Tensorflow.Keras.Layers //} if (return_attention_scores) - return (attention_output, attention_scores); + return (attention_output, attention_scores.Single); return attention_output; } } diff --git a/src/TensorFlowNET.Keras/Layers/Convolution/Conv2DTranspose.cs b/src/TensorFlowNET.Keras/Layers/Convolution/Conv2DTranspose.cs index bbd49acd..94ad7914 100644 --- a/src/TensorFlowNET.Keras/Layers/Convolution/Conv2DTranspose.cs +++ b/src/TensorFlowNET.Keras/Layers/Convolution/Conv2DTranspose.cs @@ -20,6 +20,7 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Utils; using static Tensorflow.KerasApi; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -83,7 +84,7 @@ namespace Tensorflow.Keras.Layers _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { var inputs_shape = array_ops.shape(inputs); var batch_size = inputs_shape[0]; diff --git a/src/TensorFlowNET.Keras/Layers/Convolution/Convolutional.cs b/src/TensorFlowNET.Keras/Layers/Convolution/Convolutional.cs index c575362c..d8e00d52 100644 --- a/src/TensorFlowNET.Keras/Layers/Convolution/Convolutional.cs +++ b/src/TensorFlowNET.Keras/Layers/Convolution/Convolutional.cs @@ -17,6 +17,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; @@ -103,7 +104,7 @@ namespace Tensorflow.Keras.Layers _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = false) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? 
training = false, IOptionalArgs? optional_args = null) { var outputs = _convolution_op.Apply(inputs, kernel.AsTensor()); if (use_bias) diff --git a/src/TensorFlowNET.Keras/Layers/Core/Dense.cs b/src/TensorFlowNET.Keras/Layers/Core/Dense.cs index aa6617dd..db5d626e 100644 --- a/src/TensorFlowNET.Keras/Layers/Core/Dense.cs +++ b/src/TensorFlowNET.Keras/Layers/Core/Dense.cs @@ -18,6 +18,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; @@ -69,7 +70,7 @@ namespace Tensorflow.Keras.Layers built = true; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensor outputs = null; var rank = inputs.rank; diff --git a/src/TensorFlowNET.Keras/Layers/Core/EinsumDense.cs b/src/TensorFlowNET.Keras/Layers/Core/EinsumDense.cs index fb604f77..0cbd5084 100644 --- a/src/TensorFlowNET.Keras/Layers/Core/EinsumDense.cs +++ b/src/TensorFlowNET.Keras/Layers/Core/EinsumDense.cs @@ -7,6 +7,7 @@ using System.Text.RegularExpressions; using Tensorflow.Keras.Engine; using Tensorflow.Keras.ArgsDefinition.Core; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -189,7 +190,7 @@ namespace Tensorflow.Keras.Layers // return new dict(base_config.items().ToList() + config.items().ToList()); //} - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { var ret = tf.linalg.einsum(this.equation, (inputs, this.kernel.AsTensor())); if (this.bias != null) diff --git a/src/TensorFlowNET.Keras/Layers/Core/Embedding.cs b/src/TensorFlowNET.Keras/Layers/Core/Embedding.cs index 9487a7d0..87b42bb7 100644 --- a/src/TensorFlowNET.Keras/Layers/Core/Embedding.cs +++ b/src/TensorFlowNET.Keras/Layers/Core/Embedding.cs @@ -15,6 +15,7 @@ ******************************************************************************/ using System.Linq; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; @@ -66,7 +67,7 @@ namespace Tensorflow.Keras.Layers _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { var dtype = inputs.dtype; if (dtype != tf.int32 && dtype != tf.int64) diff --git a/src/TensorFlowNET.Keras/Layers/Merging/Merge.cs b/src/TensorFlowNET.Keras/Layers/Merging/Merge.cs index 7df654ee..bcbb20d8 100644 --- a/src/TensorFlowNET.Keras/Layers/Merging/Merge.cs +++ b/src/TensorFlowNET.Keras/Layers/Merging/Merge.cs @@ -5,6 +5,7 @@ using static Tensorflow.Binding; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -21,7 +22,7 @@ namespace Tensorflow.Keras.Layers _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { return _merge_function(inputs); } diff --git a/src/TensorFlowNET.Keras/Layers/Normalization/BatchNormalization.cs b/src/TensorFlowNET.Keras/Layers/Normalization/BatchNormalization.cs index d02d2509..65558157 100644 --- a/src/TensorFlowNET.Keras/Layers/Normalization/BatchNormalization.cs +++ b/src/TensorFlowNET.Keras/Layers/Normalization/BatchNormalization.cs @@ -17,6 +17,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; @@ -146,7 +147,7 @@ namespace Tensorflow.Keras.Layers return false; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensor outputs = null; var training_tensor = training == null diff --git a/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs b/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs index e90c0402..1898f24c 100644 --- a/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs +++ b/src/TensorFlowNET.Keras/Layers/Normalization/LayerNormalization.cs @@ -17,6 +17,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; @@ -101,7 +102,7 @@ namespace Tensorflow.Keras.Layers return input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensor outputs = null; var inputs_dtype = inputs.dtype.as_base_dtype(); diff --git a/src/TensorFlowNET.Keras/Layers/Normalization/Normalization.cs b/src/TensorFlowNET.Keras/Layers/Normalization/Normalization.cs index a65154bf..987b56bc 100644 --- a/src/TensorFlowNET.Keras/Layers/Normalization/Normalization.cs +++ b/src/TensorFlowNET.Keras/Layers/Normalization/Normalization.cs @@ -14,6 +14,7 @@ limitations under the License. ******************************************************************************/ +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Saving; @@ -157,7 +158,7 @@ namespace Tensorflow.Keras.Layers base.adapt(data, batch_size: batch_size, steps: steps); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { if (_args.Invert) { diff --git a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling1D.cs b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling1D.cs index d62fb63a..ffaabec9 100644 --- a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling1D.cs +++ b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling1D.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -12,7 +13,7 @@ namespace Tensorflow.Keras.Layers { } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { if (data_format == "channels_last") return math_ops.reduce_mean(inputs, 1, false); diff --git a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling2D.cs b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling2D.cs index 000e4b8b..e0666517 100644 --- a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling2D.cs +++ b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalAveragePooling2D.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -12,7 +13,7 @@ namespace Tensorflow.Keras.Layers { } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { if (data_format == "channels_last") return math_ops.reduce_mean(inputs, (1, 2), false); diff --git a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling1D.cs b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling1D.cs index 2de4671c..15695e8a 100644 --- a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling1D.cs +++ b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling1D.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -12,7 +13,7 @@ namespace Tensorflow.Keras.Layers { } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { if (data_format == "channels_last") return math_ops.reduce_max(inputs, 1, false); diff --git a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling2D.cs b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling2D.cs index b7e2c945..76db858d 100644 --- a/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling2D.cs +++ b/src/TensorFlowNET.Keras/Layers/Pooling/GlobalMaxPooling2D.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -12,7 +13,7 @@ namespace Tensorflow.Keras.Layers { } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { if (data_format == "channels_last") return math_ops.reduce_max(inputs, (1, 2), false); diff --git a/src/TensorFlowNET.Keras/Layers/Pooling/Pooling1D.cs b/src/TensorFlowNET.Keras/Layers/Pooling/Pooling1D.cs index a2f4c51b..81a34019 100644 --- a/src/TensorFlowNET.Keras/Layers/Pooling/Pooling1D.cs +++ b/src/TensorFlowNET.Keras/Layers/Pooling/Pooling1D.cs @@ -18,6 +18,7 @@ using System.Linq; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; +using Tensorflow.Common.Types; using static Tensorflow.Binding; namespace Tensorflow.Keras.Layers @@ -36,7 +37,7 @@ namespace Tensorflow.Keras.Layers input_spec = new InputSpec(ndim: 3); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { int pad_axis = args.DataFormat == "channels_first" ? 
2 : 3; inputs = tf.expand_dims(inputs, pad_axis); diff --git a/src/TensorFlowNET.Keras/Layers/Pooling/Pooling2D.cs b/src/TensorFlowNET.Keras/Layers/Pooling/Pooling2D.cs index 27032255..f83f1e15 100644 --- a/src/TensorFlowNET.Keras/Layers/Pooling/Pooling2D.cs +++ b/src/TensorFlowNET.Keras/Layers/Pooling/Pooling2D.cs @@ -17,6 +17,7 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -36,7 +37,7 @@ namespace Tensorflow.Keras.Layers input_spec = new InputSpec(ndim: 4); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { int[] pool_shape; int[] strides; diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs index 5620a916..20d2a53d 100644 --- a/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs @@ -1,6 +1,6 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; - +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { /// @@ -15,7 +15,7 @@ namespace Tensorflow.Keras.Layers this.args = args; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { var depth = args.NumTokens; var max_value = tf.reduce_max(inputs); diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/Rescaling.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/Rescaling.cs index 5fc581af..7fa367ee 100644 --- a/src/TensorFlowNET.Keras/Layers/Preprocessing/Rescaling.cs +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/Rescaling.cs @@ -1,5 +1,6 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -17,7 +18,7 @@ namespace Tensorflow.Keras.Layers this.args = args; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { scale = constant_op.constant(args.Scale, args.DType); offset = constant_op.constant(args.Offset, args.DType); diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/Resizing.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/Resizing.cs index 603e2b07..081966ad 100644 --- a/src/TensorFlowNET.Keras/Layers/Preprocessing/Resizing.cs +++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/Resizing.cs @@ -4,6 +4,7 @@ using System; using System.Text; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -19,7 +20,7 @@ namespace Tensorflow.Keras.Layers this.args = args; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { return image_ops_impl.resize_images_v2(inputs, new[] { args.Height, args.Width }, method: args.Interpolation); } diff --git a/src/TensorFlowNET.Keras/Layers/Regularization/Dropout.cs b/src/TensorFlowNET.Keras/Layers/Regularization/Dropout.cs index aa3a92a4..ada1851c 100644 --- a/src/TensorFlowNET.Keras/Layers/Regularization/Dropout.cs +++ b/src/TensorFlowNET.Keras/Layers/Regularization/Dropout.cs @@ -1,4 +1,5 @@ -using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Types; +using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; using static Tensorflow.Binding; @@ -15,7 +16,7 @@ namespace Tensorflow.Keras.Layers this.args = args; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { if (training == null) training = false; diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs index 9ead15cb..31285438 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping1D.cs @@ -1,6 +1,8 @@ using Tensorflow.Keras.ArgsDefinition.Reshaping; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers.Reshaping { @@ -27,7 +29,7 @@ namespace Tensorflow.Keras.Layers.Reshaping _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensor output = inputs; if (output.rank != 3) diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping2D.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping2D.cs index 087d59a1..4a5c6eab 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping2D.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping2D.cs @@ -1,6 +1,7 @@ using Tensorflow.Keras.ArgsDefinition.Reshaping; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers.Reshaping { @@ -21,7 +22,7 @@ namespace Tensorflow.Keras.Layers.Reshaping built = true; _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { Tensor output = inputs; if (output.rank != 4) diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping3D.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping3D.cs index 04a1af60..83f86c6f 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping3D.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Cropping3D.cs @@ -1,6 +1,7 @@ using Tensorflow.Keras.ArgsDefinition.Reshaping; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers.Reshaping { @@ -21,7 +22,7 @@ namespace Tensorflow.Keras.Layers.Reshaping _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensor output = inputs; if (output.rank != 5) diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Flatten.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Flatten.cs index 539b5f62..a6192849 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Flatten.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Flatten.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using Tensorflow.Common.Types; using Tensorflow.Framework; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; @@ -23,7 +24,7 @@ namespace Tensorflow.Keras.Layers _channels_first = args.DataFormat == "channels_first"; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { if (_channels_first) { diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Permute.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Permute.cs index e391775c..7fdb816b 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Permute.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Permute.cs @@ -6,6 +6,7 @@ using Tensorflow.Keras.Utils; using static Tensorflow.Binding; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { public class Permute : Layer @@ -28,7 +29,7 @@ namespace Tensorflow.Keras.Layers { built = true; _buildInputShape = input_shape; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { Tensor outputs = inputs; return tf.transpose(outputs, new Axis(permute)); diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/Reshape.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/Reshape.cs index 92a772f3..4b3d30e2 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/Reshape.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/Reshape.cs @@ -4,6 +4,7 @@ using static Tensorflow.Binding; using System.Collections.Generic; using System; using System.Linq; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -19,7 +20,7 @@ namespace Tensorflow.Keras.Layers this.args = args; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { var shapes = new List(); shapes.Add(array_ops.shape(inputs)[0]); diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/UpSampling2D.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/UpSampling2D.cs index 8314151f..223f33d4 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/UpSampling2D.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/UpSampling2D.cs @@ -6,6 +6,7 @@ using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; using static Tensorflow.Binding; using static Tensorflow.KerasApi; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -24,7 +25,7 @@ namespace Tensorflow.Keras.Layers inputSpec = new InputSpec(ndim: 4); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { return keras.backend.resize_images(inputs, size[0], size[1], diff --git a/src/TensorFlowNET.Keras/Layers/Reshaping/ZeroPadding2D.cs b/src/TensorFlowNET.Keras/Layers/Reshaping/ZeroPadding2D.cs index 7c87100a..3b37dac4 100644 --- a/src/TensorFlowNET.Keras/Layers/Reshaping/ZeroPadding2D.cs +++ b/src/TensorFlowNET.Keras/Layers/Reshaping/ZeroPadding2D.cs @@ -2,6 +2,7 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; +using Tensorflow.Common.Types; using static Tensorflow.KerasApi; namespace Tensorflow.Keras.Layers @@ -26,7 +27,7 @@ namespace Tensorflow.Keras.Layers this.input_spec = new InputSpec(ndim: 4); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) { return keras.backend.spatial_2d_padding(inputs, padding: padding, diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs b/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs new file mode 100644 index 00000000..21396853 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/DropoutRNNCellMixin.cs @@ -0,0 +1,85 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Common.Types; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Engine; + +namespace Tensorflow.Keras.Layers.Rnn +{ + public abstract class DropoutRNNCellMixin: RnnCellBase + { + public float dropout; + public float recurrent_dropout; + // TODO(Rinne): deal with cache. + public DropoutRNNCellMixin(LayerArgs args): base(args) + { + + } + + public Tensors? get_dropout_maskcell_for_cell(Tensors input, bool training, int count = 1) + { + if (dropout == 0f) + return null; + return _generate_dropout_mask( + tf.ones_like(input), + dropout, + training, + count); + } + + // Get the recurrent dropout mask for RNN cell. + public Tensors? 
get_recurrent_dropout_maskcell_for_cell(Tensors input, bool training, int count = 1) + { + if (recurrent_dropout == 0f) // gate on the recurrent rate, not the input dropout rate + return null; + return _generate_dropout_mask( + tf.ones_like(input), + recurrent_dropout, + training, + count); + } + + public Tensors _create_dropout_mask(Tensors input, bool training, int count = 1) + { + return _generate_dropout_mask( + tf.ones_like(input), + dropout, + training, + count); + } + + public Tensors _create_recurrent_dropout_mask(Tensors input, bool training, int count = 1) + { + return _generate_dropout_mask( + tf.ones_like(input), + recurrent_dropout, + training, + count); + } + + public Tensors _generate_dropout_mask(Tensor ones, float rate, bool training, int count = 1) + { + Tensors dropped_inputs() + { + DropoutArgs args = new DropoutArgs(); + args.Rate = rate; + var DropoutLayer = new Dropout(args); + var mask = DropoutLayer.Apply(ones, training: training); + return mask; + } + + if (count > 1) + { + Tensors results = new Tensors(); + for (int i = 0; i < count; i++) + { + results.Add(dropped_inputs()); + } + return results; + } + + return dropped_inputs(); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs b/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs index 59555e62..1449c908 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/LSTM.cs @@ -1,6 +1,7 @@ using System.Linq; using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers.Rnn { @@ -26,9 +27,9 @@ namespace Tensorflow.Keras.Layers.Rnn .ToArray(); } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { - return base.Call(inputs, state: state, training: training); + return base.Call(inputs, initial_state: state, training: training, optional_args: optional_args); } } } diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs index 310e8057..b014737f 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs @@ -1,53 +1,466 @@ -using System; +using OneOf; +using System; using System.Collections.Generic; +using System.Reflection; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.Util; +using Tensorflow.Common.Extensions; +using System.Linq.Expressions; +using Tensorflow.Keras.Utils; +using Tensorflow.Common.Types; // from tensorflow.python.distribute import distribution_strategy_context as ds_context; namespace Tensorflow.Keras.Layers.Rnn { - public class RNN : Layer + /// + /// Base class for recurrent layers. + /// See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + /// for details about the usage of RNN API. + /// + public class RNN : RnnBase { - private RNNArgs args; - private object input_spec = null; // or NoneValue?? - private object state_spec = null; - private object _states = null; - private object constants_spec = null; - private int _num_constants = 0; - protected IVariableV1 kernel; - protected IVariableV1 bias; - protected ILayer cell; + private RNNArgs _args; + private object _input_spec = null; // or NoneValue?? 
+ private object _state_spec = null; + private Tensors _states = null; + private object _constants_spec = null; + private int _num_constants; + protected IVariableV1 _kernel; + protected IVariableV1 _bias; + protected IRnnCell _cell; + public RNN(RNNArgs args) : base(PreConstruct(args)) { - this.args = args; + _args = args; SupportsMasking = true; - // The input shape is unknown yet, it could have nested tensor inputs, and - // the input spec will be the list of specs for nested inputs, the structure - // of the input_spec will be the same as the input. + // if is StackedRnncell + _cell = args.Cell; - //if(stateful) - //{ - // if (ds_context.has_strategy()) // ds_context???? - // { - // throw new Exception("RNNs with stateful=True not yet supported with tf.distribute.Strategy"); - // } - //} + // get input_shape + _args = PreConstruct(args); + + _num_constants = 0; + } + + // States is a tuple consist of cell states_size, like (cell1.state_size, cell2.state_size,...) + // state_size can be a single integer, can also be a list/tuple of integers, can also be TensorShape or a list/tuple of TensorShape + public Tensors States + { + get + { + if (_states == null) + { + // CHECK(Rinne): check if this is correct. 
+ var state = nest.map_structure(x => null, _cell.StateSize); + return new Tensors { state }; + } + return _states; + } + set { _states = value; } + } + + private OneOf> compute_output_shape(Shape input_shape) + { + var batch = input_shape[0]; + var time_step = input_shape[1]; + if (_args.TimeMajor) + { + (batch, time_step) = (time_step, batch); + } + + // state_size is an array of ints or a positive integer + var state_size = _cell.StateSize.ToSingleShape(); + + // TODO(wanglongzhi2001): decide what type flat_output_size should be, Shape or Tensor + Func _get_output_shape; + _get_output_shape = (flat_output_size) => + { + var output_dim = flat_output_size.as_int_list(); + Shape output_shape; + if (_args.ReturnSequences) + { + if (_args.TimeMajor) + { + output_shape = new Shape(new int[] { (int)time_step, (int)batch }.concat(output_dim)); + } + else + { + output_shape = new Shape(new int[] { (int)batch, (int)time_step }.concat(output_dim)); + + } + } + else + { + output_shape = new Shape(new int[] { (int)batch }.concat(output_dim)); + } + return output_shape; + }; + + Type type = _cell.GetType(); + PropertyInfo output_size_info = type.GetProperty("output_size"); + Shape output_shape; + if (output_size_info != null) + { + output_shape = nest.map_structure(_get_output_shape, _cell.OutputSize.ToSingleShape()); + // TODO(wanglongzhi2001): decide whether output_shape should simply be a tuple or a Shape + output_shape = (output_shape.Length == 1 ? 
(int)output_shape[0] : output_shape); + } + else + { + output_shape = _get_output_shape(state_size); + } + + if (_args.ReturnState) + { + Func _get_state_shape; + _get_state_shape = (flat_state) => + { + var state_shape = new int[] { (int)batch }.concat(flat_state.as_int_list()); + return new Shape(state_shape); + }; + var state_shape = _get_state_shape(state_size); + + return new List { output_shape, state_shape }; + } + else + { + return output_shape; + } + + } + + private Tensors compute_mask(Tensors inputs, Tensors mask) + { + // Time step masks must be the same for each input. + // This is because the mask for an RNN is of size [batch, time_steps, 1], + // and specifies which time steps should be skipped, and a time step + // must be skipped for all inputs. + + mask = nest.flatten(mask)[0]; + var output_mask = _args.ReturnSequences ? mask : null; + if (_args.ReturnState) + { + var state_mask = new List(); + for (int i = 0; i < len(States); i++) + { + state_mask.Add(null); + } + return new List { output_mask }.concat(state_mask); + } + else + { + return output_mask; + } } public override void build(KerasShapesWrapper input_shape) { - if (!cell.Built) + object get_input_spec(Shape shape) + { + var input_spec_shape = shape.as_int_list(); + + var (batch_index, time_step_index) = _args.TimeMajor ? 
(1, 0) : (0, 1); + if (!_args.Stateful) + { + input_spec_shape[batch_index] = -1; + } + input_spec_shape[time_step_index] = -1; + return new InputSpec(shape: input_spec_shape); + } + + Shape get_step_input_shape(Shape shape) + { + + // return shape[1:] if self.time_major else (shape[0],) + shape[2:] + if (_args.TimeMajor) + { + return shape.as_int_list().ToList().GetRange(1, shape.Length - 1).ToArray(); + } + else + { + return new int[] { shape.as_int_list()[0] }.concat(shape.as_int_list().ToList().GetRange(2, shape.Length - 2).ToArray()); + } + + + } + + object get_state_spec(Shape shape) + { + var state_spec_shape = shape.as_int_list(); + // prepend the batch dim (-1) in front of the state shape + state_spec_shape = new int[] { -1 }.concat(state_spec_shape); + return new InputSpec(shape: state_spec_shape); + + } + + // Check whether the input shape contains any nested shapes. It could be + // (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from + // numpy inputs. + + + if (!_cell.Built) + { - cell.build(input_shape); + _cell.build(input_shape); } } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + /// + /// + /// + /// + /// Binary tensor of shape [batch_size, timesteps] indicating whether a given timestep should be masked + /// + /// List of initial state tensors to be passed to the first call of the cell + /// List of constant tensors to be passed to the cell at each timestep + /// + /// + /// + protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bool? training = null, IOptionalArgs? optional_args = null) + { + RnnOptionalArgs? rnn_optional_args = optional_args as RnnOptionalArgs; + if(optional_args is not null && rnn_optional_args is null) + { + throw new ArgumentException("The optional args shhould be of type `RnnOptionalArgs`"); + } + Tensors? 
mask = rnn_optional_args?.Mask; + //var (inputs_padded, row_length) = BackendImpl.convert_inputs_if_ragged(inputs); + // Ragged tensors are not accepted for now + int? row_length = null; + bool is_ragged_input = false; + _validate_args_if_ragged(is_ragged_input, mask); + + (inputs, initial_state, constants) = _process_inputs(inputs, initial_state, constants); + + _maybe_reset_cell_dropout_mask(_cell); + if (_cell is StackedRNNCells) + { + var stack_cell = _cell as StackedRNNCells; + foreach (var cell in stack_cell.Cells) + { + _maybe_reset_cell_dropout_mask(cell); + } + } + + if (mask != null) + { + // Time step masks must be the same for each input. + mask = nest.flatten(mask)[0]; + } + + Shape input_shape; + if (nest.is_nested(inputs)) + { + // In the case of nested input, use the first element for shape check + // input_shape = nest.flatten(inputs)[0].shape; + // TODO(Wanglongzhi2001) + input_shape = nest.flatten(inputs)[0].shape; + } + else + { + input_shape = inputs.shape; + } + + var timesteps = _args.TimeMajor ? input_shape[0] : input_shape[1]; + + if (_args.Unroll && timesteps < 0) // unrolling needs a known time dimension; unknown dims are assumed to be stored as -1 (confirm) + { + throw new ValueError( + "Cannot unroll a RNN if the " + + "time dimension is undefined. \n" + + "- If using a Sequential model, " + + "specify the time dimension by passing " + + "an `input_shape` or `batch_input_shape` " + + "argument to your first layer. If your " + + "first layer is an Embedding, you can " + + "also use the `input_length` argument.\n" + + "- If using the functional API, specify " + + "the time dimension by passing a `shape` " + + "or `batch_shape` argument to your Input layer." + ); + } + + // cell_call_fn = (self.cell.__call__ if callable(self.cell) else self.cell.call) + Func step; + if (constants is not null) + { + if (!_cell.SupportOptionalArgs) + { + throw new ValueError( + $"RNN cell {_cell} does not support constants." 
+ + $"Received: constants={constants}"); + } + + step = (inputs, states) => + { + constants = new Tensors(states.TakeLast(_num_constants)); + states = new Tensors(states.SkipLast(_num_constants)); + var(output, new_states) = _cell.Apply(inputs, states, optional_args: new RnnOptionalArgs() { Constants = constants }); + // TODO(Wanglongzhi2001),should cell_call_fn's return value be Tensors, Tensors? + return (output, new_states.Single); + }; + } + else + { + step = (inputs, states) => + { + // states = (states[0] if len(states) == 1 and is_tf_rnn_cell else states) + var (output, new_states) = _cell.Apply(inputs, states); + return (output, new_states.Single); + }; + } + + var (last_output, outputs, states) = BackendImpl.rnn(step, + inputs, + initial_state, + constants: constants, + go_backwards: _args.GoBackwards, + mask: mask, + unroll: _args.Unroll, + input_length: row_length != null ? new Tensor(row_length) : new Tensor(timesteps), + time_major: _args.TimeMajor, + zero_output_for_mask: _args.ZeroOutputForMask, + return_all_outputs: _args.ReturnSequences); + + if (_args.Stateful) + { + throw new NotImplementedException("this argument havn't been developed."); + } + + Tensors output = new Tensors(); + if (_args.ReturnSequences) + { + throw new NotImplementedException("this argument havn't been developed."); + + } + else + { + output = last_output; + } + + if (_args.ReturnState) + { + foreach (var state in states) + { + output.Add(state); + } + return output; + } + else + { + return output; + } + } + + public override Tensors Apply(Tensors inputs, Tensors initial_states = null, bool training = false, IOptionalArgs? optional_args = null) + { + RnnOptionalArgs? rnn_optional_args = optional_args as RnnOptionalArgs; + if (optional_args is not null && rnn_optional_args is null) + { + throw new ArgumentException("The type of optional args should be `RnnOptionalArgs`."); + } + Tensors? 
constants = rnn_optional_args?.Constants; + (inputs, initial_states, constants) = RnnUtils.standardize_args(inputs, initial_states, constants, _num_constants); + + if(initial_states is null && constants is null) + { + return base.Apply(inputs); + } + + // TODO(Rinne): implement it. + throw new NotImplementedException(); + } + + private (Tensors inputs, Tensors initial_state, Tensors constants) _process_inputs(Tensors inputs, Tensors initial_state, Tensors constants) + { + if (inputs.Length > 1) + { + if (_num_constants == 0) // no constants: everything after inputs[0] is initial state + { + initial_state = new Tensors(inputs.Skip(1)); + } + else + { + initial_state = new Tensors(inputs.Skip(1).SkipLast(_num_constants)); + constants = new Tensors(inputs.TakeLast(_num_constants)); + } + if (len(initial_state) == 0) + initial_state = null; + inputs = inputs[0]; + } + + if (_args.Stateful) + { + if (initial_state != null) + { + var tmp = new Tensor[] { }; + foreach (var s in nest.flatten(States)) + { + tmp.add(tf.math.count_nonzero((Tensor)s)); + } + var non_zero_count = tf.add_n(tmp); + //initial_state = tf.cond(non_zero_count > 0, () => States, () => initial_state); + if ((int)non_zero_count.numpy() > 0) + { + initial_state = States; + } + } + else + { + initial_state = States; + } + + } + else if (initial_state is null) + { + initial_state = get_initial_state(inputs); + } + + if (initial_state.Length != States.Length) + { + throw new ValueError( + $"Layer {this} expects {States.Length} state(s), " + + $"but it received {initial_state.Length} " + + $"initial state(s). Input received: {inputs}"); + } + + return (inputs, initial_state, constants); + } + + private void _validate_args_if_ragged(bool is_ragged_input, Tensors mask) + { + if (!is_ragged_input) + { + return; + } + + if (_args.Unroll) + { + throw new ValueError("The input received contains RaggedTensors and does " + + "not support unrolling. 
Disable unrolling by passing " + + "`unroll=False` in the RNN Layer constructor."); + } + if (mask != null) + { + throw new ValueError($"The mask that was passed in was {mask}, which " + + "cannot be applied to RaggedTensor inputs. Please " + + "make sure that there is no mask injected by upstream " + + "layers."); + } + + } + + void _maybe_reset_cell_dropout_mask(ILayer cell) + { + //if (cell is DropoutRNNCellMixin) + //{ + // cell.reset_dropout_mask(); + // cell.reset_recurrent_dropout_mask(); + //} } private static RNNArgs PreConstruct(RNNArgs args) @@ -77,60 +490,72 @@ namespace Tensorflow.Keras.Layers.Rnn return args; } - public RNN New(LayerRnnCell cell, - bool return_sequences = false, - bool return_state = false, - bool go_backwards = false, - bool stateful = false, - bool unroll = false, - bool time_major = false) - => new RNN(new RNNArgs - { - Cell = cell, - ReturnSequences = return_sequences, - ReturnState = return_state, - GoBackwards = go_backwards, - Stateful = stateful, - Unroll = unroll, - TimeMajor = time_major - }); - - public RNN New(IList cell, - bool return_sequences = false, - bool return_state = false, - bool go_backwards = false, - bool stateful = false, - bool unroll = false, - bool time_major = false) - => new RNN(new RNNArgs - { - Cell = new StackedRNNCells(new StackedRNNCellsArgs { Cells = cell }), - ReturnSequences = return_sequences, - ReturnState = return_state, - GoBackwards = go_backwards, - Stateful = stateful, - Unroll = unroll, - TimeMajor = time_major - }); - - - protected Tensor get_initial_state(Tensor inputs) + public Tensors __call__(Tensors inputs, Tensor state = null, Tensor training = null) { - return _generate_zero_filled_state_for_cell(null, null); + throw new NotImplementedException(); } - Tensor _generate_zero_filled_state_for_cell(LSTMCell cell, Tensor batch_size) + // NOTE: it seems the cell cannot be passed as an interface type here + //public RNN New(IRnnArgCell cell, + // bool return_sequences = false, + // 
bool return_state = false, + // bool go_backwards = 
false, + // bool stateful = false, + // bool unroll = false, + // bool time_major = false) + // => new RNN(new RNNArgs + // { + // Cell = cell, + // ReturnSequences = return_sequences, + // ReturnState = return_state, + // GoBackwards = go_backwards, + // Stateful = stateful, + // Unroll = unroll, + // TimeMajor = time_major + // }); + + //public RNN New(List cell, + // bool return_sequences = false, + // bool return_state = false, + // bool go_backwards = false, + // bool stateful = false, + // bool unroll = false, + // bool time_major = false) + // => new RNN(new RNNArgs + // { + // Cell = cell, + // ReturnSequences = return_sequences, + // ReturnState = return_state, + // GoBackwards = go_backwards, + // Stateful = stateful, + // Unroll = unroll, + // TimeMajor = time_major + // }); + + + protected Tensors get_initial_state(Tensors inputs) { - throw new NotImplementedException(""); + var input = inputs[0]; + var input_shape = input.shape; + var batch_size = _args.TimeMajor ? input_shape[1] : input_shape[0]; + var dtype = input.dtype; + Tensors init_state; + if (_cell is RnnCellBase rnn_base_cell) + { + init_state = rnn_base_cell.GetInitialState(null, batch_size, dtype); + } + else + { + init_state = RnnUtils.generate_zero_filled_state(batch_size, _cell.StateSize, dtype); + } + + return init_state; } // Check whether the state_size contains multiple states. 
- public static bool _is_multiple_state(object state_size) + public static bool is_multiple_state(GeneralizedTensorShape state_size) { - var myIndexerProperty = state_size.GetType().GetProperty("Item"); - return myIndexerProperty != null - && myIndexerProperty.GetIndexParameters().Length == 1 - && !(state_size.GetType() == typeof(Shape)); + return state_size.Shapes.Length > 1; } } } diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs new file mode 100644 index 00000000..018b1778 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RnnBase.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Engine; + +namespace Tensorflow.Keras.Layers.Rnn +{ + public abstract class RnnBase: Layer + { + public RnnBase(LayerArgs args): base(args) { } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RnnCellBase.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RnnCellBase.cs new file mode 100644 index 00000000..fcb5d1eb --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RnnCellBase.cs @@ -0,0 +1,24 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Common.Types; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.ArgsDefinition.Rnn; +using Tensorflow.Keras.Engine; +using Tensorflow.Keras.Utils; + +namespace Tensorflow.Keras.Layers.Rnn +{ + public abstract class RnnCellBase: Layer, IRnnCell + { + public RnnCellBase(LayerArgs args) : base(args) { } + public abstract GeneralizedTensorShape StateSize { get; } + public abstract GeneralizedTensorShape OutputSize { get; } + public abstract bool SupportOptionalArgs { get; } + public abstract (Tensor, Tensors) Call(Tensors inputs, Tensors states, bool? 
training = null); + public virtual Tensors GetInitialState(Tensors inputs, long batch_size, TF_DataType dtype) + { + return RnnUtils.generate_zero_filled_state_for_cell(this, inputs, batch_size, dtype); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs index 2d7aab70..22d0e277 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNN.cs @@ -10,18 +10,36 @@ namespace Tensorflow.Keras.Layers.Rnn public class SimpleRNN : RNN { SimpleRNNArgs args; - public SimpleRNN(SimpleRNNArgs args) : base(args) + public SimpleRNN(SimpleRNNArgs args) : base(CreateCellForArgs(args)) { this.args = args; } + private static SimpleRNNArgs CreateCellForArgs(SimpleRNNArgs args) + { + args.Cell = new SimpleRNNCell(new SimpleRNNCellArgs() + { + Units = args.Units, + Activation = args.Activation, + UseBias = args.UseBias, + KernelInitializer = args.KernelInitializer, + RecurrentInitializer = args.RecurrentInitializer, + BiasInitializer = args.BiasInitializer, + Dropout = args.Dropout, + RecurrentDropout = args.RecurrentDropout, + DType = args.DType, + Trainable = args.Trainable, + }); + return args; + } + public override void build(KerasShapesWrapper input_shape) { var single_shape = input_shape.ToSingleShape(); var input_dim = single_shape[-1]; _buildInputShape = input_shape; - kernel = add_weight("kernel", (single_shape[-1], args.Units), + _kernel = add_weight("kernel", (single_shape[-1], args.Units), initializer: args.KernelInitializer //regularizer = self.kernel_regularizer, //constraint = self.kernel_constraint, diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs index 46061b21..abb57d8a 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/SimpleRNNCell.cs @@ -4,47 +4,128 @@ using System.Text; using Tensorflow.Keras.ArgsDefinition.Rnn; using 
Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers.Rnn { - public class SimpleRNNCell : Layer + /// + /// Cell class for SimpleRNN. + /// See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) + /// for details about the usage of RNN API. + /// This class processes one step within the whole time sequence input, whereas + /// `tf.keras.layer.SimpleRNN` processes the whole sequence. + /// + public class SimpleRNNCell : DropoutRNNCellMixin { - SimpleRNNArgs args; - IVariableV1 kernel; - IVariableV1 recurrent_kernel; - IVariableV1 bias; + SimpleRNNCellArgs _args; + IVariableV1 _kernel; + IVariableV1 _recurrent_kernel; + IVariableV1 _bias; + GeneralizedTensorShape _state_size; + GeneralizedTensorShape _output_size; - public SimpleRNNCell(SimpleRNNArgs args) : base(args) + public override GeneralizedTensorShape StateSize => _state_size; + public override GeneralizedTensorShape OutputSize => _output_size; + public override bool SupportOptionalArgs => false; + + public SimpleRNNCell(SimpleRNNCellArgs args) : base(args) { - this.args = args; + this._args = args; + if (args.Units <= 0) + { + throw new ValueError( + $"units must be a positive integer, got {args.Units}"); + } + this._args.Dropout = Math.Min(1f, Math.Max(0f, this._args.Dropout)); + this._args.RecurrentDropout = Math.Min(1f, Math.Max(0f, this._args.RecurrentDropout)); + _state_size = new GeneralizedTensorShape(args.Units); + _output_size = new GeneralizedTensorShape(args.Units); } public override void build(KerasShapesWrapper input_shape) { + // TODO(Rinne): add the cache. 
var single_shape = input_shape.ToSingleShape(); var input_dim = single_shape[-1]; - kernel = add_weight("kernel", (single_shape[-1], args.Units), - initializer: args.KernelInitializer + _kernel = add_weight("kernel", (single_shape[-1], _args.Units), + initializer: _args.KernelInitializer ); - recurrent_kernel = add_weight("recurrent_kernel", (args.Units, args.Units), - initializer: args.RecurrentInitializer + _recurrent_kernel = add_weight("recurrent_kernel", (_args.Units, _args.Units), + initializer: _args.RecurrentInitializer ); - if (args.UseBias) + if (_args.UseBias) { - bias = add_weight("bias", (args.Units), - initializer: args.BiasInitializer + _bias = add_weight("bias", (_args.Units), + initializer: _args.BiasInitializer ); } built = true; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + public override (Tensor, Tensors) Call(Tensors inputs, Tensors states, bool? training = null) { - return base.Call(inputs, state, training); + // TODO(Rinne): check if it will have multiple tensors when not nested. 
+ Tensor prev_output = states[0]; + var dp_mask = get_dropout_maskcell_for_cell(inputs, training.Value); + var rec_dp_mask = get_recurrent_dropout_maskcell_for_cell(prev_output, training.Value); + + Tensor h; + var ranks = inputs.rank; + if (dp_mask != null) + { + if (ranks > 2) + { + // The multiply function automatically adds a leading dimension, so take index 0. + h = tf.linalg.tensordot(math_ops.multiply(inputs, dp_mask)[0], _kernel.AsTensor(), new[,] { { ranks - 1 }, { 0 } }); + } + else + { + h = math_ops.matmul(math_ops.multiply(inputs, dp_mask)[0], _kernel.AsTensor()); + } + } + else + { + if (ranks > 2) + { + h = tf.linalg.tensordot(inputs, _kernel.AsTensor(), new[,] { { ranks - 1 }, { 0 } }); + } + else + { + h = math_ops.matmul(inputs, _kernel.AsTensor()); + } + } + + if (_bias != null) + { + h = tf.nn.bias_add(h, _bias); + } + + if (rec_dp_mask != null) + { + prev_output = math_ops.multiply(prev_output, rec_dp_mask)[0]; + } + + ranks = prev_output.rank; + Tensor output; + if (ranks > 2) + { + output = h + tf.linalg.tensordot(prev_output[0], _recurrent_kernel.AsTensor(), new[,] { { ranks - 1 }, { 0 } }); + } + else + { + output = h + math_ops.matmul(prev_output, _recurrent_kernel.AsTensor()); + } + Console.WriteLine($"shape of output: {output.shape}"); + + if (_args.Activation != null) + { + output = _args.Activation.Apply(output); + } + return (output, new Tensors { output }); } } } diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs b/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs index 20962df1..7923192f 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/StackedRNNCells.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.ComponentModel; +using Tensorflow.Common.Types; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.ArgsDefinition.Rnn; using Tensorflow.Keras.Engine; @@ -8,7 +9,7 @@ using Tensorflow.Keras.Saving; namespace Tensorflow.Keras.Layers.Rnn { - public class
StackedRNNCells : Layer, RNNArgs.IRnnArgCell + public class StackedRNNCells : Layer, IRnnCell { public IList Cells { get; set; } public bool reverse_state_order; @@ -51,7 +52,7 @@ namespace Tensorflow.Keras.Layers.Rnn { return lastCell.output_size; } - else if (RNN._is_multiple_state(lastCell.state_size)) + else if (RNN.is_multiple_state(lastCell.StateSize)) { // return ((dynamic)Cells[-1].state_size)[0]; throw new NotImplementedException(""); @@ -162,5 +163,13 @@ namespace Tensorflow.Keras.Layers.Rnn // deserialize_layer(cell_config, custom_objects = custom_objects)) // return cls(cells, **config) } + + public (Tensor, Tensors) Call(Tensors inputs, Tensors states, bool? training = null) + { + throw new NotImplementedException(); + } + public GeneralizedTensorShape StateSize => throw new NotImplementedException(); + public GeneralizedTensorShape OutputSize => throw new NotImplementedException(); + public bool SupportOptionalArgs => throw new NotImplementedException(); } } diff --git a/src/TensorFlowNET.Keras/Layers/TensorFlowOpLayer.cs b/src/TensorFlowNET.Keras/Layers/TensorFlowOpLayer.cs index 1ac4a277..6dfec319 100644 --- a/src/TensorFlowNET.Keras/Layers/TensorFlowOpLayer.cs +++ b/src/TensorFlowNET.Keras/Layers/TensorFlowOpLayer.cs @@ -10,6 +10,7 @@ using Tensorflow.Keras.Engine; using static Tensorflow.Binding; using Tensorflow.Functions; using System.Threading; +using Tensorflow.Common.Types; namespace Tensorflow.Keras.Layers { @@ -34,7 +35,7 @@ namespace Tensorflow.Keras.Layers built = true; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null) + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? 
optional_args = null) { if (tf.Context.executing_eagerly()) return DeFunCall(inputs); diff --git a/src/TensorFlowNET.Keras/Metrics/metrics_utils.cs b/src/TensorFlowNET.Keras/Metrics/metrics_utils.cs index be6a49ec..3c2f8a7b 100644 --- a/src/TensorFlowNET.Keras/Metrics/metrics_utils.cs +++ b/src/TensorFlowNET.Keras/Metrics/metrics_utils.cs @@ -304,7 +304,7 @@ public class metrics_utils var NEG_INF = -1e10; var (_, top_k_idx) = tf.math.top_k(x, k, sorted: false); var top_k_mask = tf.reduce_sum( - tf.one_hot(top_k_idx, (int)x.shape[-1], axis: -1), axis: -2); + tf.one_hot(top_k_idx.Single, (int)x.shape[-1], axis: -1), axis: -2); return x * top_k_mask + NEG_INF * (1 - top_k_mask); } } diff --git a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs index fa19987b..4acae426 100644 --- a/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs +++ b/src/TensorFlowNET.Keras/Preprocessings/Preprocessing.image_dataset_from_directory.cs @@ -129,7 +129,7 @@ namespace Tensorflow.Keras var indices = z.map(m => { var (i, positions) = m; - return tf.range(positions[i], positions[i] + sequence_length_tensor * sampling_rate_tensor, sampling_rate_tensor); + return tf.range(positions.Single[i], positions.Single[i] + sequence_length_tensor * sampling_rate_tensor, sampling_rate_tensor); }, num_parallel_calls: -1); var dataset = sequences_from_indices(data, indices, start_index, end_index); diff --git a/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs b/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs index a26879e0..396ad20e 100644 --- a/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs +++ b/src/TensorFlowNET.Keras/Saving/KerasObjectLoader.cs @@ -8,7 +8,7 @@ using System.Diagnostics; using System.Linq; using System.Reflection; using System.Text.RegularExpressions; -using Tensorflow.Extensions; +using Tensorflow.Common.Extensions; 
using Tensorflow.Framework.Models; using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; diff --git a/src/TensorFlowNET.Keras/Utils/RnnUtils.cs b/src/TensorFlowNET.Keras/Utils/RnnUtils.cs new file mode 100644 index 00000000..3109eb77 --- /dev/null +++ b/src/TensorFlowNET.Keras/Utils/RnnUtils.cs @@ -0,0 +1,93 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Text; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Layers.Rnn; +using Tensorflow.Common.Extensions; + +namespace Tensorflow.Keras.Utils +{ + internal static class RnnUtils + { + internal static Tensors generate_zero_filled_state(long batch_size_tensor, GeneralizedTensorShape state_size, TF_DataType dtype) + { + Func create_zeros; + create_zeros = (GeneralizedTensorShape unnested_state_size) => + { + var flat_dims = unnested_state_size.ToSingleShape().dims; + var init_state_size = new long[] { batch_size_tensor }.Concat(flat_dims).ToArray(); + return array_ops.zeros(new Shape(init_state_size), dtype: dtype); + }; + + // TODO(Rinne): map structure with nested tensors. + if(state_size.Shapes.Length > 1) + { + return new Tensors(state_size.ToShapeArray().Select(s => create_zeros(new GeneralizedTensorShape(s)))); + } + else + { + return create_zeros(state_size); + } + + } + + internal static Tensors generate_zero_filled_state_for_cell(IRnnCell cell, Tensors inputs, long batch_size, TF_DataType dtype) + { + if (inputs != null) + { + batch_size = inputs.shape[0]; + dtype = inputs.dtype; + } + return generate_zero_filled_state(batch_size, cell.StateSize, dtype); + } + + /// + /// Standardizes `__call__` to a single list of tensor inputs. 
+ /// + /// When running a model loaded from a file, the input tensors + /// `initial_state` and `constants` can be passed to `RNN.__call__()` as part + /// of `inputs` instead of by the dedicated keyword arguments. This method + /// makes sure the arguments are separated and that `initial_state` and + /// `constants` are lists of tensors (or None). + /// + /// Tensor or list/tuple of tensors, which may include constants + /// and initial states. In that case `num_constants` must be specified. + /// Tensor or list of tensors or None, initial states. + /// Tensor or list of tensors or None, constant tensors. + /// Expected number of constants (if constants are passed as + /// part of the `inputs` list). + /// + internal static (Tensors, Tensors, Tensors) standardize_args(Tensors inputs, Tensors initial_state, Tensors constants, int num_constants) + { + if(inputs.Length > 1) + { + // There are several situations here: + // In the graph mode, __call__ will only be called once. The initial_state + // and constants could be in inputs (from file loading). + // In the eager mode, __call__ will be called twice, once during + // rnn_layer(inputs=input_t, constants=c_t, ...), and the second time will be + // model.fit/train_on_batch/predict with real np data. In the second case, + // the inputs will contain initial_state and constants as eager tensors. + // + // For either case, the real input is the first item in the list, which + // could be a nested structure itself. It is followed by initial_states, which + // could be a list of items, or a list of lists if the initial_state is a complex + // structure, and finally by constants, which is a flat list.
+ Debug.Assert(initial_state is null && constants is null); + if(num_constants > 0) + { + constants = inputs.TakeLast(num_constants).ToTensors(); + inputs = inputs.SkipLast(num_constants).ToTensors(); + } + if(inputs.Length > 1) + { + initial_state = inputs.Skip(1).ToTensors(); + inputs = inputs.Take(1).ToTensors(); + } + } + + return (inputs, initial_state, constants); + } + } +} diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs index 3de33746..f4980b82 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs @@ -144,17 +144,6 @@ namespace Tensorflow.Keras.UnitTest.Layers Assert.AreEqual(expected_output, actual_output); } - [TestMethod, Ignore("WIP")] - public void SimpleRNN() - { - var inputs = np.arange(6 * 10 * 8).reshape((6, 10, 8)).astype(np.float32); - /*var simple_rnn = keras.layers.SimpleRNN(4); - var output = simple_rnn.Apply(inputs); - Assert.AreEqual((32, 4), output.shape);*/ - var simple_rnn = tf.keras.layers.SimpleRNN(4, return_sequences: true, return_state: true); - var (whole_sequence_output, final_state) = simple_rnn.Apply(inputs); - } - [TestMethod] public void Resizing() { diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs new file mode 100644 index 00000000..55663d41 --- /dev/null +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs @@ -0,0 +1,28 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Tensorflow.NumPy; +using static Tensorflow.Binding; + +namespace Tensorflow.Keras.UnitTest.Layers +{ + [TestClass] + public class Rnn + { + [TestMethod] + public void SimpleRNN() + { + var inputs = np.arange(6 * 10 * 8).reshape((6, 10, 8)).astype(np.float32); + /*var simple_rnn = 
keras.layers.SimpleRNN(4); + var output = simple_rnn.Apply(inputs); + Assert.AreEqual((32, 4), output.shape);*/ + var simple_rnn = tf.keras.layers.SimpleRNN(4, return_sequences: true, return_state: true); + var (whole_sequence_output, final_state) = simple_rnn.Apply(inputs); + Console.WriteLine(whole_sequence_output); + Console.WriteLine(final_state); + } + } +} diff --git a/tools/TensorFlowNET.Console/SimpleRnnTest.cs b/tools/TensorFlowNET.Console/SimpleRnnTest.cs index 9769eb65..ae6ebb8a 100644 --- a/tools/TensorFlowNET.Console/SimpleRnnTest.cs +++ b/tools/TensorFlowNET.Console/SimpleRnnTest.cs @@ -20,7 +20,7 @@ namespace Tensorflow // whole_sequence_output has shape `[32, 10, 4]`. // final_state has shape `[32, 4]`. - var (whole_sequence_output, final_state) = simple_rnn.Apply(inputs); + var (whole_sequence_output, final_states) = simple_rnn.Apply(inputs); } } }