diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/MergeArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/MergeArgs.cs new file mode 100644 index 00000000..ce7203de --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/MergeArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class MergeArgs : LayerArgs + { + public Tensors Inputs { get; set; } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Add.cs b/src/TensorFlowNET.Keras/Layers/Add.cs new file mode 100644 index 00000000..94c8c591 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Add.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; + +namespace Tensorflow.Keras.Layers +{ + public class Add : Merge + { + public Add(MergeArgs args) : base(args) + { + + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/BatchNormalization.cs b/src/TensorFlowNET.Keras/Layers/BatchNormalization.cs index e61391f6..3b0b9013 100644 --- a/src/TensorFlowNET.Keras/Layers/BatchNormalization.cs +++ b/src/TensorFlowNET.Keras/Layers/BatchNormalization.cs @@ -119,14 +119,14 @@ namespace Tensorflow.Keras.Layers built = true; } - protected override Tensors Call(Tensors inputs, Tensor state = null, bool is_training = false) + protected override Tensors Call(Tensors inputs, Tensor state = null, bool training = false) { Tensor outputs = null; - + var training_tensor = tf.logical_and(training, Trainable); if (fused) { - Tensor training = tf.convert_to_tensor(is_training); - outputs = _fused_batch_norm(inputs, training: training); + // var training = tf.convert_to_tensor(training); + outputs = _fused_batch_norm(inputs, training: training_tensor); return outputs; } @@ -150,20 +150,21 @@ namespace Tensorflow.Keras.Layers inputs, gamma, beta, - epsilon: epsilon, - data_format: _data_format); + mean: moving_mean, + variance: moving_variance, + epsilon: 
epsilon, is_training: true, + data_format: _data_format, + exponential_avg_factor: exponential_avg_factor); }; Func _fused_batch_norm_inference = () => { - var moving_mean_tensor = moving_mean.AsTensor(); - var moving_variance_tensor = moving_variance.AsTensor(); return tf.nn.fused_batch_norm( inputs, gamma, beta, - mean: moving_mean_tensor, - variance: moving_variance_tensor, + mean: moving_mean, + variance: moving_variance, epsilon: epsilon, is_training: false, data_format: _data_format); @@ -176,35 +177,54 @@ namespace Tensorflow.Keras.Layers var (output, mean, variance) = (results[0], results[1], results[2]); var training_value = tf_utils.constant_value(training); - Tensor momentum_tensor; - if (training_value == null) + if (!training_value.HasValue || (training_value.HasValue && training_value.Value)) { - momentum_tensor = tf_utils.smart_cond(training, - () => new float[] { momentum }, () => new float[] { 1.0f })[0]; - } - else - { - momentum_tensor = ops.convert_to_tensor(momentum); - } + Tensor momentum_tensor = null; + if (!use_fused_avg_updates) + { + if (training_value == null) + momentum_tensor = tf_utils.smart_cond(training, + () => new float[] { momentum }, + () => new float[] { 1.0f })[0]; + else + momentum_tensor = ops.convert_to_tensor(momentum); + } + + if (use_fused_avg_updates) + _assign_new_value(moving_mean, mean); + else + _assign_moving_average(moving_mean, mean, momentum_tensor); - if (training_value == null) - { - var mean_update = _assign_moving_average(moving_mean.AsTensor(), mean, momentum_tensor); - var variance_update = _assign_moving_average(moving_variance.AsTensor(), variance, momentum_tensor); - add_update(new Tensor[] { mean_update }, inputs: true); - add_update(new Tensor[] { variance_update }, inputs: true); + if (use_fused_avg_updates) + _assign_new_value(moving_variance, variance); + else + _assign_moving_average(moving_variance, variance, momentum_tensor); + + // var mean_update = 
_assign_moving_average(moving_mean.AsTensor(), mean, momentum_tensor); + // var variance_update = _assign_moving_average(moving_variance.AsTensor(), variance, momentum_tensor); + // add_update(new Tensor[] { mean_update }, inputs: true); + // add_update(new Tensor[] { variance_update }, inputs: true); } return output; } - public Tensor _assign_moving_average(RefVariable variable, Tensor value, Tensor momentum) + Tensor _assign_new_value(IVariableV1 variable, Tensor value) + { + return tf_with(ops.name_scope("AssignNewValue", null, new { variable, value, momentum }), scope => + { + // var cm = ops.colocate_with(variable); + return state_ops.assign(variable, value, name: scope); + }); + } + + Tensor _assign_moving_average(IVariableV1 variable, Tensor value, Tensor momentum) { - return tf_with(ops.name_scope(null, "AssignMovingAvg", new { variable, value, momentum }), scope => + return tf_with(ops.name_scope("AssignMovingAvg", null, new { variable, value, momentum }), scope => { // var cm = ops.colocate_with(variable); var decay = ops.convert_to_tensor(1.0f - momentum, name: "decay"); - var update_delta = (variable - math_ops.cast(value, variable.dtype)) * decay; + var update_delta = (variable.AsTensor() - math_ops.cast(value, variable.dtype)) * decay; return state_ops.assign_sub(variable, update_delta, name: scope); }); } diff --git a/src/TensorFlowNET.Keras/Layers/Convolutional.cs b/src/TensorFlowNET.Keras/Layers/Convolutional.cs index 1be8527c..a7eb9aa6 100644 --- a/src/TensorFlowNET.Keras/Layers/Convolutional.cs +++ b/src/TensorFlowNET.Keras/Layers/Convolutional.cs @@ -20,6 +20,7 @@ using Tensorflow.Keras.ArgsDefinition; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Utils; using Tensorflow.Operations; +using static Tensorflow.Binding; namespace Tensorflow.Keras.Layers { diff --git a/src/TensorFlowNET.Keras/Layers/GlobalAveragePooling2D.cs b/src/TensorFlowNET.Keras/Layers/GlobalAveragePooling2D.cs new file mode 100644 index 00000000..efc8050d --- 
/dev/null +++ b/src/TensorFlowNET.Keras/Layers/GlobalAveragePooling2D.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; + +namespace Tensorflow.Keras.Layers +{ + public class GlobalAveragePooling2D : GlobalPooling2D + { + public GlobalAveragePooling2D(Pooling2DArgs args) + : base(args) + { + } + + protected override Tensors Call(Tensors inputs, Tensor state = null, bool is_training = false) + { + if (data_format == "channels_last") + return math_ops.reduce_mean(inputs, new int[] { 1, 2 }, false); + else + return math_ops.reduce_mean(inputs, new int[] { 2, 3 }, false); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/GlobalPooling2D.cs b/src/TensorFlowNET.Keras/Layers/GlobalPooling2D.cs new file mode 100644 index 00000000..e944aef0 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/GlobalPooling2D.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Engine; +using Tensorflow.Keras.Utils; + +namespace Tensorflow.Keras.Layers +{ + public abstract class GlobalPooling2D : Layer + { + Pooling2DArgs args; + protected string data_format => args.DataFormat; + protected InputSpec input_spec; + + public GlobalPooling2D(Pooling2DArgs args) : base(args) + { + this.args = args; + args.DataFormat = conv_utils.normalize_data_format(data_format); + input_spec = new InputSpec(ndim: 4); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index 7711dd16..ebdd09f6 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -28,8 +28,7 @@ namespace Tensorflow.Keras.Layers /// /// /// - public Tensors batch_normalization(Tensor inputs, - int axis = -1, + public BatchNormalization BatchNormalization(int axis = -1, float momentum = 0.99f, float epsilon = 0.001f, bool center = true, @@ -38,31 
+37,26 @@ namespace Tensorflow.Keras.Layers IInitializer gamma_initializer = null, IInitializer moving_mean_initializer = null, IInitializer moving_variance_initializer = null, - Tensor training = null, bool trainable = true, string name = null, bool renorm = false, float renorm_momentum = 0.99f) - { - var layer = new BatchNormalization(new BatchNormalizationArgs - { - Axis = axis, - Momentum = momentum, - Epsilon = epsilon, - Center = center, - Scale = scale, - BetaInitializer = beta_initializer, - GammaInitializer = gamma_initializer, - MovingMeanInitializer = moving_mean_initializer, - MovingVarianceInitializer = moving_variance_initializer, - Renorm = renorm, - RenormMomentum = renorm_momentum, - Trainable = trainable, - Name = name - }); - - return layer.Apply(inputs); - } + => new BatchNormalization(new BatchNormalizationArgs + { + Axis = axis, + Momentum = momentum, + Epsilon = epsilon, + Center = center, + Scale = scale, + BetaInitializer = beta_initializer ?? tf.zeros_initializer, + GammaInitializer = gamma_initializer ?? tf.ones_initializer, + MovingMeanInitializer = moving_mean_initializer ?? tf.zeros_initializer, + MovingVarianceInitializer = moving_variance_initializer ?? tf.ones_initializer, + Renorm = renorm, + RenormMomentum = renorm_momentum, + Trainable = trainable, + Name = name + }); /// /// @@ -115,53 +109,64 @@ namespace Tensorflow.Keras.Layers Activation = activation ?? 
keras.activations.Linear }); - public Tensor conv2d(Tensor inputs, - int filters, - int[] kernel_size, - int[] strides = null, + public Conv2D Conv2D(int filters, + TensorShape kernel_size = null, + TensorShape strides = null, string padding = "valid", - string data_format = "channels_last", - int[] dilation_rate = null, + string data_format = null, + TensorShape dilation_rate = null, + int groups = 1, + string activation = null, bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string bias_initializer = "zeros", + string kernel_regularizer = null, + string bias_regularizer = null, + string activity_regularizer = null) + => new Conv2D(new Conv2DArgs + { + Rank = 2, + Filters = filters, + KernelSize = kernel_size, + Strides = strides == null ? (1, 1) : strides, + Padding = padding, + DataFormat = data_format, + DilationRate = dilation_rate == null ? (1, 1) : dilation_rate, + Groups = groups, + UseBias = use_bias, + KernelInitializer = GetInitializerByName(kernel_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + Activation = GetActivationByName(activation) + }); + + public Dense Dense(int units, Activation activation = null, IInitializer kernel_initializer = null, IInitializer bias_initializer = null, - bool trainable = true, - string name = null) - { - if (strides == null) - strides = new int[] { 1, 1 }; - if (dilation_rate == null) - dilation_rate = new int[] { 1, 1 }; - if (bias_initializer == null) - bias_initializer = tf.zeros_initializer; - - var layer = new Conv2D(new Conv2DArgs + TensorShape input_shape = null) + => new Dense(new DenseArgs { - Filters = filters, - KernelSize = kernel_size, - Strides = strides, - Padding = padding, - DataFormat = data_format, - DilationRate = dilation_rate, - Activation = activation, - UseBias = use_bias, - KernelInitializer = kernel_initializer, - BiasInitializer = bias_initializer, - Trainable = trainable, - Name = name + Units = units, + Activation = activation ?? 
keras.activations.Linear, + KernelInitializer = kernel_initializer ?? tf.glorot_uniform_initializer, + BiasInitializer = bias_initializer ?? tf.zeros_initializer, + InputShape = input_shape }); - return layer.Apply(inputs); - } + public Dense Dense(int units) + => new Dense(new DenseArgs + { + Units = units, + Activation = GetActivationByName("linear") + }); public Dense Dense(int units, - Activation activation = null, + string activation = null, TensorShape input_shape = null) => new Dense(new DenseArgs { Units = units, - Activation = activation ?? keras.activations.Linear, + Activation = GetActivationByName(activation), InputShape = input_shape }); @@ -367,6 +372,12 @@ namespace Tensorflow.Keras.Layers Padding = padding }); + public Tensor add(params Tensor[] inputs) + => new Add(new MergeArgs { Inputs = inputs }).Apply(inputs); + + public GlobalAveragePooling2D GlobalAveragePooling2D() + => new GlobalAveragePooling2D(new Pooling2DArgs { }); + Activation GetActivationByName(string name) => name switch { @@ -376,5 +387,14 @@ namespace Tensorflow.Keras.Layers "tanh" => keras.activations.Tanh, _ => keras.activations.Linear }; + + IInitializer GetInitializerByName(string name) + => name switch + { + "glorot_uniform" => tf.glorot_uniform_initializer, + "zeros" => tf.zeros_initializer, + "ones" => tf.ones_initializer, + _ => tf.glorot_uniform_initializer + }; } } diff --git a/src/TensorFlowNET.Keras/Layers/Merge.cs b/src/TensorFlowNET.Keras/Layers/Merge.cs new file mode 100644 index 00000000..bfed03ad --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Merge.cs @@ -0,0 +1,35 @@ +using System; +using System.Collections.Generic; +using System.Text; +using static Tensorflow.Binding; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Engine; + +namespace Tensorflow.Keras.Layers +{ + public abstract class Merge : Layer + { + public Merge(MergeArgs args) : base(args) + { + + } + + protected override void build(TensorShape input_shape) + { + // output_shape = 
input_shape.dims[1^]; + } + + protected override Tensors Call(Tensors inputs, Tensor state = null, bool is_training = false) + { + return _merge_function(inputs); + } + + Tensors _merge_function(Tensors inputs) + { + var output = inputs[0]; + foreach (var i in range(1, inputs.Length)) + output += inputs[i]; + return output; + } + } +}