Improve the API of LayerNormalization
@@ -14,6 +14,7 @@
 limitations under the License.
 ******************************************************************************/
using System.Xml.Linq;
using Tensorflow.Operations;
using Tensorflow.Operations.Activation;
using static Tensorflow.Binding;
@@ -126,6 +127,34 @@ namespace Tensorflow
    name: name,
    exponential_avg_factor: exponential_avg_factor);
/// <summary>
/// Normalizes a tensor by `mean` and `variance`, and applies (optionally) a `scale` \\(\gamma\\) to it, as well as an `offset` \\(\beta\\).
/// </summary>
/// <param name="x">A floating point tensor.</param>
/// <param name="mean">A mean `Tensor`.</param>
/// <param name="variance">A variance `Tensor`.</param>
/// <param name="offset">An offset `Tensor`, often denoted \\(\beta\\) in equations, or null. If present, it is added to the normalized tensor.</param>
/// <param name="scale">A scale `Tensor`, often denoted \\(\gamma\\) in equations, or null. If present, the scale is applied to the normalized tensor.</param>
/// <param name="variance_epsilon">A small float number to avoid dividing by 0.</param>
/// <param name="name">A name for this operation.</param>
/// <returns>The normalized, scaled, offset tensor.</returns>
public Tensor batch_normalization(Tensor x,
    Tensor mean,
    Tensor variance,
    Tensor offset,
    Tensor scale,
    float variance_epsilon,
    string name = null)
{
    return tf_with(ops.name_scope(name, "batchnorm", (x, mean, variance, scale, offset)), scope =>
    {
        // inv = scale / sqrt(variance + epsilon); the scale factor is folded in so
        // the result is a single multiply-add on x.
        var inv = math_ops.rsqrt(variance + variance_epsilon);
        if (scale != null) inv *= scale;
        if (offset != null)
            return x * math_ops.cast(inv, x.dtype) + math_ops.cast(offset - mean * inv, dtype: x.dtype);
        else
            return x * math_ops.cast(inv, x.dtype) + math_ops.cast(-mean * inv, dtype: x.dtype);
    });
}
public Tensor max_pool(Tensor value, int[] ksize, int[] strides, string padding, string data_format = "NHWC", string name = null)
    => nn_ops.max_pool(value, ksize, strides, padding, data_format: data_format, name: name);
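For context, here is a minimal usage sketch of the newly exposed `tf.nn.batch_normalization` (not part of the diff; it assumes the eager API and the tuple-returning `tf.nn.moments` that the LayerNormalization change below also relies on):

// Sketch only: normalize the columns of a small tensor using statistics from tf.nn.moments.
var x = tf.constant(new float[,] { { 1f, 2f }, { 3f, 4f }, { 5f, 6f } });
(Tensor mean, Tensor variance) = tf.nn.moments(x, new[] { 0 }, keep_dims: true);
var normalized = tf.nn.batch_normalization(
    x,
    mean,
    variance,
    offset: null,   // no shift
    scale: null,    // no scaling
    variance_epsilon: 1e-5f);
// Each column of `normalized` is now approximately { -1.22, 0, 1.22 }.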
@@ -153,9 +153,22 @@ namespace Tensorflow.Keras.Layers
}
else
{
    var input_dtype = inputs.dtype;
    if ((input_dtype == tf.float16) && DType == tf.float32) inputs = tf.cast(inputs, tf.float32);
    (Tensor mean, Tensor variance) = tf.nn.moments(inputs, axis, keep_dims: true);
    (Tensor scale, Tensor offset) = (_broadcast(gamma), _broadcast(beta));
    outputs = tf.nn.batch_normalization(
        inputs,
        mean,
        variance,
        offset: offset,
        scale: scale,
        variance_epsilon: epsilon);
    outputs = tf.cast(outputs, input_dtype);
}
// If some components of the shape got lost due to adjustments, fix that.
outputs.shape = input_shape;
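For reference (my reading, not stated in the PR): `batch_normalization` folds the scale into the reciprocal standard deviation, so the single multiply-add it returns is algebraically the usual affine normalization, which is exactly what the layer needs per sample over the normalized axes:

\mathrm{inv} = \frac{\gamma}{\sqrt{\sigma^2 + \varepsilon}}, \qquad
x \cdot \mathrm{inv} + (\beta - \mu \cdot \mathrm{inv})
    = \gamma \, \frac{x - \mu}{\sqrt{\sigma^2 + \varepsilon}} + \beta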
@@ -1,5 +1,7 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using System.Linq;
using Tensorflow.NumPy;
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;
@@ -161,6 +163,26 @@ namespace Tensorflow.Keras.UnitTest.Layers
    Tensor output = layer.Apply(inputs);
    Assert.AreEqual((5, 2), output.shape);
    Assert.IsTrue(output[0].numpy().Equals(new[] { -0.99998f, 0.99998f }));
    // test_layernorm_weights
    Assert.AreEqual(len(layer.TrainableWeights), 2);
    Assert.AreEqual(len(layer.Weights), 2);
    var beta = layer.Weights.Where(x => x.Name.StartsWith("beta")).Single();
    var gamma = layer.Weights.Where(x => x.Name.StartsWith("gamma")).Single();

    // correctness_test
    layer = keras.layers.LayerNormalization(axis: -1, epsilon: (float)1e-12);
    var x = np.random.normal(loc: 5.0f, scale: 10.0f, size: (1000, 2, 2, 2)).astype(tf.float32);
    output = layer.Apply(x);
    var y = (output - beta.numpy()) / gamma.numpy();
    var y_mean = np.mean(y.numpy());
    var y_std = np.sqrt(np.sum(np.power(y.numpy() - np.mean(y.numpy()), 2)) / 8000);
    Assert.IsTrue(tf.greater(np.array(0.1f), tf.abs(y_std - 1.0)).ToArray<bool>()[0]);
    Assert.IsTrue(tf.greater(np.array(0.1f), tf.abs(y_mean)).ToArray<bool>()[0]);
}

/// <summary>
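A note on the correctness check (inferred from the test body, not from the PR text): after applying the layer to N(5, 10) data, subtracting `beta` and dividing by `gamma` recovers the normalized values y; over all 1000 * 2 * 2 * 2 = 8000 elements the test requires their sample mean to be within 0.1 of 0 and their sample standard deviation within 0.1 of 1:

\bar{y} = \tfrac{1}{8000} \sum_i y_i \approx 0, \qquad
\sqrt{\tfrac{1}{8000} \sum_i (y_i - \bar{y})^2} \approx 1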