Add keras.layers.CategoryEncoding.

2 years ago · b8645d3f83
--- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/CategoryEncodingArgs.cs
+++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/CategoryEncodingArgs.cs
@@ -0,0 +1,16 @@
 using Newtonsoft.Json;
 using Tensorflow.NumPy;

 namespace Tensorflow.Keras.ArgsDefinition
 {
    /// <summary>
    /// Construction arguments for the <c>CategoryEncoding</c> preprocessing layer.
    /// Properties carrying a <see cref="JsonPropertyAttribute"/> are mapped to the
    /// snake_case keys shown in the attribute.
    /// </summary>
    public class CategoryEncodingArgs : AutoSerializeLayerArgs
    {
        /// <summary>
        /// Total number of tokens the layer supports. The layer uses this value as both the
        /// minimum and maximum length of the bincount it produces.
        /// </summary>
        [JsonProperty("num_tokens")]
        public int NumTokens { get; set; }
        /// <summary>
        /// Output specification. The layer special-cases "one_hot" and "multi_hot"
        /// (binary output); the unit test also exercises "count".
        /// </summary>
        [JsonProperty("output_mode")]
        public string OutputMode { get; set; }
        /// <summary>
        /// Sparse-output flag. NOTE(review): the layer forwards this value to its encoding
        /// helper, but the helper does not appear to read it - confirm before relying on it.
        /// </summary>
        [JsonProperty("sparse")]
        public bool Sparse { get; set; }
        /// <summary>
        /// Optional weights passed to the bincount operation; may be null.
        /// This property carries no <see cref="JsonPropertyAttribute"/>.
        /// </summary>
        public NDArray CountWeights { get; set; }
    }
 }
--- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
+++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
@@ -1,4 +1,5 @@
 using System;
 using Tensorflow.NumPy;
 using static Google.Protobuf.Reflection.FieldDescriptorProto.Types;

 namespace Tensorflow.Keras.Layers
@@ -28,6 +29,17 @@ namespace Tensorflow.Keras.Layers
            bool renorm = false,
            float renorm_momentum = 0.99f);

        /// <summary>
        /// A preprocessing layer which encodes integer features.
        /// </summary>
        /// <param name="num_tokens">The total number of tokens the layer should support.</param>
        /// <param name="output_mode">Specification for the output of the layer. Defaults to "one_hot"; "multi_hot" and "count" are the other modes exercised by the unit tests.</param>
        /// <param name="sparse">Sparse-output flag stored on the layer arguments. Defaults to false.</param>
        /// <param name="count_weights">Optional weights applied when counting occurrences; null means unweighted.</param>
        /// <returns>The configured category-encoding layer.</returns>
        public ILayer CategoryEncoding(int num_tokens, 
            string output_mode = "one_hot",
            bool sparse = false,
            NDArray count_weights = null);

        public ILayer Conv1D(int filters,
            Shape kernel_size,
            int strides = 1,
--- a/src/TensorFlowNET.Core/Operations/math_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/math_ops.cs
@@ -839,10 +839,24 @@ namespace Tensorflow
                        output_size = math_ops.maximum(minlength, output_size);
                    if (maxlength != null)
                        output_size = math_ops.minimum(maxlength, output_size);
                    var weights = constant_op.constant(new long[0], dtype: dtype);
                    weights = weights ?? constant_op.constant(new int[0], dtype: dtype);
                    return tf.Context.ExecuteOp("Bincount", name, new ExecuteOpArgs(arr, output_size, weights));
                }
                else
                {
                    var array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0;
                    var output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (math_ops.reduce_max(arr) + 1);
                    if (minlength != null)
                        output_size = math_ops.maximum(minlength, output_size);
                    if (maxlength != null)
                        output_size = math_ops.minimum(maxlength, output_size);
                    weights = weights ?? array_ops.constant(new int[0], dtype: dtype);

                    return tf.Context.ExecuteOp("DenseBincount", name,
                        new ExecuteOpArgs(arr, output_size, weights, binary_output)
                            .SetAttributes(new { binary_output }));
                }
                
                throw new NotImplementedException("");
            });

--- a/src/TensorFlowNET.Core/Tensors/constant_op.cs
+++ b/src/TensorFlowNET.Core/Tensors/constant_op.cs
@@ -153,6 +153,10 @@ namespace Tensorflow
            bool allow_broadcast)
        {
            var t = convert_to_eager_tensor(value, tf.Context, dtype: dtype);
            if (dtype != TF_DataType.DtInvalid && dtype != t.dtype)
            {
                t = math_ops.cast(t, dtype);
            }
            if (shape is null || shape.IsNull)
                return t;

--- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs
+++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs
@@ -4,6 +4,7 @@ using Tensorflow.Keras.ArgsDefinition.Core;
 using Tensorflow.Keras.ArgsDefinition.Rnn;
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Layers.Rnn;
 using Tensorflow.NumPy;
 using static Tensorflow.Binding;
 using static Tensorflow.KerasApi;

@@ -829,5 +830,14 @@ namespace Tensorflow.Keras.Layers
                "orthogonal" => tf.orthogonal_initializer,
                _ => tf.glorot_uniform_initializer
            };

        /// <summary>
        /// Builds a <c>CategoryEncoding</c> preprocessing layer from the given settings.
        /// </summary>
        /// <param name="num_tokens">The total number of tokens the layer should support.</param>
        /// <param name="output_mode">Specification for the output of the layer; defaults to "one_hot".</param>
        /// <param name="sparse">Sparse-output flag stored on the layer arguments.</param>
        /// <param name="count_weights">Optional weights stored on the layer arguments; may be null.</param>
        /// <returns>The newly created layer.</returns>
        public ILayer CategoryEncoding(int num_tokens, string output_mode = "one_hot", bool sparse = false, NDArray count_weights = null)
        {
            // Collect the caller's settings into the args object the layer constructor expects.
            var encodingArgs = new CategoryEncodingArgs
            {
                NumTokens = num_tokens,
                OutputMode = output_mode,
                Sparse = sparse,
                CountWeights = count_weights
            };

            return new CategoryEncoding(encodingArgs);
        }
    }
 }
--- a/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs
+++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs
@@ -0,0 +1,75 @@
 using Tensorflow.Keras.ArgsDefinition;
 using Tensorflow.Keras.Engine;

 namespace Tensorflow.Keras.Layers
 {
    /// <summary>
    /// Preprocessing layer that condenses integer category indices into a categorical encoding
    /// ("one_hot", "multi_hot" or "count") when the total number of tokens is known in advance.
    /// </summary>
    public class CategoryEncoding : Layer
    {
        CategoryEncodingArgs args;

        /// <summary>
        /// Creates the layer from its argument object.
        /// </summary>
        /// <param name="args">Number of tokens, output mode, sparse flag and optional count weights.</param>
        public CategoryEncoding(CategoryEncodingArgs args) : base(args)
        {
            this.args = args;
        }

        /// <summary>
        /// Encodes <paramref name="inputs"/> according to the configured output mode.
        /// </summary>
        /// <param name="inputs">Integer category indices.</param>
        /// <param name="state">Unused by this layer.</param>
        /// <param name="training">Unused by this layer.</param>
        /// <returns>The bincount-based encoding of the inputs.</returns>
        /// <exception cref="System.NotImplementedException">
        /// The output mode is "tf_idf", which this layer cannot compute (no idf weights are available).
        /// </exception>
        protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null)
        {
            // Fail loudly instead of handing the un-encoded inputs back to the caller.
            if (args.OutputMode == "tf_idf")
            {
                throw new System.NotImplementedException(
                    "CategoryEncoding does not support output_mode \"tf_idf\".");
            }

            return encode_categorical_inputs(inputs, args.OutputMode, args.NumTokens, args.DType,
                sparse: args.Sparse,
                count_weights: args.CountWeights);
        }

        /// <summary>
        /// Computes the shape produced by <see cref="Call"/> for a given input shape.
        /// </summary>
        /// <param name="input_shape">Shape of the incoming tensor.</param>
        /// <returns>
        /// The input shape with the token axis of length <c>NumTokens</c> appended (one-hot mode
        /// with a last dimension other than 1) or substituted for the last dimension (all other cases).
        /// </returns>
        public override Shape ComputeOutputShape(Shape input_shape)
        {
            long depth = args.NumTokens;

            // Unknown or scalar input: the encoding is a single vector of length depth.
            if (input_shape is null || input_shape.ndim <= 0)
            {
                return new Shape(new long[] { depth });
            }

            int rank = input_shape.ndim;

            // One-hot mode expands the input by one axis unless it already ends in a
            // singleton dimension; otherwise the last axis is replaced by the token axis.
            bool appendsAxis = args.OutputMode == "one_hot" && input_shape[-1] != 1;
            int kept = appendsAxis ? rank : rank - 1;

            var dims = new long[kept + 1];
            for (int i = 0; i < kept; i++)
            {
                dims[i] = input_shape[i];
            }
            dims[kept] = depth;

            return new Shape(dims);
        }

        /// <summary>
        /// Encodes integer inputs with a bincount over the last axis.
        /// </summary>
        /// <param name="inputs">Integer category indices.</param>
        /// <param name="output_mode">"one_hot" and "multi_hot" yield binary output; any other value yields counts.</param>
        /// <param name="depth">Length of the token axis; used as both minlength and maxlength.</param>
        /// <param name="dtype">Data type of the result.</param>
        /// <param name="sparse">Currently ignored; the result is always dense.</param>
        /// <param name="count_weights">Optional weights forwarded to the bincount; may be null.</param>
        /// <returns>The encoded tensor.</returns>
        Tensors encode_categorical_inputs(Tensor inputs, string output_mode, int depth, 
            TF_DataType dtype = TF_DataType.TF_FLOAT, 
            bool sparse = false, 
            Tensor count_weights = null)
        {
            // A scalar has no last axis to count over, so treat it as a one-element vector.
            if (inputs.shape.ndim == 0)
            {
                inputs = tf.expand_dims(inputs, -1);
            }

            bool binary_output = output_mode == "one_hot" || output_mode == "multi_hot";

            // One-hot encodes every element separately, so each element needs its own
            // trailing axis of length 1 before counting.
            if (output_mode == "one_hot" && inputs.shape[-1] != 1)
            {
                inputs = tf.expand_dims(inputs, -1);
            }

            // Pinning minlength and maxlength to depth fixes the size of the token axis.
            var depth_tensor = constant_op.constant(depth);
            var result = tf.math.bincount(inputs, 
                weights: count_weights,
                minlength: depth_tensor,
                maxlength: depth_tensor,
                dtype: dtype,
                axis: -1,
                binary_output: binary_output);

            return result;
        }
    }
 }
--- a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
@@ -177,5 +177,60 @@ namespace TensorFlowNET.Keras.UnitTest
            Assert.AreEqual((5, 2), output.shape);
            Assert.IsTrue(output[0].numpy().Equals(new[] { -0.99998f, 0.99998f }));
        }

        /// <summary>
        /// Checks the CategoryEncoding layer in "one_hot", "multi_hot" and weighted "count" modes.
        /// Reference: https://www.tensorflow.org/api_docs/python/tf/keras/layers/CategoryEncoding
        /// </summary>
        [TestMethod]
        public void CategoryEncoding()
        {
            // one-hot: every scalar index becomes its own row with a single 1
            var indices = np.array(new[] { 3, 2, 0, 1 });
            var oneHotLayer = tf.keras.layers.CategoryEncoding(4);

            Tensor oneHot = oneHotLayer.Apply(indices);
            Assert.AreEqual((4, 4), oneHot.shape);
            Assert.IsTrue(oneHot[0].numpy().Equals(new[] { 0, 0, 0, 1f }));
            Assert.IsTrue(oneHot[1].numpy().Equals(new[] { 0, 0, 1, 0f }));
            Assert.IsTrue(oneHot[2].numpy().Equals(new[] { 1, 0, 0, 0f }));
            Assert.IsTrue(oneHot[3].numpy().Equals(new[] { 0, 1, 0, 0f }));

            // multi-hot: each row marks every index it contains, duplicates collapse to 1
            var pairs = np.array(new[,]
            {
                { 0, 1 },
                { 0, 0 },
                { 1, 2 },
                { 3, 1 }
            });
            var multiHotLayer = tf.keras.layers.CategoryEncoding(4, output_mode: "multi_hot");
            Tensor multiHot = multiHotLayer.Apply(pairs);
            Assert.IsTrue(multiHot[0].numpy().Equals(new[] { 1, 1, 0, 0f }));
            Assert.IsTrue(multiHot[1].numpy().Equals(new[] { 1, 0, 0, 0f }));
            Assert.IsTrue(multiHot[2].numpy().Equals(new[] { 0, 1, 1, 0f }));
            Assert.IsTrue(multiHot[3].numpy().Equals(new[] { 0, 1, 0, 1f }));

            // using weighted inputs in "count" mode: weights of repeated indices add up
            var weightedPairs = np.array(new[,]
            {
                { 0, 1 },
                { 0, 0 },
                { 1, 2 },
                { 3, 1 }
            });
            var pairWeights = np.array(new[,]
            {
                { 0.1f, 0.2f },
                { 0.1f, 0.1f },
                { 0.2f, 0.3f },
                { 0.4f, 0.2f }
            });
            var countLayer = tf.keras.layers.CategoryEncoding(4, output_mode: "count", count_weights: pairWeights);
            Tensor counts = countLayer.Apply(weightedPairs);
            Assert.IsTrue(counts[0].numpy().Equals(new[] { 0.1f, 0.2f, 0f, 0f }));
            Assert.IsTrue(counts[1].numpy().Equals(new[] { 0.2f, 0f, 0f, 0f }));
            Assert.IsTrue(counts[2].numpy().Equals(new[] { 0f, 0.2f, 0.3f, 0f }));
            Assert.IsTrue(counts[3].numpy().Equals(new[] { 0f, 0.2f, 0f, 0.4f }));
        }
    }
 }
--- a/test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs
@@ -4,11 +4,12 @@ using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 using Tensorflow;
 using TensorFlowNET.Keras.UnitTest;
 using static Tensorflow.Binding;
 using static Tensorflow.KerasApi;

 namespace Tensorflow.Keras.UnitTest.Losses;
 namespace TensorFlowNET.Keras.UnitTest;

 [TestClass]
 public class LossesTest : EagerModeTestBase