Browse Source

Add keras.layers.CategoryEncoding.

tags/v0.100.4-load-saved-model
Haiping Chen 2 years ago
parent
commit
b8645d3f83
8 changed files with 189 additions and 2 deletions
  1. +16
    -0
      src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/CategoryEncodingArgs.cs
  2. +12
    -0
      src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
  3. +15
    -1
      src/TensorFlowNET.Core/Operations/math_ops.cs
  4. +4
    -0
      src/TensorFlowNET.Core/Tensors/constant_op.cs
  5. +10
    -0
      src/TensorFlowNET.Keras/Layers/LayersApi.cs
  6. +75
    -0
      src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs
  7. +55
    -0
      test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
  8. +2
    -1
      test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs

+ 16
- 0
src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/CategoryEncodingArgs.cs View File

@@ -0,0 +1,16 @@
using Newtonsoft.Json;
using Tensorflow.NumPy;

namespace Tensorflow.Keras.ArgsDefinition
{
    /// <summary>
    /// Argument bag for the category-encoding preprocessing layer.
    /// </summary>
    public class CategoryEncodingArgs : AutoSerializeLayerArgs
    {
        /// <summary>Number of tokens; serialized under the key <c>num_tokens</c>.</summary>
        [JsonProperty("num_tokens")] public int NumTokens { get; set; }

        /// <summary>Output mode name; serialized under the key <c>output_mode</c>.</summary>
        [JsonProperty("output_mode")] public string OutputMode { get; set; }

        /// <summary>Sparse flag; serialized under the key <c>sparse</c>.</summary>
        [JsonProperty("sparse")] public bool Sparse { get; set; }

        /// <summary>Optional count weights. This property carries no <c>JsonProperty</c> attribute.</summary>
        public NDArray CountWeights { get; set; }
    }
}

+ 12
- 0
src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs View File

@@ -1,4 +1,5 @@
using System;
using Tensorflow.NumPy;
using static Google.Protobuf.Reflection.FieldDescriptorProto.Types;

namespace Tensorflow.Keras.Layers
@@ -28,6 +29,17 @@ namespace Tensorflow.Keras.Layers
bool renorm = false,
float renorm_momentum = 0.99f);

/// <summary>
/// A preprocessing layer which encodes integer features.
/// </summary>
/// <param name="num_tokens">The total number of tokens the layer should support.</param>
/// <param name="output_mode">Specification for the output of the layer. Defaults to "one_hot".</param>
/// <param name="sparse">Sparse-output flag handed to the layer. Defaults to false.</param>
/// <param name="count_weights">Optional weights handed to the layer. Defaults to null (no weights).</param>
/// <returns>The created layer.</returns>
public ILayer CategoryEncoding(int num_tokens,
string output_mode = "one_hot",
bool sparse = false,
NDArray count_weights = null);

public ILayer Conv1D(int filters,
Shape kernel_size,
int strides = 1,


+ 15
- 1
src/TensorFlowNET.Core/Operations/math_ops.cs View File

@@ -839,10 +839,24 @@ namespace Tensorflow
output_size = math_ops.maximum(minlength, output_size);
if (maxlength != null)
output_size = math_ops.minimum(maxlength, output_size);
var weights = constant_op.constant(new long[0], dtype: dtype);
weights = weights ?? constant_op.constant(new int[0], dtype: dtype);
return tf.Context.ExecuteOp("Bincount", name, new ExecuteOpArgs(arr, output_size, weights));
}
else
{
var array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0;
var output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (math_ops.reduce_max(arr) + 1);
if (minlength != null)
output_size = math_ops.maximum(minlength, output_size);
if (maxlength != null)
output_size = math_ops.minimum(maxlength, output_size);
weights = weights ?? array_ops.constant(new int[0], dtype: dtype);

return tf.Context.ExecuteOp("DenseBincount", name,
new ExecuteOpArgs(arr, output_size, weights, binary_output)
.SetAttributes(new { binary_output }));
}
throw new NotImplementedException("");
});



+ 4
- 0
src/TensorFlowNET.Core/Tensors/constant_op.cs View File

@@ -153,6 +153,10 @@ namespace Tensorflow
bool allow_broadcast)
{
var t = convert_to_eager_tensor(value, tf.Context, dtype: dtype);
if (dtype != TF_DataType.DtInvalid && dtype != t.dtype)
{
t = math_ops.cast(t, dtype);
}
if (shape is null || shape.IsNull)
return t;



+ 10
- 0
src/TensorFlowNET.Keras/Layers/LayersApi.cs View File

@@ -4,6 +4,7 @@ using Tensorflow.Keras.ArgsDefinition.Core;
using Tensorflow.Keras.ArgsDefinition.Rnn;
using Tensorflow.Keras.Engine;
using Tensorflow.Keras.Layers.Rnn;
using Tensorflow.NumPy;
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;

@@ -829,5 +830,14 @@ namespace Tensorflow.Keras.Layers
"orthogonal" => tf.orthogonal_initializer,
_ => tf.glorot_uniform_initializer
};

/// <summary>
/// Builds a <c>CategoryEncoding</c> layer from the given settings.
/// </summary>
/// <param name="num_tokens">Stored as <c>NumTokens</c> on the layer arguments.</param>
/// <param name="output_mode">Stored as <c>OutputMode</c> on the layer arguments.</param>
/// <param name="sparse">Stored as <c>Sparse</c> on the layer arguments.</param>
/// <param name="count_weights">Stored as <c>CountWeights</c> on the layer arguments.</param>
/// <returns>The newly constructed layer.</returns>
public ILayer CategoryEncoding(int num_tokens, string output_mode = "one_hot", bool sparse = false, NDArray count_weights = null)
{
    var encodingArgs = new CategoryEncodingArgs
    {
        NumTokens = num_tokens,
        OutputMode = output_mode,
        Sparse = sparse,
        CountWeights = count_weights
    };

    return new CategoryEncoding(encodingArgs);
}
}
}

+ 75
- 0
src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs View File

@@ -0,0 +1,75 @@
using Tensorflow.Keras.ArgsDefinition;
using Tensorflow.Keras.Engine;

namespace Tensorflow.Keras.Layers
{
    /// <summary>
    /// This layer provides options for condensing data into a categorical encoding
    /// when the total number of tokens is known in advance.
    /// </summary>
    public class CategoryEncoding : Layer
    {
        readonly CategoryEncodingArgs args;

        /// <summary>
        /// Creates the layer from its argument bag.
        /// </summary>
        /// <param name="args">Layer settings (token count, output mode, sparse flag, count weights).</param>
        public CategoryEncoding(CategoryEncodingArgs args) : base(args)
        {
            this.args = args;
        }

        /// <summary>
        /// Encodes <paramref name="inputs"/> according to the configured output mode.
        /// </summary>
        /// <param name="inputs">Integer features to encode.</param>
        /// <param name="state">Not used by this layer.</param>
        /// <param name="training">Not used by this layer.</param>
        /// <returns>
        /// The bincount-based encoding, or <paramref name="inputs"/> unchanged when the
        /// output mode is "tf_idf".
        /// </returns>
        protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null)
        {
            // NOTE(review): no tf_idf encoding exists in this layer; inputs are passed through
            // unchanged, so return before doing any bincount work that would be discarded.
            if (args.OutputMode == "tf_idf")
            {
                return inputs;
            }

            return encode_categorical_inputs(inputs, args.OutputMode, args.NumTokens, args.DType,
                sparse: args.Sparse,
                count_weights: args.CountWeights);
        }

        /// <summary>
        /// Computes the shape produced by <see cref="Call"/> for a given input shape.
        /// </summary>
        /// <param name="input_shape">Shape of the incoming tensor.</param>
        /// <returns>
        /// The input shape with its last axis replaced by <c>NumTokens</c>; in "one_hot" mode
        /// with a last axis other than 1, <c>NumTokens</c> is appended as a new axis instead.
        /// "tf_idf" and unknown-rank shapes are returned unchanged.
        /// </returns>
        public override Shape ComputeOutputShape(Shape input_shape)
        {
            // Call passes inputs through untouched for tf_idf, so the shape is untouched too.
            if (args.OutputMode == "tf_idf")
            {
                return input_shape;
            }

            var rank = input_shape.ndim;
            if (rank < 0)
            {
                // Rank unknown: nothing can be inferred about the axes.
                return input_shape;
            }
            if (rank == 0)
            {
                return new Shape(new long[] { args.NumTokens });
            }

            // one_hot expands a trailing axis (unless it is already 1) before counting,
            // so every input axis survives and the token axis is appended.
            bool appends_axis = args.OutputMode == "one_hot" && input_shape[-1] != 1;
            int kept = appends_axis ? rank : rank - 1;

            var dims = new long[kept + 1];
            for (int i = 0; i < kept; i++)
            {
                dims[i] = input_shape[i];
            }
            dims[kept] = args.NumTokens;

            return new Shape(dims);
        }

        /// <summary>
        /// Encodes integer inputs by counting occurrences along the last axis.
        /// </summary>
        /// <param name="inputs">Integer tensor to encode.</param>
        /// <param name="output_mode">"one_hot" and "multi_hot" produce binary output; any other value produces counts.</param>
        /// <param name="depth">Used as both the minimum and maximum length of the bincount output.</param>
        /// <param name="dtype">Data type requested from the bincount.</param>
        /// <param name="sparse">Accepted for signature parity; currently not used by this method.</param>
        /// <param name="count_weights">Optional weights forwarded to the bincount.</param>
        /// <returns>The bincount result.</returns>
        Tensors encode_categorical_inputs(Tensor inputs, string output_mode, int depth,
            TF_DataType dtype = TF_DataType.TF_FLOAT,
            bool sparse = false,
            Tensor count_weights = null)
        {
            bool is_one_hot = output_mode == "one_hot";
            bool binary_output = is_one_hot || output_mode == "multi_hot";

            // One-hot counts each element on its own, so give every element a trailing axis of 1.
            // The rank check avoids indexing the last axis of a scalar.
            if (is_one_hot && (inputs.shape.ndim == 0 || inputs.shape[-1] != 1))
            {
                inputs = tf.expand_dims(inputs, -1);
            }

            var depth_tensor = constant_op.constant(depth);
            var result = tf.math.bincount(inputs,
                weights: count_weights,
                minlength: depth_tensor,
                maxlength: depth_tensor,
                dtype: dtype,
                axis: -1,
                binary_output: binary_output);

            return result;
        }
    }
}

+ 55
- 0
test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs View File

@@ -177,5 +177,60 @@ namespace TensorFlowNET.Keras.UnitTest
Assert.AreEqual((5, 2), output.shape);
Assert.IsTrue(output[0].numpy().Equals(new[] { -0.99998f, 0.99998f }));
}

/// <summary>
/// Checks the CategoryEncoding layer in one-hot, multi-hot and weighted count modes.
/// Reference: https://www.tensorflow.org/api_docs/python/tf/keras/layers/CategoryEncoding
/// </summary>
[TestMethod]
public void CategoryEncoding()
{
    // Asserts, row by row and in order, that the output matches the expected values.
    void AssertRows(Tensor actual, float[][] expected)
    {
        for (var row = 0; row < expected.Length; row++)
        {
            Assert.IsTrue(actual[row].numpy().Equals(expected[row]));
        }
    }

    // one-hot
    var one_hot_inputs = np.array(new[] { 3, 2, 0, 1 });
    var one_hot_layer = tf.keras.layers.CategoryEncoding(4);
    Tensor one_hot_output = one_hot_layer.Apply(one_hot_inputs);
    Assert.AreEqual((4, 4), one_hot_output.shape);
    AssertRows(one_hot_output, new[]
    {
        new[] { 0f, 0f, 0f, 1f },
        new[] { 0f, 0f, 1f, 0f },
        new[] { 1f, 0f, 0f, 0f },
        new[] { 0f, 1f, 0f, 0f }
    });

    // multi-hot
    var multi_hot_inputs = np.array(new[,]
    {
        { 0, 1 },
        { 0, 0 },
        { 1, 2 },
        { 3, 1 }
    });
    var multi_hot_layer = tf.keras.layers.CategoryEncoding(4, output_mode: "multi_hot");
    Tensor multi_hot_output = multi_hot_layer.Apply(multi_hot_inputs);
    AssertRows(multi_hot_output, new[]
    {
        new[] { 1f, 1f, 0f, 0f },
        new[] { 1f, 0f, 0f, 0f },
        new[] { 0f, 1f, 1f, 0f },
        new[] { 0f, 1f, 0f, 1f }
    });

    // using weighted inputs in "count" mode
    var count_inputs = np.array(new[,]
    {
        { 0, 1 },
        { 0, 0 },
        { 1, 2 },
        { 3, 1 }
    });
    var weights = np.array(new[,]
    {
        { 0.1f, 0.2f },
        { 0.1f, 0.1f },
        { 0.2f, 0.3f },
        { 0.4f, 0.2f }
    });
    var count_layer = tf.keras.layers.CategoryEncoding(4, output_mode: "count", count_weights: weights);
    Tensor count_output = count_layer.Apply(count_inputs);
    AssertRows(count_output, new[]
    {
        new[] { 0.1f, 0.2f, 0f, 0f },
        new[] { 0.2f, 0f, 0f, 0f },
        new[] { 0f, 0.2f, 0.3f, 0f },
        new[] { 0f, 0.2f, 0f, 0.4f }
    });
}
}
}

+ 2
- 1
test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs View File

@@ -4,11 +4,12 @@ using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Tensorflow;
using TensorFlowNET.Keras.UnitTest;
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;

namespace Tensorflow.Keras.UnitTest.Losses;
namespace TensorFlowNET.Keras.UnitTest;

[TestClass]
public class LossesTest : EagerModeTestBase


Loading…
Cancel
Save