
Mul gradient is not correct in TensorFlowOpLayer #698

tags/keras_v0.3.0
Oceania2018 4 years ago
commit 14c26e7e07
10 changed files with 53 additions and 39 deletions
  1. src/TensorFlowNET.Core/Eager/EagerRunner.TFE_FastPathExecute.cs (+2 -1)
  2. src/TensorFlowNET.Core/Functions/TapeGradientFunctions.cs (+10 -14)
  3. src/TensorFlowNET.Core/Gradients/array_grad.cs (+5 -2)
  4. src/TensorFlowNET.Core/Gradients/math_grad.cs (+6 -6)
  5. src/TensorFlowNET.Core/Operations/array_ops.cs (+9 -0)
  6. src/TensorFlowNET.Core/Operations/gen_array_ops.cs (+1 -1)
  7. src/TensorFlowNET.Core/Operations/gen_math_ops.cs (+13 -13)
  8. src/TensorFlowNET.Core/Tensors/TensorShape.Equals.cs (+4 -0)
  9. src/TensorFlowNET.Keras/BackendImpl.cs (+1 -1)
  10. src/TensorFlowNET.Keras/Layers/Reshaping/Reshape.cs (+2 -1)

src/TensorFlowNET.Core/Eager/EagerRunner.TFE_FastPathExecute.cs (+2 -1)

@@ -380,7 +380,8 @@ namespace Tensorflow.Eager
                 c_api.TFE_OpSetAttrBool(op, key, Convert.ToBoolean(value));
                 break;
             case TF_AttrType.TF_ATTR_INT:
-                c_api.TFE_OpSetAttrInt(op, key, Convert.ToInt64(value));
+                attr_list_sizes[key] = Convert.ToInt64(value);
+                c_api.TFE_OpSetAttrInt(op, key, attr_list_sizes[key]);
                 break;
             case TF_AttrType.TF_ATTR_FLOAT:
                 c_api.TFE_OpSetAttrFloat(op, key, Convert.ToSingle(value));
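
Context for this hunk: recording each TF_ATTR_INT value in attr_list_sizes lets later attributes whose list sizes depend on it (for example SplitV's num_split, which fixes how many output tensors the op returns) be looked up by name. A minimal standalone sketch of the caching pattern (names are illustrative, not the library's API):

    using System;
    using System.Collections.Generic;

    class AttrCacheSketch
    {
        // Plays the role of attr_list_sizes in the diff: integer attrs are
        // remembered as they are set so later size queries can reuse them.
        static readonly Dictionary<string, long> attr_list_sizes = new Dictionary<string, long>();

        static void SetIntAttr(string key, object value)
        {
            attr_list_sizes[key] = Convert.ToInt64(value);
            // the cached value, not the raw object, is forwarded to the native op
        }

        static void Main()
        {
            SetIntAttr("num_split", 3);
            Console.WriteLine(attr_list_sizes["num_split"]); // 3 outputs expected
        }
    }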


src/TensorFlowNET.Core/Functions/TapeGradientFunctions.cs (+10 -14)

@@ -44,7 +44,7 @@ namespace Tensorflow.Functions
         public void Record(Tensors flat_outputs, Tensors inference_args)
         {
             var (backward_function, to_record) = _wrap_backward_function(_forward_graph, _backward, flat_outputs);
-            tf.Runner.RecordGradient(_forward.Name, flat_outputs, new object[0], inference_args,
+            tf.Runner.RecordGradient(_forward.Name, inference_args, new object[0], to_record,
                 getBackwardFunction: () => backward_function);
         }

@@ -52,20 +52,16 @@ namespace Tensorflow.Functions
             BackwardFunction _backward_function_wrapper = (output_grads, unneeded_gradients) =>
             {
-                return new Tensor[0];
-                /*var gradients = ops.gradientFunctions[op_name](new EagerOperation
-                {
-                    Name = op_name,
-                    NumInputs = op_inputs.Length,
-                    Inputs = op_inputs,
-                    NumOutputs = op_outputs.Length,
-                    Outputs = op_outputs,
-                    SkipInputIndices = unneeded_gradients,
-                    Attrs = attrs
-                }, output_grads);
-
-                return gradients;*/
+                var processed_args = new List<Tensor>();
+                var input_index = 0;
+                foreach (var (output_index, arg) in enumerate(output_grads))
+                {
+                    if (arg is null)
+                        throw new NotImplementedException("");
+                    processed_args.add(arg);
+                    input_index += 1;
+                }
+                return output_grads; // backward.Invoke(processed_args.ToArray());
             };

             return (_backward_function_wrapper, flat_outputs);
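
The rewritten wrapper walks the incoming output gradients, failing fast on any null entry, and for now passes the gradients through unchanged (the backward.Invoke call is still commented out). A standalone sketch of that guard, as a hypothetical helper rather than the library's API:

    using System;
    using System.Linq;

    static class GradGuardSketch
    {
        // Collect incoming gradients, rejecting null entries, as the
        // foreach loop in the diff does before returning output_grads.
        public static T[] CollectNonNull<T>(T[] grads) where T : class
            => grads.Select((g, i) => g
                   ?? throw new NotImplementedException($"null gradient at index {i}"))
               .ToArray();
    }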


src/TensorFlowNET.Core/Gradients/array_grad.cs (+5 -2)

@@ -85,10 +85,13 @@ namespace Tensorflow.Gradients
             var out_grads = new List<Tensor>();
             if(concat_dim is EagerTensor)
             {
-                var non_neg_concat_dim = (int)concat_dim % input_values[0].rank;
+                var dim_int = (int)concat_dim;
+                var non_neg_concat_dim = dim_int < 0
+                    ? input_values[0].rank + dim_int
+                    : dim_int % input_values[0].rank;
                 var sizes = input_values.Select(x => x.shape[non_neg_concat_dim]).ToArray();
                 var sizes_tensor = constant_op.constant(sizes);
-                out_grads = gen_array_ops.split_v(grad, sizes_tensor, sizes[0], non_neg_concat_dim).ToList();
+                out_grads = array_ops.split(grad, sizes_tensor, non_neg_concat_dim).ToList();
             }
             else if (constant_op.is_constant(concat_dim))
             {
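
The old expression (int)concat_dim % input_values[0].rank maps a negative axis like -1 to a negative remainder, because C#'s % operator keeps the sign of the dividend, and that negative index then misaddresses the shape. The replacement normalizes explicitly. A self-contained sketch of the same normalization:

    using System;

    class AxisDemo
    {
        // Same normalization as the diff: negative axes count from the end.
        static int NonNegativeAxis(int axis, int rank)
            => axis < 0 ? rank + axis : axis % rank;

        static void Main()
        {
            Console.WriteLine(-1 % 4);                 // -1 in C#, not 3
            Console.WriteLine(NonNegativeAxis(-1, 4)); // 3
            Console.WriteLine(NonNegativeAxis(5, 4));  // 1
        }
    }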


src/TensorFlowNET.Core/Gradients/math_grad.cs (+6 -6)

@@ -212,7 +212,7 @@ namespace Tensorflow.Gradients
                 };
             }

-            var broads = SmartBroadcastGradientArgs(x, y);
+            var broads = SmartBroadcastGradientArgs(x, y, grad);
             var (sx, rx, must_reduce_x) = broads[0];
             var (sy, ry, must_reduce_y) = broads[1];

@@ -468,7 +468,7 @@ namespace Tensorflow.Gradients
                 _ShapesFullySpecifiedAndEqual(x, y, grad))
                 return new Tensor[] { grad, -grad };

-            var broads = SmartBroadcastGradientArgs(x, y);
+            var broads = SmartBroadcastGradientArgs(x, y, grad);
             var (sx, rx, must_reduce_x) = broads[0];
             var (sy, ry, must_reduce_y) = broads[1];

@@ -718,7 +718,7 @@ namespace Tensorflow.Gradients

             var z = op.outputs[0];

-            var broads = SmartBroadcastGradientArgs(x, y);
+            var broads = SmartBroadcastGradientArgs(x, y, grad);
             var (sx, rx, must_reduce_x) = broads[0];
             var (sy, ry, must_reduce_y) = broads[1];

@@ -753,7 +753,7 @@ namespace Tensorflow.Gradients
         /// <param name="x"></param>
         /// <param name="y"></param>
         /// <returns></returns>
-        private static (Tensor, Tensor, bool)[] SmartBroadcastGradientArgs(Tensor x, Tensor y)
+        private static (Tensor, Tensor, bool)[] SmartBroadcastGradientArgs(Tensor x, Tensor y, Tensor grad)
         {
             Tensor sx, sy;
             if (x.TensorShape.is_fully_defined() &&
@@ -771,8 +771,8 @@ namespace Tensorflow.Gradients
             var (rx, ry) = gen_array_ops.broadcast_gradient_args(sx, sy);
             return new[]
             {
-                (sx, rx, true),
-                (sy, ry, true)
+                (sx, rx, !x.TensorShape.Equals(grad.TensorShape)),
+                (sy, ry, !y.TensorShape.Equals(grad.TensorShape))
             };
         }
     }
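
Passing grad into SmartBroadcastGradientArgs lets each input skip the reduce_sum step when its shape already matches the incoming gradient's shape, i.e. when no broadcasting happened on that side; previously both sides were reduced unconditionally. Schematically, with a hypothetical helper rather than the library API:

    using System;
    using System.Linq;

    class BroadcastDemo
    {
        // Mirrors must_reduce in the diff: a side needs gradient reduction
        // only if its shape differs from the incoming gradient's shape.
        static bool MustReduce(int[] inputShape, int[] gradShape)
            => !inputShape.SequenceEqual(gradShape);

        static void Main()
        {
            // x: [2, 3] and y: [3] broadcast to z: [2, 3]; grad has z's shape.
            Console.WriteLine(MustReduce(new[] { 2, 3 }, new[] { 2, 3 })); // False: skip reduce
            Console.WriteLine(MustReduce(new[] { 3 },    new[] { 2, 3 })); // True: sum over axis 0
        }
    }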


src/TensorFlowNET.Core/Operations/array_ops.cs (+9 -0)

@@ -885,6 +885,15 @@ namespace Tensorflow
             });
         }

+        public static Tensor[] split(Tensor value, Tensor size_splits, int axis, int num = -1,
+            string name = "split")
+        {
+            if (num == -1)
+                num = size_splits.shape[0];
+
+            return gen_array_ops.split_v(value, size_splits, axis, num, name: name);
+        }
+
         public static Tensor[] split<T>(Tensor value, int num_split, T axis,
             string name = "split")
         {
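
The new overload infers num from the length of size_splits when it is left at its default of -1, mirroring how tf.split behaves when num is omitted. A hedged usage sketch based only on the signature shown above (grad and the sizes are placeholders):

    // Undo a concat along axis 0 of three inputs with 1, 2 and 3 rows:
    // var sizes_tensor = constant_op.constant(new[] { 1, 2, 3 });
    // var pieces = array_ops.split(grad, sizes_tensor, axis: 0);
    // pieces.Length == 3, with row counts 1, 2 and 3 respectively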


src/TensorFlowNET.Core/Operations/gen_array_ops.cs (+1 -1)

@@ -527,7 +527,7 @@ namespace Tensorflow
             var results = tf.Runner.TFE_FastPathExecute(tf.Context, tf.Context.DeviceName,
                 "SplitV", name,
                 null,
-                value, size_splits, axis,
+                value, size_splits, axis,
                 "num_split", num_split);

             return results;


src/TensorFlowNET.Core/Operations/gen_math_ops.cs (+13 -13)

@@ -346,21 +346,21 @@ namespace Tensorflow
         /// <c>dy</c> is the corresponding input gradient.
         /// </remarks>
         public static Tensor sigmoid_grad(Tensor y, Tensor dy, string name = "SigmoidGrad")
-        {
-            if (tf.executing_eagerly())
-            {
-                var results = tf.Runner.TFE_FastPathExecute(tf.Context, tf.Context.DeviceName,
-                    "SigmoidGrad", name,
-                    null,
-                    y, dy);
-
-                return results[0];
-            }
-
-            var op = tf.OpDefLib._apply_op_helper("SigmoidGrad", name: name, args: new { y, dy });
-            return op.output;
-        }
+            => tf.Context.RunInAutoMode2(
+                () => tf.OpDefLib._apply_op_helper("SigmoidGrad", name, new { y, dy }).output,
+                () => tf.Runner.TFE_FastPathExecute(tf.Context, tf.Context.DeviceName,
+                    "SigmoidGrad", name,
+                    null,
+                    y, dy).FirstOrDefault(),
+                (op) =>
+                {
+                    var attrs = new object[]
+                    {
+                        "T", op.get_attr<TF_DataType>("T")
+                    };
+                    tf.Runner.RecordGradient("SigmoidGrad", op.inputs, attrs, op.outputs);
+                },
+                new Tensors(y, dy));

        public static Tensor sign<T>(T x, string name = "Sign")
        {
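
RunInAutoMode2 folds the earlier if (tf.executing_eagerly()) branch into a single call taking a graph-mode builder, an eager fast path, and a gradient-recording callback. A minimal sketch of that dispatch shape, illustrative only and not the library's implementation:

    using System;

    class AutoModeDemo
    {
        // Hypothetical reduction of the RunInAutoMode2 pattern: pick graph or
        // eager execution, then let the caller do its gradient bookkeeping.
        static T RunInAutoMode<T>(Func<T> graphFn, Func<T> eagerFn,
            Action<T> recordGradient, bool executingEagerly)
        {
            var result = executingEagerly ? eagerFn() : graphFn();
            recordGradient(result);
            return result;
        }

        static void Main()
        {
            var r = RunInAutoMode(
                graphFn: () => "graph op output",
                eagerFn: () => "eager op output",
                recordGradient: _ => { /* tape bookkeeping */ },
                executingEagerly: true);
            Console.WriteLine(r); // eager op output
        }
    }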


src/TensorFlowNET.Core/Tensors/TensorShape.Equals.cs (+4 -0)

@@ -10,6 +10,10 @@ namespace Tensorflow
             switch (obj)
             {
                 case TensorShape shape1:
+                    if (rank == -1 && shape1.rank == -1)
+                        return false;
+                    else if (rank != shape1.rank)
+                        return false;
                     return Enumerable.SequenceEqual(shape1.dims, dims);
                 default:
                     return false;
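
With this guard, a shape of unknown rank (rank == -1) never compares equal to anything, and a rank mismatch is rejected before dims are compared element-wise. A standalone sketch of the resulting semantics:

    using System;
    using System.Linq;

    class ShapeEqualsDemo
    {
        // Mirrors the patched Equals: unknown rank (-1) never compares equal,
        // and ranks must match before dims are checked.
        static bool ShapesEqual(int rank1, int[] dims1, int rank2, int[] dims2)
        {
            if (rank1 == -1 && rank2 == -1)
                return false;
            if (rank1 != rank2)
                return false;
            return dims1.SequenceEqual(dims2);
        }

        static void Main()
        {
            Console.WriteLine(ShapesEqual(-1, null, -1, null));             // False
            Console.WriteLine(ShapesEqual(2, new[] { 2, 3 }, 2, new[] { 2, 3 })); // True
            Console.WriteLine(ShapesEqual(1, new[] { 3 }, 2, new[] { 2, 3 }));    // False
        }
    }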


src/TensorFlowNET.Keras/BackendImpl.cs (+1 -1)

@@ -239,7 +239,7 @@ namespace Tensorflow.Keras
             {
                 var rank = tensors[0].NDims;
                 if (rank > -1)
-                    axis %= rank;
+                    axis += rank;
                 else
                     axis = 0;
             }
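
This branch is presumably reached only for a negative axis (the surrounding check guards it), so adding the rank converts it to the equivalent index from the front; the old axis %= rank left the value negative, since C#'s remainder keeps the dividend's sign. In short:

    // Valid only when axis < 0 on entry, as at this call site:
    // axis = -1, rank = 3  ->  old: axis %= rank gives -1 (still negative)
    //                          new: axis += rank gives  2 (the last dimension)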


src/TensorFlowNET.Keras/Layers/Reshaping/Reshape.cs (+2 -1)

@@ -21,7 +21,8 @@ namespace Tensorflow.Keras.Layers

         protected override Tensors Call(Tensors inputs, Tensor state = null, bool is_training = false)
         {
-            var shape = new List<int> { inputs.shape[0] };
+            var shape_tensor = array_ops.shape(inputs);
+            var shape = new List<int> { shape_tensor.shape[0] };
             shape.AddRange(args.TargetShape.dims);

             var result = array_ops.reshape(inputs, shape.ToArray());

