diff --git a/src/TensorFlowNET.Core/APIs/c_api.cs b/src/TensorFlowNET.Core/APIs/c_api.cs index 56672173..bdf2785f 100644 --- a/src/TensorFlowNET.Core/APIs/c_api.cs +++ b/src/TensorFlowNET.Core/APIs/c_api.cs @@ -43,7 +43,7 @@ namespace Tensorflow /// public partial class c_api { - public const string TensorFlowLibName = "tensorflow"; + public const string TensorFlowLibName = "tensorflow"; public static string StringPiece(IntPtr handle) { diff --git a/src/TensorFlowNET.Core/Eager/EagerOperation.cs b/src/TensorFlowNET.Core/Eager/EagerOperation.cs index 05735f02..dfc5df78 100644 --- a/src/TensorFlowNET.Core/Eager/EagerOperation.cs +++ b/src/TensorFlowNET.Core/Eager/EagerOperation.cs @@ -8,6 +8,8 @@ namespace Tensorflow.Eager { public int NumInputs; public Tensor[] Inputs { get; set; } + public int NumOutputs; + public Tensor[] Outputs { get; set; } public int[] SkipInputIndices { get; set; } public EagerOperation() : base(IntPtr.Zero) { } @@ -31,5 +33,18 @@ namespace Tensorflow.Eager return _inputs_val; } } + + public override Tensor[] outputs + { + get + { + if (_outputs == null) + { + _outputs = Outputs; + } + + return _outputs; + } + } } } diff --git a/src/TensorFlowNET.Core/Eager/c_api.eager.cs b/src/TensorFlowNET.Core/Eager/c_api.eager.cs index 8c1e3c34..148790c0 100644 --- a/src/TensorFlowNET.Core/Eager/c_api.eager.cs +++ b/src/TensorFlowNET.Core/Eager/c_api.eager.cs @@ -11,14 +11,12 @@ namespace Tensorflow public static extern void TFE_RegisterGradientFunction(_gradient_function_callback callbackPointer); [UnmanagedFunctionPointer(CallingConvention.StdCall)] - public delegate IntPtr _gradient_function_callback(string op_name, - int num_inputs, - IntPtr op_inputs, - int num_attrs, - int num_outputs, - IntPtr output_grads, - int num_skip_inputs, - IntPtr skip_input_indices); + public delegate IntPtr _gradient_function_callback(string op_name, + BindingArray op_inputs, + BindingArray op_outputs, + int 
num_attrs, + BindingArray output_grads, + BindingArray skip_input_indices); [DllImport(TensorFlowLibName)] public static extern IntPtr TFE_WrapGradientResult(IntPtr[] gradients, int num_gradients); diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.cs b/src/TensorFlowNET.Core/Gradients/math_grad.cs index 47a0a3f0..363a25b6 100644 --- a/src/TensorFlowNET.Core/Gradients/math_grad.cs +++ b/src/TensorFlowNET.Core/Gradients/math_grad.cs @@ -310,11 +310,23 @@ namespace Tensorflow.Gradients var input_shape = op.inputs[0]._shape_tuple(); var output_shape = op.outputs[0]._shape_tuple(); - var input_shape_tensor = array_ops.shape(op.inputs[0]); - var output_shape_tensor = array_ops.shape(op.outputs[0]); - var factor = _safe_shape_div(math_ops.reduce_prod(input_shape_tensor), math_ops.reduce_prod(output_shape_tensor)); + if(input_shape != null && + output_shape != null) + { + var input_size = np.prod(input_shape); + var output_size = np.prod(output_shape); + var factor = (int)input_size / Math.Max((int)output_size, 1); + var factor_tensor = constant_op.constant((int)factor, dtype: sum_grad.dtype); + return new Tensor[] { math_ops.truediv(sum_grad, math_ops.cast(factor_tensor, sum_grad.dtype)), null }; + } + else + { + var input_shape_tensor = array_ops.shape(op.inputs[0]); + var output_shape_tensor = array_ops.shape(op.outputs[0]); + var factor = _safe_shape_div(math_ops.reduce_prod(input_shape_tensor), math_ops.reduce_prod(output_shape_tensor)); - return new Tensor[] { math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), null }; + return new Tensor[] { math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), null }; + } } /// diff --git a/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs b/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs index 32016d37..1beae7cd 100644 --- a/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs +++ b/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs @@ -43,7 +43,7 @@ namespace 
Tensorflow.Keras.Optimizers _aggregate_gradients(grads_and_vars); - return control_flow_ops.no_op(); + return null; }); } diff --git a/src/TensorFlowNET.Core/Operations/Operation.Output.cs b/src/TensorFlowNET.Core/Operations/Operation.Output.cs index 18393e2f..b283d988 100644 --- a/src/TensorFlowNET.Core/Operations/Operation.Output.cs +++ b/src/TensorFlowNET.Core/Operations/Operation.Output.cs @@ -38,8 +38,8 @@ namespace Tensorflow return num; } - private Tensor[] _outputs; - public Tensor[] outputs => _outputs; + protected Tensor[] _outputs; + public virtual Tensor[] outputs => _outputs; public Tensor output => _outputs.FirstOrDefault(); public int NumControlOutputs => c_api.TF_OperationNumControlOutputs(_handle); diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs index 70428cfe..9d2f556c 100644 --- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs @@ -508,6 +508,19 @@ namespace Tensorflow public static Tensor less<Tx, Ty>(Tx x, Ty y, string name = null) { + if (tf.context.executing_eagerly()) + { + using var status = new Status(); + EagerTensorHandle tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name, + "Less", name, new IntPtr[] + { + x as EagerTensor, + y as EagerTensor + }, 2, null, status); + status.Check(true); + return tensor; + } + var _op = _op_def_lib._apply_op_helper("Less", name: name, args: new { x, y }); return _op.outputs[0]; diff --git a/src/TensorFlowNET.Core/tensorflow.cs b/src/TensorFlowNET.Core/tensorflow.cs index 4f3b95fb..732ab264 100644 --- a/src/TensorFlowNET.Core/tensorflow.cs +++ b/src/TensorFlowNET.Core/tensorflow.cs @@ -62,24 +62,30 @@ namespace Tensorflow }); ops.RegisterFromAssembly(); - c_api.TFE_RegisterGradientFunction((op_name, num_inputs, op_inputs, num_attrs, num_outputs, output_grads, num_skip_inputs, skip_input_indices) => + c_api.TFE_RegisterGradientFunction((op_name, op_inputs, op_outputs, 
num_attrs, output_grads, skip_input_indices) => { - var input_tensors = new EagerTensor[num_inputs]; - for (int i = 0; i < num_inputs; i++) - input_tensors[i] = new EagerTensor(*((IntPtr*)op_inputs + i)); + var input_tensors = new EagerTensor[op_inputs.length]; + for (int i = 0; i < op_inputs.length; i++) + input_tensors[i] = new EagerTensor(*((IntPtr*)op_inputs.array + i)); - var output_grad_tensors = new EagerTensor[num_outputs]; - for (int i = 0; i < num_outputs; i++) - output_grad_tensors[i] = new EagerTensor(*((IntPtr*)output_grads + i)); + var output_tensors = new EagerTensor[op_outputs.length]; + for (int i = 0; i < op_outputs.length; i++) + if (op_outputs.array != IntPtr.Zero) + output_tensors[i] = new EagerTensor(*((IntPtr*)op_outputs.array + i)); - var skip_input_indices_param = new int[num_skip_inputs]; - for (int i = 0; i < num_skip_inputs; i++) - skip_input_indices_param[i] = *((int*)skip_input_indices + i); + var output_grad_tensors = new EagerTensor[output_grads.length]; + for (int i = 0; i < output_grads.length; i++) + output_grad_tensors[i] = new EagerTensor(*((IntPtr*)output_grads.array + i)); + + var skip_input_indices_param = new int[skip_input_indices.length]; + for (int i = 0; i < skip_input_indices.length; i++) + skip_input_indices_param[i] = *((int*)skip_input_indices.array + i); var gradients = ops.gradientFunctions[op_name](new EagerOperation { - NumInputs = num_inputs, + NumInputs = input_tensors.Length, Inputs = input_tensors, + Outputs = output_tensors, SkipInputIndices = skip_input_indices_param }, output_grad_tensors); diff --git a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs index e6e96df3..d18b993b 100644 --- a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs +++ b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs @@ -38,10 +38,11 @@ namespace TensorFlowNET.UnitTest.Training var noise = tf.random.normal(shape: NUM_EXAMPLES); var outputs = inputs * 
TRUE_W + TRUE_b + noise; - print($"Current loss: {loss(model(inputs), outputs).numpy()}"); + Tensor init_loss = loss(model(inputs), outputs); + // print($"Current loss: {init_loss.numpy()}"); // Define a training loop - Action<Tensor, Tensor, float> train = (inputs, outputs, learning_rate) + Func<Tensor, Tensor, float, Tensor> train = (inputs, outputs, learning_rate) => { using var t = tf.GradientTape(); @@ -49,13 +50,17 @@ namespace TensorFlowNET.UnitTest.Training var (dW, db) = t.gradient(current_loss, (W, b)); W.assign_sub(learning_rate * dW); b.assign_sub(learning_rate * db); + return current_loss; }; var epochs = range(10); foreach(var epoch in epochs) { - train(inputs, outputs, 0.1f); - print($"Epoch %2d: W=%1.2f b=%1.2f, loss=%2.5f"); + var current_loss = train(inputs, outputs, 0.1f); + print($"Epoch {epoch}: W={(float)W.numpy()} b={(float)b.numpy()}, loss={(float)current_loss.numpy()}"); + + if (epoch > 0) // skip first epoch + Assert.IsTrue((bool)(current_loss < init_loss)); } } }