From 8c2c18a216b89cb73d9daaeb67914d84c66f7a1c Mon Sep 17 00:00:00 2001 From: haiping008 Date: Fri, 1 Feb 2019 17:36:40 -0600 Subject: [PATCH] GradientTest.Gradients --- src/TensorFlowNET.Core/APIs/tf.gradients.cs | 2 +- .../Gradients/gradients_impl.py.cs | 72 +++++++++++++++---- .../Operations/InputList.cs | 1 + .../Operations/Operation.cs | 2 + .../Operations/control_flow_util.py.cs | 19 +++++ .../Operations/gen_math_ops.cs | 8 +-- src/TensorFlowNET.Core/Python.cs | 4 +- .../Tensors/Tensor.Operators.cs | 22 ++++-- test/TensorFlowNET.UnitTest/GradientTest.cs | 3 +- 9 files changed, 106 insertions(+), 27 deletions(-) create mode 100644 src/TensorFlowNET.Core/Operations/control_flow_util.py.cs diff --git a/src/TensorFlowNET.Core/APIs/tf.gradients.cs b/src/TensorFlowNET.Core/APIs/tf.gradients.cs index 115b7fef..77491d55 100644 --- a/src/TensorFlowNET.Core/APIs/tf.gradients.cs +++ b/src/TensorFlowNET.Core/APIs/tf.gradients.cs @@ -24,7 +24,7 @@ namespace Tensorflow stop_gradients: stop_gradients); } - public static object gradients(Tensor ys, + public static Tensor[] gradients(Tensor ys, Tensor[] xs, Tensor[] grad_ys = null, string name = "gradients", diff --git a/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs b/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs index aec61d1c..4c08de1d 100644 --- a/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs +++ b/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs @@ -9,7 +9,7 @@ namespace Tensorflow { public class gradients_impl { - public static void gradients(Tensor[] ys, + public static Tensor[] gradients(Tensor[] ys, Tensor[] xs, Tensor[] grad_ys = null, string name = "gradients", @@ -17,7 +17,7 @@ namespace Tensorflow bool gate_gradients = false, int? aggregation_method = null) { - _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients); + return _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients); } public static Tensor[] _GradientsHelper(Tensor[] ys, @@ -91,7 +91,9 @@ namespace Tensorflow { // 'ready' handles the case where one output gradient relies on // another output's gradient. - bool ready = !pending_count.ContainsKey(op.Name) || pending_count[op.Name] == 0; + if (!pending_count.ContainsKey(op.Name)) + pending_count[op.Name] = 0; + bool ready = pending_count[op.Name] == 0; if(ready && !to_ops_set.Contains(op) && reachable_to_ops.Contains(op)) { to_ops_set.Add(op); @@ -136,8 +138,12 @@ namespace Tensorflow } } + else + { + in_grads = _NonEagerInputs(op, xs).Select(x => new Tensor(IntPtr.Zero)).ToArray(); + } - var inputs = (List)_NonEagerInputs(op, xs); + var inputs = _NonEagerInputs(op, xs).ToList(); foreach (var (t_in, in_grad) in Python.zip(inputs, in_grads)) { if(in_grad != null) @@ -155,6 +161,15 @@ namespace Tensorflow return xs.Select(x => _GetGrad(grads, x)).ToArray(); } + /// + /// Update pending count for the inputs of op and enqueue ready ops. + /// + /// + /// + /// + /// + /// + /// private static void _UpdatePendingAndEnqueueReady(Dictionary grads, Operation op, Queue queue, @@ -162,7 +177,28 @@ namespace Tensorflow object loop_state, Tensor[] xs) { + foreach(var x in _NonEagerInputs(op, xs)) + { + pending_count[x.op.Name] -= 1; + var ready = pending_count[x.op.Name] == 0; + if(loop_state != null && !ready) + { + + } + + if (ready) + { + if (control_flow_util.IsLoopExit(x.op)) + { + + } + else + { + queue.Enqueue(x.op); + } + } + } } private static void _VerifyGeneratedGradients(Tensor[] grads, Operation op) @@ -227,7 +263,10 @@ namespace Tensorflow bool is_stop_op = true; foreach(var inp in _NonEagerInputs(op, xs)) { - if(pending_count.ContainsKey(op.Name) && pending_count[op.Name] > 0) + if (!pending_count.ContainsKey(inp.op.Name)) + pending_count[inp.op.Name] = 0; + + if (pending_count[inp.op.Name] > 0) { is_stop_op = false; break; @@ -267,14 +306,14 @@ namespace Tensorflow private static void _SetGrad(Dictionary grads, Tensor t, Tensor grad) { var op = t.op; - Tensor[][] op_grads = null; - if (!grads.ContainsKey(op.Name)) + Tensor[][] op_grads = grads.ContainsKey(op.Name) ? grads[op.Name] : null; + if (op_grads == null) { op_grads = op.outputs.Select(x => new Tensor[1]).ToArray(); grads[op.Name] = op_grads; } var t_grads = op_grads[t.value_index]; - // t_grads[0] = grad; + t_grads[0] = grad; } /// @@ -348,7 +387,7 @@ namespace Tensorflow // Clear the boolean so we won't add the inputs again. reached_ops.Remove(op); foreach (var inp in _NonEagerInputs(op, xs)) - queue.Enqueue((inp as Tensor).op); + queue.Enqueue(inp.op); } } // X in between_ops iff X is on a path of zero or more backpropagatable tensors @@ -363,19 +402,22 @@ namespace Tensorflow foreach(Tensor x in _NonEagerInputs(op, xs)) { if (between_ops.Contains(x.op)) - if (pending_count.ContainsKey(x.op.Name)) - pending_count[x.op.Name] += 1; - else - pending_count[x.op.Name] = 1; + { + if (!pending_count.ContainsKey(x.op.Name)) + pending_count[x.op.Name] = 0; + + pending_count[x.op.Name] += 1; + } } } return (reachable_to_ops.ToArray(), pending_count, loop_state); } - private static InputList _NonEagerInputs(Operation op, Tensor[] xs) + private static IEnumerable _NonEagerInputs(Operation op, Tensor[] xs) { - return op.inputs; + for (int i = 0; i < op.inputs.Length; i++) + yield return op.inputs[i]; } /// diff --git a/src/TensorFlowNET.Core/Operations/InputList.cs b/src/TensorFlowNET.Core/Operations/InputList.cs index 2c2fb28f..4f387120 100644 --- a/src/TensorFlowNET.Core/Operations/InputList.cs +++ b/src/TensorFlowNET.Core/Operations/InputList.cs @@ -9,6 +9,7 @@ namespace Tensorflow public class InputList : IEnumerable { public Tensor[] _inputs; + public int Length => _inputs.Length; public Tensor this[int index] => _inputs[index]; public InputList(Tensor[] inputs) diff --git a/src/TensorFlowNET.Core/Operations/Operation.cs b/src/TensorFlowNET.Core/Operations/Operation.cs index bda7fbfe..b0195077 100644 --- a/src/TensorFlowNET.Core/Operations/Operation.cs +++ b/src/TensorFlowNET.Core/Operations/Operation.cs @@ -14,6 +14,8 @@ namespace Tensorflow public int _id => _id_value; private int _id_value; + public string type => OpType; + private Status status = new Status(); public string Name => c_api.StringPiece(c_api.TF_OperationName(_handle)); diff --git a/src/TensorFlowNET.Core/Operations/control_flow_util.py.cs b/src/TensorFlowNET.Core/Operations/control_flow_util.py.cs new file mode 100644 index 00000000..4654261e --- /dev/null +++ b/src/TensorFlowNET.Core/Operations/control_flow_util.py.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow +{ + public class control_flow_util + { + /// + /// Return true if `op` is an Exit. + /// + /// + /// + public static bool IsLoopExit(Operation op) + { + return op.type == "Exit" || op.type == "RefExit"; + } + } +} diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs index c8e19d91..2e38b798 100644 --- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs @@ -10,9 +10,9 @@ namespace Tensorflow { public static OpDefLibrary _op_def_lib = new OpDefLibrary(); - public static Tensor add(Tensor x, Tensor y) + public static Tensor add(Tensor x, Tensor y, string name = "") { - var _op = _op_def_lib._apply_op_helper("Add", args: new { x, y }); + var _op = _op_def_lib._apply_op_helper("Add", name, args: new { x, y }); return _op.outputs[0]; } @@ -24,9 +24,9 @@ namespace Tensorflow return _op.outputs[0]; } - public static Tensor mul(Tensor x, Tensor y) + public static Tensor mul(Tensor x, Tensor y, string name = "") { - var _op = _op_def_lib._apply_op_helper("Mul", args: new { x, y }); + var _op = _op_def_lib._apply_op_helper("Mul", name, args: new { x, y }); return _op.outputs[0]; } diff --git a/src/TensorFlowNET.Core/Python.cs b/src/TensorFlowNET.Core/Python.cs index 54dd4b10..0ce36b00 100644 --- a/src/TensorFlowNET.Core/Python.cs +++ b/src/TensorFlowNET.Core/Python.cs @@ -61,8 +61,8 @@ namespace Tensorflow public static IEnumerable<(T, T)> zip(IList t1, IList t2) { - int index = 0; - yield return (t1[index], t2[index]); + for (int i = 0; i < t1.Count; i++) + yield return (t1[i], t2[i]); } } diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs index 1025cf50..e0e7e75e 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs @@ -6,9 +6,16 @@ namespace Tensorflow { public partial class Tensor { - public static Tensor operator +(Tensor t1, Tensor t2) + public static Tensor operator +(Tensor x, Tensor y) { - return gen_math_ops.add(t1, t2); + Tensor t = null; + + Python.with(new ops.name_scope("", "add", new Tensor[] { x, y }), scope => + { + t = gen_math_ops.add(x, y, scope); + }); + + return t; } public static Tensor operator -(Tensor t1, Tensor t2) @@ -16,9 +23,16 @@ namespace Tensorflow return gen_math_ops.sub(t1, t2); } - public static Tensor operator *(Tensor t1, Tensor t2) + public static Tensor operator *(Tensor x, Tensor y) { - return gen_math_ops.mul(t1, t2); + Tensor t = null; + + Python.with(new ops.name_scope("", "mul", new Tensor[] { x, y }), scope => + { + t = gen_math_ops.mul(x, y, name: scope); + }); + + return t; } public static Tensor operator /(Tensor t1, Tensor t2) diff --git a/test/TensorFlowNET.UnitTest/GradientTest.cs b/test/TensorFlowNET.UnitTest/GradientTest.cs index 5fd1972d..317c8e01 100644 --- a/test/TensorFlowNET.UnitTest/GradientTest.cs +++ b/test/TensorFlowNET.UnitTest/GradientTest.cs @@ -14,7 +14,8 @@ namespace TensorFlowNET.UnitTest { var a = tf.constant(0.0); var b = 2.0 * a; - var g = tf.gradients(a + b, new Tensor[] { a, b }, stop_gradients: new Tensor[] { a, b }); + var c = a + b; + var g = tf.gradients(c, new Tensor[] { a, b }, stop_gradients: new Tensor[] { a, b }); } } }