From 8f56b57b203fe49ac209c35f0b976cca23829066 Mon Sep 17 00:00:00 2001 From: haiping008 Date: Tue, 8 Jan 2019 17:06:45 -0600 Subject: [PATCH] c_api.gradient --- .../Gradients/c_api.gradient.cs | 29 +++ src/TensorFlowNET.Core/Tensors/Tensor.cs | 37 ++-- .../Tensors/c_api.tensor.cs | 8 + .../CApiGradientsTest.cs | 173 +++++++++++++++++- test/TensorFlowNET.UnitTest/CApiTest.cs | 5 + test/TensorFlowNET.UnitTest/OperationsTest.cs | 4 +- 6 files changed, 236 insertions(+), 20 deletions(-) create mode 100644 src/TensorFlowNET.Core/Gradients/c_api.gradient.cs diff --git a/src/TensorFlowNET.Core/Gradients/c_api.gradient.cs b/src/TensorFlowNET.Core/Gradients/c_api.gradient.cs new file mode 100644 index 00000000..16a32ae1 --- /dev/null +++ b/src/TensorFlowNET.Core/Gradients/c_api.gradient.cs @@ -0,0 +1,29 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace Tensorflow +{ + public static partial class c_api + { + /// + /// Adds operations to compute the partial derivatives of sum of `y`s w.r.t `x`s, + /// i.e., d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2... + /// This is a variant of TF_AddGradients that allows the caller to pass a custom + /// name prefix to the operations added to a graph to compute the gradients. 
+ /// + /// TF_Graph* + /// const char* + /// TF_Output* + /// int + /// TF_Output* + /// int + /// TF_Output* + /// TF_Status* + /// TF_Output* + [DllImport(TensorFlowLibName)] + public static extern void TF_AddGradientsWithPrefix(IntPtr g, string prefix, TF_Output[] y, int ny, + TF_Output[] x, int nx, TF_Output[] dx, IntPtr status, TF_Output[] dy); + } +} diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs index d4bc910c..9b8c1727 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Linq; using System.Runtime.InteropServices; using System.Text; +using static Tensorflow.c_api; namespace Tensorflow { @@ -94,9 +95,15 @@ namespace Tensorflow private IntPtr Allocate(NDArray nd) { - var dotHandle = Marshal.AllocHGlobal(nd.dtypesize * nd.size); - ulong size = (ulong)(nd.size * nd.dtypesize); + IntPtr dotHandle = IntPtr.Zero; + ulong size = 0; + if (nd.dtype.Name != "String") + { + dotHandle = Marshal.AllocHGlobal(nd.dtypesize * nd.size); + size = (ulong)(nd.size * nd.dtypesize); + } + switch (nd.dtype.Name) { case "Int16": @@ -114,11 +121,8 @@ namespace Tensorflow case "String": var value = nd.Data()[0]; var bytes = Encoding.UTF8.GetBytes(value); - var buf = Marshal.AllocHGlobal(bytes.Length + 1); - Marshal.Copy(bytes, 0, buf, bytes.Length); - - //c_api.TF_SetAttrString(op, "value", buf, (uint)bytes.Length); - + dotHandle = Marshal.AllocHGlobal(bytes.Length + 1); + Marshal.Copy(bytes, 0, dotHandle, bytes.Length); size = (ulong)bytes.Length; break; default: @@ -126,18 +130,21 @@ namespace Tensorflow } var dataType = ToTFDataType(nd.dtype); - + // shape + var dims = nd.shape.Select(x => (long)x).ToArray(); + // Free the original buffer and set flag + Deallocator deallocator = (IntPtr values, IntPtr len, ref bool closure) => + { + Marshal.FreeHGlobal(dotHandle); + closure = true; + }; + var tfHandle = 
c_api.TF_NewTensor(dataType, - nd.shape.Select(x => (long)x).ToArray(), // shape + dims, nd.ndim, dotHandle, size, - (IntPtr values, IntPtr len, ref bool closure) => - { - // Free the original buffer and set flag - Marshal.FreeHGlobal(dotHandle); - closure = true; - }, + deallocator, ref deallocator_called); return tfHandle; diff --git a/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs b/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs index 872bd537..62ed55a9 100644 --- a/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/c_api.tensor.cs @@ -7,6 +7,14 @@ namespace Tensorflow { public static partial class c_api { + /// + /// Allocate and return a new Tensor. + /// + /// TF_DataType + /// const int64_t* + /// int + /// size_t + /// [DllImport(TensorFlowLibName)] public static extern IntPtr TF_AllocateTensor(TF_DataType dtype, long[] dims, int num_dims, ulong len); diff --git a/test/TensorFlowNET.UnitTest/CApiGradientsTest.cs b/test/TensorFlowNET.UnitTest/CApiGradientsTest.cs index 5a7eeba3..4ab8e8c7 100644 --- a/test/TensorFlowNET.UnitTest/CApiGradientsTest.cs +++ b/test/TensorFlowNET.UnitTest/CApiGradientsTest.cs @@ -2,9 +2,11 @@ using NumSharp.Core; using System; using System.Collections.Generic; +using System.Linq; using System.Runtime.InteropServices; using System.Text; using Tensorflow; +using Buffer = Tensorflow.Buffer; namespace TensorFlowNET.UnitTest { @@ -27,6 +29,97 @@ namespace TensorFlowNET.UnitTest var expected_grad_outputs = new TF_Output[2]; BuildSuccessGraph(inputs, outputs); + BuildExpectedGraph(grad_inputs_provided, expected_grad_outputs); + + AddGradients(grad_inputs_provided, string.Empty, inputs, 2, outputs, 1, + grad_outputs); + // EXPECT_EQ(TF_OK, TF_GetCode(s_)); + + // Compare that the graphs match. 
+ GraphDef expected_gdef; + GraphDef gdef; + EXPECT_TRUE(GetGraphDef(expected_graph_, out expected_gdef)); + EXPECT_TRUE(GetGraphDef(graph_, out gdef)); + //TF_EXPECT_GRAPH_EQ(expected_gdef, gdef); + + // Compare that the output of the gradients of both graphs match. + RunGraphsAndCompareOutputs(grad_outputs, expected_grad_outputs); + } + + private bool GetGraphDef(Graph graph, out GraphDef graph_def) + { + graph_def = null; + var s = new Status(); + var buffer = new Buffer(); + c_api.TF_GraphToGraphDef(graph, buffer, s); + bool ret = TF_GetCode(s) == TF_OK; + EXPECT_EQ(TF_OK, TF_GetCode(s)); + if (ret) graph_def = GraphDef.Parser.ParseFrom(buffer.Data); + buffer.Dispose(); + s.Dispose(); + return ret; + } + + private void RunGraphsAndCompareOutputs(TF_Output[] grad_outputs, TF_Output[] expected_grad_outputs) + { + var csession = new CSession(graph_, s_); + var expected_csession = new CSession(expected_graph_, s_); + + var grad_outputs_vec = new List(); + grad_outputs_vec.AddRange(grad_outputs.Select(x => x.oper)); + csession.SetOutputs(grad_outputs_vec); + csession.Run(s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)); + var out0 = csession.output_tensor(0); + var out1 = csession.output_tensor(1); + + var expected_grad_outputs_vec = new List(); + expected_grad_outputs_vec.AddRange(expected_grad_outputs.Select(x => x.oper)); + expected_csession.SetOutputs(expected_grad_outputs_vec); + expected_csession.Run(s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)); + var expected_out0 = expected_csession.output_tensor(0); + var expected_out1 = expected_csession.output_tensor(1); + + //CompareTensors(out0, expected_out0); + //CompareTensors(out1, expected_out1); + } + /*void TestGradientsError(bool grad_inputs_provided) + { + var inputs = new TF_Output[1]; + var outputs = new TF_Output[1]; + var grad_outputs = new TF_Output[1]; + + BuildErrorGraph(inputs, outputs); + + AddGradients(grad_inputs_provided, nullptr, inputs, 1, outputs, 1, + grad_outputs); + + string expected_msg = + "No gradient 
defined for op: TestOpWithNoGradient. Please see " + "https://www.tensorflow.org/code/" + "tensorflow/cc/gradients/README.md" + " for instructions on how to add C++ gradients."; + EXPECT_EQ(expected_msg, TF_Message(s_)); + }*/ + + private void AddGradients(bool grad_inputs_provided, string prefix, TF_Output[] inputs, int ninputs, + TF_Output[] outputs, int noutputs, TF_Output[] grad_outputs) + { + if (grad_inputs_provided) + { + var grad_inputs = new TF_Output[1]; + float[] grad_inputs_val = { 1.0f, 1.0f, 1.0f, 1.0f }; + var grad_inputs_op = FloatConst2x2(graph_, s_, grad_inputs_val, "GradInputs"); + grad_inputs[0] = new TF_Output(grad_inputs_op, 0); + c_api.TF_AddGradientsWithPrefix(graph_, prefix, outputs, noutputs, inputs, + ninputs, grad_inputs, s_, grad_outputs); + } + else + { + c_api.TF_AddGradientsWithPrefix(graph_, prefix, outputs, noutputs, inputs, + ninputs, null, s_, grad_outputs); + } } private void BuildSuccessGraph(TF_Output[] inputs, TF_Output[] outputs) @@ -55,6 +148,67 @@ namespace TensorFlowNET.UnitTest EXPECT_EQ(TF_OK, TF_GetCode(s_)); } + private void BuildExpectedGraph(bool grad_inputs_provided, TF_Output[] expected_grad_outputs) + { + // The expected graph looks like this if grad_inputs_provided. + // If grad_inputs_provided is false, Const_3 will be a OnesLike op. 
+ // ^ ^ + // dy| dx| // MatMul Gradient Graph + // | | + // MatMul_2 MatMul_1 + // ^ ^ ^ ^ + // | |----------| | + // | ^ | + // | dz| | + // | | | + // | Const_3 | + // | | + // | ^ | + // | z| | // MatMul Forward Graph + // | | | + // | MatMul | + // | / \ | + // | ^ ^ | + // | | | | + // |---x| y|----| + // | | + // | | + // Const_0 Const_1 + // + float[] const0_val = { 1.0f, 2.0f, 3.0f, 4.0f }; + float[] const1_val = { 1.0f, 0.0f, 0.0f, 1.0f }; + var const0 = FloatConst2x2(expected_graph_, s_, const0_val, "Const_0"); + var const1 = FloatConst2x2(expected_graph_, s_, const1_val, "Const_1"); + var matmul = MatMul(expected_graph_, s_, const0, const1, "MatMul"); + + Operation const3; + if (grad_inputs_provided) + { + float[] const3_val = { 1.0f, 1.0f, 1.0f, 1.0f }; + const3 = FloatConst2x2(expected_graph_, s_, const3_val, "GradInputs"); + } + else + { + const3 = OnesLike(expected_graph_, s_, matmul, "gradients/OnesLike"); + } + + var matmul1 = MatMul(expected_graph_, s_, const3, const1, + "gradients/MatMul", false, true); + var matmul2 = MatMul(expected_graph_, s_, const0, const3, + "gradients/MatMul_1", true, false); + expected_grad_outputs[0] = new TF_Output(matmul1, 0); + expected_grad_outputs[1] = new TF_Output( matmul2, 0); + } + + private Operation OnesLike(Graph graph, Status s, Operation input, string name) + { + var desc = TF_NewOperation(graph, "OnesLike", name); + TF_AddInput(desc, new TF_Output(input, 0)); + var op = TF_FinishOperation(desc, s); + EXPECT_EQ(TF_OK, TF_GetCode(s)); + return op; + } + private Operation FloatConst2x2(Graph graph, Status s, float[] values, string name) { var tensor = FloatTensor2x2(values); @@ -69,9 +223,10 @@ namespace TensorFlowNET.UnitTest private Tensor FloatTensor2x2(float[] values) { - long[] dims = { 2, 2 }; - Tensor t = c_api.TF_AllocateTensor(TF_FLOAT, dims, 2, sizeof(float) * 4); - Marshal.Copy(values, 0, t, 4); + //long[] dims = { 2, 2 }; + //Tensor t = c_api.TF_AllocateTensor(TF_FLOAT, dims, 2, sizeof(float) * 
4); + //Marshal.Copy(values, 0, t, 4); + Tensor t = new Tensor(new NDArray(values).reshape(2, 2)); return t; } @@ -106,6 +261,18 @@ namespace TensorFlowNET.UnitTest TestGradientsSuccess(false); } + [TestMethod] + public void OpWithNoGradientRegistered_GradInputs() + { + //TestGradientsError(true); + } + + [TestMethod] + public void OpWithNoGradientRegistered_NoGradInputs() + { + //TestGradientsError(false); + } + public void Dispose() { graph_.Dispose(); diff --git a/test/TensorFlowNET.UnitTest/CApiTest.cs b/test/TensorFlowNET.UnitTest/CApiTest.cs index bfecd5c1..dedb88b3 100644 --- a/test/TensorFlowNET.UnitTest/CApiTest.cs +++ b/test/TensorFlowNET.UnitTest/CApiTest.cs @@ -11,6 +11,11 @@ namespace TensorFlowNET.UnitTest protected TF_Code TF_OK = TF_Code.TF_OK; protected TF_DataType TF_FLOAT = TF_DataType.TF_FLOAT; + protected void EXPECT_TRUE(bool expected) + { + Assert.IsTrue(expected); + } + protected void EXPECT_EQ(object expected, object actual) { Assert.AreEqual(expected, actual); diff --git a/test/TensorFlowNET.UnitTest/OperationsTest.cs b/test/TensorFlowNET.UnitTest/OperationsTest.cs index 0e0910ff..69f93ecc 100644 --- a/test/TensorFlowNET.UnitTest/OperationsTest.cs +++ b/test/TensorFlowNET.UnitTest/OperationsTest.cs @@ -37,7 +37,7 @@ namespace TensorFlowNET.UnitTest feed_dict.Add(b, 2.0f); var o = sess.run(c, feed_dict); - Assert.AreEqual(o, 5.0f); + Assert.AreEqual((float)o, 5.0f); } } @@ -51,7 +51,7 @@ namespace TensorFlowNET.UnitTest using (var sess = tf.Session()) { var o = sess.run(c); - Assert.AreEqual(o, 9.0f); + Assert.AreEqual((float)o, 9.0f); } } }