diff --git a/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs b/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs
index 83527399..fa9c9166 100644
--- a/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs
+++ b/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs
@@ -192,8 +192,8 @@ namespace Tensorflow
                     {
                         if (!pending_count.ContainsKey(x.op.Name))
                             pending_count[x.op.Name] = 0;
-                        else
-                            pending_count[x.op.Name] -= 1;
+
+                        pending_count[x.op.Name] -= 1;
 
                         var ready = pending_count[x.op.Name] == 0;
 
@@ -288,7 +288,7 @@ namespace Tensorflow
                     }
                 }
                 if (is_stop_op)
-                    stop_ops.Add(op);
+                    stop_ops.Insert(0, op);
             }
             stop_ops.AddRange(stop_gradient_ops.Where(x => !stop_ops.Contains(x)));
             return stop_ops.ToArray();
diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.py.cs b/src/TensorFlowNET.Core/Gradients/math_grad.py.cs
index 8377b72d..19d51463 100644
--- a/src/TensorFlowNET.Core/Gradients/math_grad.py.cs
+++ b/src/TensorFlowNET.Core/Gradients/math_grad.py.cs
@@ -1,5 +1,6 @@
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Text;
 
 namespace Tensorflow
@@ -13,8 +14,67 @@ namespace Tensorflow
         {
             var x = op.inputs[0];
             var y = op.inputs[1];
+            if (grad is Tensor && _ShapesFullySpecifiedAndEqual(x, y, grad))
+                return (grad, grad);
 
-            return (grad, grad);
+            var sx = array_ops.shape(x);
+            var sy = array_ops.shape(y);
+            var (rx, ry) = gen_array_ops.broadcast_gradient_args(sx, sy);
+
+            var r1 = gen_array_ops.reshape(math_ops.reduce_sum(grad, rx), sx);
+            var r2 = gen_array_ops.reshape(math_ops.reduce_sum(grad, ry), sy);
+
+            return (r1, r2);
+        }
+
+        public static (Tensor, Tensor) _IdGrad(Operation op, Tensor grad)
+        {
+            return (grad, null);
+        }
+
+        public static (Tensor, Tensor) _MulGrad(Operation op, Tensor grad)
+        {
+            var x = op.inputs[0];
+            var y = op.inputs[1];
+            if (grad is Tensor && _ShapesFullySpecifiedAndEqual(x, y, grad) &&
+                new TF_DataType[] { tf.int32, tf.float32 }.Contains(grad.dtype))
+                return (gen_math_ops.mul(grad, y), gen_math_ops.mul(grad, x));
+
+            var sx = array_ops.shape(x);
+            var sy = array_ops.shape(y);
+            var (rx, ry) = gen_array_ops.broadcast_gradient_args(sx, sy);
+
+            x = math_ops.conj(x);
+            y = math_ops.conj(y);
+
+            var r1 = math_ops.reduce_sum(gen_math_ops.mul(grad, y), rx);
+            var r2 = math_ops.reduce_sum(gen_math_ops.mul(x, grad), ry);
+
+            return (gen_array_ops.reshape(r1, sx), gen_array_ops.reshape(r2, sy));
+        }
+
+        public static (Tensor, Tensor) _SubGrad(Operation op, Tensor grad)
+        {
+            var x = op.inputs[0];
+            var y = op.inputs[1];
+            if (grad is Tensor && _ShapesFullySpecifiedAndEqual(x, y, grad))
+                return (grad, -grad);
+
+            var sx = array_ops.shape(x);
+            var sy = array_ops.shape(y);
+            var (rx, ry) = gen_array_ops.broadcast_gradient_args(sx, sy);
+
+            var r1 = gen_array_ops.reshape(math_ops.reduce_sum(grad, rx), sx);
+            var r2 = gen_array_ops.reshape(-math_ops.reduce_sum(grad, ry), sy);
+
+            return (r1, r2);
+        }
+
+        public static bool _ShapesFullySpecifiedAndEqual(Tensor x, Tensor y, Tensor grad)
+        {
+            return false;
+            /*return string.Join(",", x.shape).Equals(string.Join(",", y.shape)) &&
+                string.Join(",", x.shape).Equals(string.Join(",", grad.shape));*/
         }
 
         public static (Tensor, Tensor) _SumGrad(Operation op, Tensor grad)
@@ -71,8 +131,7 @@ namespace Tensorflow
             x = math_ops.conj(x);
             y = math_ops.conj(y);
             y = math_ops.conj(z);
-
-            var gx = gen_array_ops.reshape(math_ops.reduce_sum(grad * y * gen_math_ops.pow(x, y - 1), rx), sx);
+            var gx = gen_array_ops.reshape(math_ops.reduce_sum(grad * y * gen_math_ops.pow(x, y - 1.0), rx), sx);
             Tensor log_x = null;
             // Avoid false singularity at x = 0
             if (x.dtype.is_complex())
@@ -81,7 +140,9 @@ namespace Tensorflow
             }
             else
             {
-                log_x = array_ops.where(x > 0, gen_array_ops.log(x), array_ops.zeros_like(x));
+                var x1 = gen_array_ops.log(x);
+                var y1 = array_ops.zeros_like(x);
+                log_x = array_ops.where(x > 0.0, x1, y1);
             }
 
             var gy = gen_array_ops.reshape(math_ops.reduce_sum(grad * z * log_x, ry), sy);
diff --git a/src/TensorFlowNET.Core/Operations/array_ops.py.cs b/src/TensorFlowNET.Core/Operations/array_ops.py.cs
index 62722a32..23a6eef3 100644
--- a/src/TensorFlowNET.Core/Operations/array_ops.py.cs
+++ b/src/TensorFlowNET.Core/Operations/array_ops.py.cs
@@ -147,7 +147,27 @@ namespace Tensorflow
 
         public static Tensor zeros_like(Tensor tensor, TF_DataType dtype = TF_DataType.DtInvalid, string name = "", bool optimize = true)
         {
-            throw new NotImplementedException("zeros_like");
+            return Python.with(new ops.name_scope(name, "zeros_like", new Tensor[] { tensor }), scope =>
+            {
+                name = scope;
+                tensor = ops.convert_to_tensor(tensor, name: "tensor");
+
+                // is_fully_defined returns an unexpected value.
+                if (optimize && tensor_util.to_shape(tensor.shape).is_fully_defined() && dtype != TF_DataType.TF_VARIANT)
+                {
+
+                }
+
+                if(dtype != TF_DataType.DtInvalid && dtype != tensor.dtype && dtype != TF_DataType.TF_VARIANT)
+                {
+                    throw new NotImplementedException("zeros_like");
+                    // return zeros(shape_internal(tensor, optimize: optimize), dtype: dtype, name: name);
+                }
+                else
+                {
+                    return gen_array_ops.zeros_like(tensor, name: name);
+                }
+            });
         }
     }
 }
diff --git a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs
index 7093967b..efc8e4e0 100644
--- a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs
@@ -106,9 +106,9 @@ namespace Tensorflow
             throw new NotImplementedException("where");
         }
 
-        public static Tensor select(Tensor condition, Tensor x, Tensor y, string name = "")
+        public static Tensor select(Tensor condition, Tensor t, Tensor e, string name = "")
         {
-            var _op = _op_def_lib._apply_op_helper("Select", name, new { condition, x, y });
+            var _op = _op_def_lib._apply_op_helper("Select", name, new { condition, t, e });
             return _op.outputs[0];
         }
 
diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
index 627f9850..57921209 100644
--- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
@@ -24,7 +24,7 @@ namespace Tensorflow
             return _op.outputs[0];
         }
 
-        public static Tensor sub(Tensor x, Tensor y, string name = "")
+        public static Tensor sub<Tx, Ty>(Tx x, Ty y, string name = "")
         {
             var _op = _op_def_lib._apply_op_helper("Sub", name, args: new { x, y });
 
@@ -89,7 +89,7 @@ namespace Tensorflow
             return _op.outputs[0];
         }
 
-        public static Tensor pow(Tensor x, double y, string name = "")
+        public static Tensor pow<Tx, Ty>(Tx x, Ty y, string name = "")
         {
             var _op = _op_def_lib._apply_op_helper("Pow", name, args: new { x, y });
 
diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs
index a2851de8..77c5c5c0 100644
--- a/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs
+++ b/src/TensorFlowNET.Core/Tensors/Tensor.Operators.cs
@@ -23,15 +23,10 @@ namespace Tensorflow
             });
         }
 
-        public static Tensor operator -(Tensor t1)
-        {
-            return gen_math_ops.neg(t1);
-        }
-
-        public static Tensor operator -(Tensor t1, Tensor t2)
-        {
-            return gen_math_ops.sub(t1, t2);
-        }
+        public static Tensor operator -(Tensor t1) => gen_math_ops.neg(t1);
+        public static Tensor operator -(Tensor t1, Tensor t2) => gen_math_ops.sub(t1, t2);
+        public static Tensor operator -(Tensor t1, int t2) => gen_math_ops.sub(t1, t2);
+        public static Tensor operator -(Tensor t1, double t2) => gen_math_ops.sub(t1, t2);
 
         public static Tensor operator *(double x, Tensor y)
         {
@@ -85,14 +80,9 @@ namespace Tensorflow
             });
         }
 
-        public static Tensor operator >(Tensor x, int y)
-        {
-            return gen_array_ops.greater(x, y);
-        }
-
-        public static Tensor operator <(Tensor x, int y)
-        {
-            return gen_array_ops.less(x, y);
-        }
+        public static Tensor operator >(Tensor x, int y) => gen_array_ops.greater(x, y);
+        public static Tensor operator >(Tensor x, double y) => gen_array_ops.greater(x, y);
+        public static Tensor operator <(Tensor x, int y) => gen_array_ops.less(x, y);
+        public static Tensor operator <(Tensor x, double y) => gen_array_ops.less(x, y);
     }
 }
diff --git a/src/TensorFlowNET.Core/ops.py.cs b/src/TensorFlowNET.Core/ops.py.cs
index 52600d29..d98854f6 100644
--- a/src/TensorFlowNET.Core/ops.py.cs
+++ b/src/TensorFlowNET.Core/ops.py.cs
@@ -294,8 +294,16 @@ namespace Tensorflow
                 {
                     case "Add":
                         return math_grad._AddGrad(oper, out_grads);
+                    case "Identity":
+                        return math_grad._IdGrad(oper, out_grads);
+                    case "Mul":
+                        return math_grad._MulGrad(oper, out_grads);
                     case "Sum":
                         return math_grad._SumGrad(oper, out_grads);
+                    case "Sub":
+                        return math_grad._SubGrad(oper, out_grads);
+                    case "Pow":
+                        return math_grad._PowGrad(oper, out_grads);
                     case "RealDiv":
                         return math_grad._RealDivGrad(oper, out_grads);
                     default: