diff --git a/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs b/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs
index 04dd7a9c..afa30811 100644
--- a/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs
+++ b/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs
@@ -11,7 +11,7 @@ namespace Tensorflow.Eager
             Tensor[] inputs,
             object[] attrs,
             Tensor[] results,
-            Func<BackwardFunction> getBackwardFunction = null)
+            BackwardFunction backwardFunction = null)
         {
             bool should_record = ShouldRecord(inputs);
@@ -28,9 +28,9 @@ namespace Tensorflow.Eager
             }
             if (!should_record) return should_record;

-            tf.Logger.Debug($"RecordGradient: op_name={op_name}");
+            // tf.Logger.Debug($"RecordGradient: op_name={op_name}");

-            Tensor[] op_outputs;
+            /*Tensor[] op_outputs = null;
             var unused_output_indices = gradient_exclustions.OpGradientUnusedOutputIndices(op_name);
             if (unused_output_indices != null)
             {
@@ -44,7 +44,7 @@ namespace Tensorflow.Eager
             else
                 op_outputs = results;

-            Tensor[] op_inputs;
+            Tensor[] op_inputs = null;
             var unused_input_indices = gradient_exclustions.OpGradientUnusedInputIndices(op_name);
             if (unused_input_indices != null)
             {
@@ -56,22 +56,14 @@ namespace Tensorflow.Eager
                 }
             }
             else
-                op_inputs = inputs;
+                op_inputs = inputs;*/

-            TapeSetRecordOperation(op_name, inputs, results,
-                getBackwardFunction ?? GetBackwradFunction(op_name, inputs, attrs, results));
+            backwardFunction = backwardFunction ?? GetGradientFunction(op_name, inputs, attrs, results);
+            TapeSetRecordOperation(op_name, inputs, results, backwardFunction);

             return true;
         }

-        Func<BackwardFunction> GetBackwradFunction(string op_name,
-            Tensor[] op_inputs,
-            object[] attrs,
-            Tensor[] op_outputs)
-        {
-            return () => GetGradientFunction(op_name, op_inputs, attrs, op_outputs);
-        }
-
         BackwardFunction GetGradientFunction(string op_name,
             Tensor[] op_inputs,
             object[] attrs,
diff --git a/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordBackprop.cs b/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordBackprop.cs
index 22515f4e..e8751aed 100644
--- a/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordBackprop.cs
+++ b/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordBackprop.cs
@@ -1,7 +1,6 @@
 using System;
 using Tensorflow.Gradients;
 using static Tensorflow.Binding;
-using static Tensorflow.tensorflow;

 namespace Tensorflow.Eager
 {
@@ -10,7 +9,7 @@ namespace Tensorflow.Eager
         void TapeSetRecordBackprop(string op_type,
             Tensor[] input_tensors,
             TapeTensor[] output_tensors,
-            Func<BackwardFunction> backward_function_getter)
+            BackwardFunction backward_function)
         {
             if (!CouldBackprop())
             {
@@ -19,8 +18,7 @@ namespace Tensorflow.Eager
             foreach (var tape in tf.GetTapeSet())
             {
-                tape.RecordOperation(op_type, input_tensors, output_tensors,
-                    backward_function_getter);
+                tape.RecordOperation(op_type, input_tensors, output_tensors, backward_function);
             }
         }
     }
diff --git a/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordForwardprop.cs b/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordForwardprop.cs
index 1c5cac7b..0490447d 100644
--- a/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordForwardprop.cs
+++ b/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordForwardprop.cs
@@ -9,7 +9,7 @@ namespace Tensorflow.Eager
         bool TapeSetRecordForwardprop(string op_type,
             Tensor[] input_tensors,
             TapeTensor[] output_tensors,
-            Func<BackwardFunction> backward_function_getter)
+            BackwardFunction backward_function_getter)
         {
             if (!CouldForwardprop())
             {
diff --git a/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordOperation.cs b/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordOperation.cs
index 861f26fc..42e1cff9 100644
--- a/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordOperation.cs
+++ b/src/TensorFlowNET.Core/Eager/EagerRunner.TapeSetRecordOperation.cs
@@ -10,16 +10,16 @@ namespace Tensorflow.Eager
         public bool TapeSetRecordOperation(string op_type,
             Tensor[] input_tensors,
             Tensor[] output_tensors,
-            Func<BackwardFunction> backward_function_getter)
+            BackwardFunction backward_function)
         {
             var output_info = output_tensors.Select(x => new TapeTensor(x)).ToArray();

             if (!TapeSetRecordForwardprop(op_type, input_tensors, output_info,
-                backward_function_getter))
+                backward_function))
                 return false;

             TapeSetRecordBackprop(op_type, input_tensors, output_info,
-                backward_function_getter);
+                backward_function);

             return true;
         }
diff --git a/src/TensorFlowNET.Core/Eager/IEagerRunner.cs b/src/TensorFlowNET.Core/Eager/IEagerRunner.cs
index cec489c3..7baf4cd7 100644
--- a/src/TensorFlowNET.Core/Eager/IEagerRunner.cs
+++ b/src/TensorFlowNET.Core/Eager/IEagerRunner.cs
@@ -9,7 +9,8 @@ namespace Tensorflow.Eager
     {
         Tensor[] Execute(Context ctx, string op_name, int num_outputs,
-            Tensor[] inputs, object[] attrs,
+            Tensor[] inputs,
+            object[] attrs,
             string name = null);

         (TF_DataType, Tensor[]) ArgsToMatchingEager(Context ctx,
@@ -34,7 +35,7 @@ namespace Tensorflow.Eager
             Tensor[] inputs,
             object[] attrs,
             Tensor[] results,
-            Func<BackwardFunction> getBackwardFunction = null);
+            BackwardFunction getBackwardFunction = null);

         bool MustRecordGradient();
diff --git a/src/TensorFlowNET.Core/Functions/TapeGradientFunctions.cs b/src/TensorFlowNET.Core/Functions/TapeGradientFunctions.cs
index 33f3d692..b4241304 100644
--- a/src/TensorFlowNET.Core/Functions/TapeGradientFunctions.cs
+++ b/src/TensorFlowNET.Core/Functions/TapeGradientFunctions.cs
@@ -47,7 +47,7 @@ namespace Tensorflow.Functions
         {
             var (backward_function, to_record) = _wrap_backward_function(_forward_graph, _backward, flat_outputs);
             tf.Runner.RecordGradient(_forward.Name, inference_args, new object[0], to_record,
-                getBackwardFunction: () => backward_function);
+                getBackwardFunction: backward_function);
         }

         /// <summary>
diff --git a/src/TensorFlowNET.Core/Gradients/BackpropInitialState.cs b/src/TensorFlowNET.Core/Gradients/BackpropInitialState.cs
index 06ae7ce7..ec0e8484 100644
--- a/src/TensorFlowNET.Core/Gradients/BackpropInitialState.cs
+++ b/src/TensorFlowNET.Core/Gradients/BackpropInitialState.cs
@@ -5,7 +5,7 @@ namespace Tensorflow.Gradients
 {
     public class BackpropInitialState
     {
-        public OpTape<BackwardFunction, TapeTensor> op_tape { get; set; }
+        public OpTape op_tape { get; set; }
         /// <summary>
         /// Map from tensor to how many references still exist for this tensor in
         /// the tape.
@@ -19,7 +19,7 @@ namespace Tensorflow.Gradients
         public BackpropInitialState()
         {
-            op_tape = new OpTape<BackwardFunction, TapeTensor>();
+            op_tape = new OpTape();
             tensor_usage_counts = new UnorderedMap();
             op_missing_tensor = new UnorderedMap();
         }
diff --git a/src/TensorFlowNET.Core/Gradients/ITape.cs b/src/TensorFlowNET.Core/Gradients/ITape.cs
index c4e88617..dbd085ea 100644
--- a/src/TensorFlowNET.Core/Gradients/ITape.cs
+++ b/src/TensorFlowNET.Core/Gradients/ITape.cs
@@ -13,7 +13,7 @@ namespace Tensorflow.Gradients
         void RecordOperation(string op_type,
             Tensor[] input_tensors,
             TapeTensor[] output_tensors,
-            Func<BackwardFunction> backward_function_getter);
+            BackwardFunction backward_function);

         void VariableAccessed(ResourceVariable variable);
diff --git a/src/TensorFlowNET.Core/Gradients/OpTape.cs b/src/TensorFlowNET.Core/Gradients/OpTape.cs
index 7c79eb5d..61030e62 100644
--- a/src/TensorFlowNET.Core/Gradients/OpTape.cs
+++ b/src/TensorFlowNET.Core/Gradients/OpTape.cs
@@ -5,10 +5,7 @@ namespace Tensorflow.Gradients
     /// <summary>
     /// Map from operation-id to tape entry.
     /// </summary>
-    /// <typeparam name="BackwardFunction"></typeparam>
-    /// <typeparam name="TapeTensor"></typeparam>
-    public class OpTape<BackwardFunction, TapeTensor> :
-        UnorderedMap<long, OpTapeEntry<BackwardFunction, TapeTensor>>
+    public class OpTape : UnorderedMap<long, OpTapeEntry>
     {
     }
diff --git a/src/TensorFlowNET.Core/Gradients/OpTapeEntry.cs b/src/TensorFlowNET.Core/Gradients/OpTapeEntry.cs
index 165ef14f..537369dd 100644
--- a/src/TensorFlowNET.Core/Gradients/OpTapeEntry.cs
+++ b/src/TensorFlowNET.Core/Gradients/OpTapeEntry.cs
@@ -5,9 +5,7 @@ namespace Tensorflow.Gradients
     /// <summary>
     /// Represents an entry in the tape.
     /// </summary>
-    /// <typeparam name="BackwardFunction"></typeparam>
-    /// <typeparam name="TapeTensor"></typeparam>
-    public class OpTapeEntry<BackwardFunction, TapeTensor>
+    public class OpTapeEntry
     {
         public string op_type { get; set; }
         public TapeTensor[] output_tensor_info { get; set; }
diff --git a/src/TensorFlowNET.Core/Gradients/Tape.CallBackwardFunction.cs b/src/TensorFlowNET.Core/Gradients/Tape.CallBackwardFunction.cs
index 4c5f8b1c..9dc1b666 100644
--- a/src/TensorFlowNET.Core/Gradients/Tape.CallBackwardFunction.cs
+++ b/src/TensorFlowNET.Core/Gradients/Tape.CallBackwardFunction.cs
@@ -1,5 +1,4 @@
 using System.Collections.Generic;
-using static Tensorflow.tensorflow;

 namespace Tensorflow.Gradients
 {
@@ -9,7 +8,7 @@ namespace Tensorflow.Gradients
             List unneeded_gradients,
             List output_gradients)
         {
-            var grads = new Tensor[output_gradients.Count];
+            // var grads = new Tensor[output_gradients.Count];

             var result = backward_function(output_gradients.ToArray(), unneeded_gradients.ToArray());
diff --git a/src/TensorFlowNET.Core/Gradients/Tape.ComputeGradient.cs b/src/TensorFlowNET.Core/Gradients/Tape.ComputeGradient.cs
index 70e1a743..d955582b 100644
--- a/src/TensorFlowNET.Core/Gradients/Tape.ComputeGradient.cs
+++ b/src/TensorFlowNET.Core/Gradients/Tape.ComputeGradient.cs
@@ -2,24 +2,22 @@
 using System.Collections.Generic;
 using System.Linq;
 using Tensorflow.Util;
-using static Tensorflow.tensorflow;

 namespace Tensorflow.Gradients
 {
     public partial class Tape
     {
-        int kMinAggregateCount = 4;
-        int kMinAggregateBytes = 128 * 1024 * 1024;
+        // int kMinAggregateCount = 4;
+        // int kMinAggregateBytes = 128 * 1024 * 1024;

         public Tensor[] ComputeGradient(Tensor[] target_tensor_ids,
             Tensor[] source_tensor_ids,
             UnorderedMap sources_that_are_targets,
             Tensor[] output_gradients)
         {
-            var result = new List<Tensor>(source_tensor_ids.Length);
             var sources_set = new UnorderedSet(source_tensor_ids);
-            var gradients_size = new UnorderedMap();
-
+            // var gradients_size = new UnorderedMap();
+            var functionsAcceptingNoneForIndicesMap = FunctionsAcceptingNoneForIndicesMap();
             var state = PrepareBackprop(
                 target_tensor_ids, tensor_tape_, op_tape_, sources_set, _persistent);
             var op_stack = InitialStack(state.op_tape, state.op_missing_tensor);
@@ -54,7 +52,7 @@ namespace Tensorflow.Gradients
                     var id = trace.output_tensor_info[i].GetTensor();
                     if (!gradients.find(id, out var grad_it))
                     {
-                        if (FunctionsAcceptingNoneForIndicesMap().find(trace.op_type, out var func_name_it) &&
+                        if (functionsAcceptingNoneForIndicesMap.find(trace.op_type, out var func_name_it) &&
                             func_name_it.find(i))
                         {
                             out_gradients.Add(null);
@@ -76,8 +74,8 @@ namespace Tensorflow.Gradients
                             gradients.Remove(id);
                         else
                         {
-                            grad_it.Clear();
-                            grad_it.Add(new_gradients);
+                            // grad_it.Clear();
+                            // grad_it.Add(new_gradients);
                             // vspace.MarkAsResult(new_gradients);
                         }
                         out_gradients.Add(new_gradients);
@@ -87,18 +85,17 @@ namespace Tensorflow.Gradients
                 Tensor[] in_gradients;
                 if (any_gradient_nonzero)
                 {
-                    foreach (var i in zero_indices)
-                        out_gradients[i] = trace.output_tensor_info[i].ZerosLike();
+                    // foreach (var i in zero_indices)
+                    // out_gradients[i] = trace.output_tensor_info[i].ZerosLike();

-                    in_gradients = CallBackwardFunction(trace.backward_function,
-                        unneeded_gradients,
-                        out_gradients);
+                    in_gradients = trace.backward_function(out_gradients.ToArray(), unneeded_gradients.ToArray());

                     if (in_gradients.Count() != trace.input_tensor_id.Count())
                         throw new RuntimeError($"Recorded operation '{trace.op_type}' returned too few gradients. Expected {trace.input_tensor_id.Length} but received {in_gradients.Count()}");

                     if (!_persistent)
                     {
                         // trace.backward_function_deleter(trace.backward_function);
+                        trace.backward_function = null;
                     }
                 }
                 else
@@ -113,7 +110,7 @@ namespace Tensorflow.Gradients
                     {
                         var unaggregated_grads = gradients[id];
                         unaggregated_grads.Add(in_gradients[i]);
-                        if (unaggregated_grads.Count > kMinAggregateCount)
+                        /*if (unaggregated_grads.Count > kMinAggregateCount)
                         {
                             if (!gradients_size.find(id, out var size))
                             {
@@ -125,7 +122,7 @@ namespace Tensorflow.Gradients
                             {
                                 throw new NotImplementedException("");
                             }
-                        }
+                        }*/
                     }

                     if (!state.tensor_usage_counts.find(id))
@@ -162,36 +159,21 @@ namespace Tensorflow.Gradients
             if (state.op_tape.Count > 0)
                 throw new RuntimeError("Invalid tape state.");

-            var used_gradient_ids = new List<Tensor>(source_tensor_ids.Length);
+            var result = new Tensor[source_tensor_ids.Length];
+            var j = 0;
             foreach (var id in source_tensor_ids)
             {
-                if (!gradients.find(id, out var grad_it))
-                    result.Add(null);
-                else
+                if (gradients.find(id, out var grad_it))
                 {
                     if (grad_it.Count > 1)
-                    {
-                        var grad = gen_math_ops.add_n(grad_it.ToArray());
-                        grad_it.Clear();
-                        grad_it.Add(grad);
-                    }
-                    result.Add(grad_it[0]);
-                    used_gradient_ids.Add(id);
+                        result[j] = gen_math_ops.add_n(grad_it.ToArray());
+                    else
+                        result[j] = grad_it[0];
                 }
+                j++;
             }

-            /*foreach(var grad_pair in gradients)
-            {
-                if(!used_gradient_ids.Contains(grad_pair.Key))
-                {
-                    foreach(var g in grad_pair.Value)
-                    {
-                        vspace.DeleteGradient(g);
-                    }
-                }
-            }*/
-
-            return result.ToArray();
+            return result;
         }

         UnorderedMap<string, UnorderedSet<int>> FunctionsAcceptingNoneForIndicesMap()
@@ -207,7 +189,7 @@ namespace Tensorflow.Gradients
             UnorderedMap sources_that_are_targets,
             Tensor[] output_gradients,
             TensorTape tensor_tape,
-            OpTape<BackwardFunction, TapeTensor> op_tape)
+            OpTape op_tape)
         {
             var result = new UnorderedMapEnumerable<Tensor, List<Tensor>>();
             for (int i = 0; i < target_tensor_ids.Length; ++i)
@@ -253,7 +235,7 @@ namespace Tensorflow.Gradients
             return result;
         }

-        Queue InitialStack(OpTape<BackwardFunction, TapeTensor> op_tape,
+        Queue InitialStack(OpTape op_tape,
             UnorderedMap op_missing_tensor)
         {
             var result = new Queue();
diff --git a/src/TensorFlowNET.Core/Gradients/Tape.PrepareBackprop.cs b/src/TensorFlowNET.Core/Gradients/Tape.PrepareBackprop.cs
index ae81b8d5..dd1a1401 100644
--- a/src/TensorFlowNET.Core/Gradients/Tape.PrepareBackprop.cs
+++ b/src/TensorFlowNET.Core/Gradients/Tape.PrepareBackprop.cs
@@ -8,7 +8,7 @@ namespace Tensorflow.Gradients
     {
         public BackpropInitialState PrepareBackprop(Tensor[] target,
             TensorTape tensor_tape,
-            OpTape<BackwardFunction, TapeTensor> op_tape,
+            OpTape op_tape,
             UnorderedSet sources_set,
             bool persistent_tape)
         {
diff --git a/src/TensorFlowNET.Core/Gradients/Tape.RecordOperation.cs b/src/TensorFlowNET.Core/Gradients/Tape.RecordOperation.cs
index 4435c312..c76d620c 100644
--- a/src/TensorFlowNET.Core/Gradients/Tape.RecordOperation.cs
+++ b/src/TensorFlowNET.Core/Gradients/Tape.RecordOperation.cs
@@ -16,7 +16,7 @@ namespace Tensorflow.Gradients
         public void RecordOperation(string op_type,
             Tensor[] input_tensors,
             TapeTensor[] output_tensors,
-            Func<BackwardFunction> backward_function_getter)
+            BackwardFunction backward_function)
         {
             if (!ShouldRecord(input_tensors))
                 return;
@@ -32,12 +32,12 @@ namespace Tensorflow.Gradients
                 tensor_usage_[o.GetTensor()] = 1;
             }

-            op_tape_[op_id] = new OpTapeEntry<BackwardFunction, TapeTensor>
+            op_tape_[op_id] = new OpTapeEntry
             {
                 op_type = op_type,
                 output_tensor_info = output_tensors,
                 input_tensor_id = input_tensors,
-                backward_function = backward_function_getter()
+                backward_function = backward_function
             };
         }
     }
diff --git a/src/TensorFlowNET.Core/Gradients/Tape.cs b/src/TensorFlowNET.Core/Gradients/Tape.cs
index 35710c14..982ffe1f 100644
--- a/src/TensorFlowNET.Core/Gradients/Tape.cs
+++ b/src/TensorFlowNET.Core/Gradients/Tape.cs
@@ -15,7 +15,7 @@ namespace Tensorflow.Gradients
         bool _recording;
         bool _created_eagerly;
         TensorTape tensor_tape_;
-        OpTape<BackwardFunction, TapeTensor> op_tape_;
+        OpTape op_tape_;
         /// <summary>
         /// A deque-backed stack, whose element references are not invalidated by
         ///
@@ -28,7 +28,7 @@ namespace Tensorflow.Gradients
             _persistent = persistent;
             _created_eagerly = tf.Context.executing_eagerly();
             tensor_tape_ = new TensorTape();
-            op_tape_ = new OpTape<BackwardFunction, TapeTensor>();
+            op_tape_ = new OpTape();
             tensor_usage_ = new UnorderedMap();
             if(_created_eagerly)
                 tf.Context.start_step();
diff --git a/src/TensorFlowNET.Core/Graphs/FuncGraph.cs b/src/TensorFlowNET.Core/Graphs/FuncGraph.cs
index 33207892..df750813 100644
--- a/src/TensorFlowNET.Core/Graphs/FuncGraph.cs
+++ b/src/TensorFlowNET.Core/Graphs/FuncGraph.cs
@@ -161,7 +161,7 @@ namespace Tensorflow.Graphs
             tf.Runner.RecordGradient("captured_value", new[] { graph_const }, null,
                 new[] { tensor },
-                getBackwardFunction: () => _backward_function_wrapper
+                getBackwardFunction: _backward_function_wrapper
                 /*getForwardFunction: forward_function*/);

             return graph_const;
@@ -191,7 +191,7 @@ namespace Tensorflow.Graphs
             tf.Runner.RecordGradient("captured_value", new[] { placeholder }, null,
                 new[] { tensor },
-                getBackwardFunction: () => _backward_function_wrapper
+                getBackwardFunction: _backward_function_wrapper
                 /*getForwardFunction: forward_function*/);

             return placeholder;
diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
index fd3241f0..894f9780 100644
--- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
@@ -15,6 +15,8 @@
 ******************************************************************************/

 using System;
+using System.Collections;
+using System.Collections.Generic;
 using System.Linq;
 using Tensorflow.Contexts;
 using static Tensorflow.Binding;
diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs
index 91cfac30..628d1ce0 100644
--- a/src/TensorFlowNET.Core/Tensors/Tensor.cs
+++ b/src/TensorFlowNET.Core/Tensors/Tensor.cs
@@ -215,7 +215,7 @@ namespace Tensorflow
         /// Evaluates this tensor in a `Session`.
         ///
         /// A dictionary that maps `Tensor` objects to feed values.
-        /// A array corresponding to the value of this tensor.
+        /// A array corresponding to the value of this tensor.
         public NDArray eval(params FeedItem[] feed_dict)
         {
             return ops._eval_using_default_session(this, feed_dict, graph);
@@ -226,7 +226,7 @@ namespace Tensorflow
         ///
         /// A dictionary that maps `Tensor` objects to feed values.
         /// The `Session` to be used to evaluate this tensor.
-        /// A array corresponding to the value of this tensor.
+        /// A array corresponding to the value of this tensor.
         public NDArray eval(Session session, params FeedItem[] feed_dict)
         {
             return ops._eval_using_default_session(this, feed_dict, graph, session);
diff --git a/src/TensorFlowNET.Core/tensorflow.cs b/src/TensorFlowNET.Core/tensorflow.cs
index fd07cc3b..bea1e35b 100644
--- a/src/TensorFlowNET.Core/tensorflow.cs
+++ b/src/TensorFlowNET.Core/tensorflow.cs
@@ -14,7 +14,6 @@ limitations under the License.
 ******************************************************************************/

-using System.Collections.Generic;
 using Serilog;
 using Serilog.Core;
 using Tensorflow.Contexts;
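
Side note on the patch as a whole: the common thread across these files is that the tape now receives and stores a BackwardFunction delegate directly (later invoked as trace.backward_function(out_gradients, unneeded_gradients)), where it previously took a Func<BackwardFunction> getter that was called at record time. The stand-alone C# sketch below mirrors only that shape; BackwardFn, TapeSketch, TapeEntrySketch and Demo are illustrative names rather than TensorFlow.NET APIs, and double[] stands in for Tensor[].

// Minimal sketch (assumed names, not the library's real types): the tape stores a
// backward delegate as-is and calls it during the backward pass, with no
// factory/getter indirection in between.
using System;
using System.Collections.Generic;

// Stand-in for Tensorflow.BackwardFunction: output gradients (plus indices of
// inputs whose gradients are not needed) -> input gradients.
delegate double[] BackwardFn(double[] outputGrads, long[] unneededGradients);

class TapeEntrySketch
{
    public string OpType;
    public BackwardFn Backward;   // stored directly, like OpTapeEntry.backward_function
}

class TapeSketch
{
    readonly List<TapeEntrySketch> _entries = new List<TapeEntrySketch>();

    // Analogue of RecordOperation after the change: the caller hands over the
    // delegate itself rather than a Func<BackwardFn> for the tape to invoke.
    public void RecordOperation(string opType, BackwardFn backward)
    {
        _entries.Add(new TapeEntrySketch { OpType = opType, Backward = backward });
    }

    // Analogue of ComputeGradient: walk recorded ops in reverse and invoke each
    // backward delegate directly.
    public double[] ComputeGradient(double[] seedGradients)
    {
        var grads = seedGradients;
        for (int i = _entries.Count - 1; i >= 0; i--)
            grads = _entries[i].Backward(grads, Array.Empty<long>());
        return grads;
    }
}

class Demo
{
    static void Main()
    {
        var tape = new TapeSketch();
        // Record y = 2 * x, whose backward pass is dy/dx = 2 * upstream gradient.
        tape.RecordOperation("Mul2", (g, unneeded) => new[] { 2.0 * g[0] });
        Console.WriteLine(tape.ComputeGradient(new[] { 1.0 })[0]);   // prints 2
    }
}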