
fix AdamOptimizer for Graph mode.

commit aa13352d74 · Oceania2018 · 5 years ago · tags/v0.20
6 changed files with 35 additions and 33 deletions
  1. src/TensorFlowNET.Core/Gradients/math_grad.cs (+17 -4)
  2. src/TensorFlowNET.Core/Operations/gen_image_ops.cs (+1 -1)
  3. src/TensorFlowNET.Core/Operations/math_ops.cs (+1 -1)
  4. src/TensorFlowNET.Core/Training/AdamOptimizer.cs (+1 -1)
  5. src/TensorFlowNET.Core/Training/Optimizer.cs (+12 -25)
  6. src/TensorFlowNET.Core/Variables/ResourceVariable.cs (+3 -1)

src/TensorFlowNET.Core/Gradients/math_grad.cs (+17 -4)

@@ -542,15 +542,28 @@ namespace Tensorflow.Gradients
             }
 
             input_shape = array_ops.shape(op.inputs[0]);
-            if (!op.get_attr<bool>("keep_dims"))
+
+            if (tf.executing_eagerly())
+            {
+                if (!op.get_attr<bool>("keep_dims"))
+                {
+                    ops.colocate_with(input_shape);
+                    var output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]);
+                    // var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
+                    grad = gen_array_ops.reshape(grad, output_shape_kept_dims);
+                }
+
+                return new Tensor[] { gen_array_ops.broadcast_to(grad, input_shape), null };
+            }
+            else
             {
                 ops.colocate_with(input_shape);
                 var output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]);
-                // var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
+                var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
                 grad = gen_array_ops.reshape(grad, output_shape_kept_dims);
             }
 
-            return new Tensor[] { gen_array_ops.broadcast_to(grad, input_shape), null };
+            return new Tensor[] { gen_array_ops.tile(grad, tile_scaling), null };
         }
 
         [RegisterGradient("RealDiv")]
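
Note: the Sum gradient previously always used broadcast_to, which only behaved correctly under eager execution; in graph mode this commit restores the classic reshape-then-tile formulation. A minimal sketch of the shape arithmetic (plain C#, no TensorFlow dependency; ReducedShape and TileScaling are hypothetical stand-ins for math_ops.reduced_shape and _safe_shape_div):

    using System;

    class SumGradShapes
    {
        // Reduced axes collapse to 1, mirroring math_ops.reduced_shape.
        static int[] ReducedShape(int[] inputShape, int[] axes)
        {
            var kept = (int[])inputShape.Clone();
            foreach (var a in axes) kept[a] = 1;
            return kept;
        }

        // Element-wise safe divide, mirroring _safe_shape_div: the multiplier
        // that tiles the reshaped gradient back up to the input shape.
        static int[] TileScaling(int[] inputShape, int[] keptDims)
        {
            var t = new int[inputShape.Length];
            for (int i = 0; i < t.Length; i++)
                t[i] = inputShape[i] / Math.Max(keptDims[i], 1);
            return t;
        }

        static void Main()
        {
            // For x of shape [2, 3] summed over axis 1, the incoming gradient
            // has shape [2]; it is reshaped to [2, 1] and tiled by [1, 3]
            // back to [2, 3].
            var kept = ReducedShape(new[] { 2, 3 }, new[] { 1 });  // [2, 1]
            var tile = TileScaling(new[] { 2, 3 }, kept);          // [1, 3]
            Console.WriteLine($"kept=[{string.Join(", ", kept)}] tile=[{string.Join(", ", tile)}]");
        }
    }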


src/TensorFlowNET.Core/Operations/gen_image_ops.cs (+1 -1)

@@ -66,7 +66,7 @@ namespace Tensorflow
             int ratio = 1,
             bool fancy_upscaling = true,
             bool try_recover_truncated = false,
-            int acceptable_fraction = 1,
+            float acceptable_fraction = 1,
             string dct_method = "",
             string name = null)
         {
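
Note: TensorFlow's DecodeJpeg op declares acceptable_fraction as a float attribute (the minimum fraction of lines that must decode before a truncated input is rejected), so the C# wrapper now matches the attribute type. A hedged usage sketch, assuming the enclosing method is the decode_jpeg wrapper and that `contents` is a scalar string Tensor of JPEG bytes produced elsewhere:

    // Illustrative fragment only; `contents` comes from some file-read op.
    var image = gen_image_ops.decode_jpeg(contents,
                                          channels: 3,
                                          acceptable_fraction: 0.8f);  // fractional values are now representable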


src/TensorFlowNET.Core/Operations/math_ops.cs (+1 -1)

@@ -652,7 +652,7 @@ namespace Tensorflow
             }
             else
             {
-                if(x.rank > -1)
+                if (x.rank > -1 && tf.executing_eagerly())
                     return constant_op.constant(np.arange(x.rank));
 
                 var rank = array_ops.rank(x);
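
Note: the constant shortcut for the reduction axes is now taken only under eager execution; in graph mode the axes are built symbolically from array_ops.rank(x). The constant path simply enumerates every axis, equivalent to this plain-C# stand-in for np.arange(x.rank) (AllAxes is a hypothetical helper, shown only to make the shortcut concrete):

    // All axes of a tensor of the given rank, to reduce over every dimension.
    static int[] AllAxes(int rank)
    {
        var axes = new int[rank];
        for (int i = 0; i < rank; i++)
            axes[i] = i;              // rank 3 -> [0, 1, 2]
        return axes;
    }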


src/TensorFlowNET.Core/Training/AdamOptimizer.cs (+1 -1)

@@ -109,7 +109,7 @@ namespace Tensorflow.Train
             return control_flow_ops.group(new[] { var_update, m_t, v_t });
         }
 
-        protected override void _create_slots(ResourceVariable[] var_list)
+        protected override void _create_slots(IVariableV1[] var_list)
         {
             var first_var = var_list.OrderBy(x => x.Name).First();
             _create_non_slot_variable(initial_value: _beta1, name: "beta1_power", colocate_with: first_var);
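
Note: this follows from the base-class change in Optimizer.cs below — C# requires an override to match the base signature exactly, so widening the parameter there forces the same widening here. A minimal stand-alone sketch of that constraint (OptimizerBase, AdamLikeOptimizer, and IVariableV1 are stand-ins, not the real types):

    interface IVariableV1 { }                    // stand-in

    abstract class OptimizerBase
    {
        // Base signature now takes the interface...
        protected virtual void _create_slots(IVariableV1[] var_list) { }
    }

    class AdamLikeOptimizer : OptimizerBase
    {
        // ...so every override must take it too; a ResourceVariable[] override
        // would fail to compile (CS0115: no suitable method found to override).
        protected override void _create_slots(IVariableV1[] var_list) { }
    }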


src/TensorFlowNET.Core/Training/Optimizer.cs (+12 -25)

@@ -107,7 +107,7 @@ namespace Tensorflow
         /// </returns>
         public Operation minimize(Tensor loss,
             IVariableV1 global_step = null,
-            List<ResourceVariable> var_list=null,
+            List<IVariableV1> var_list=null,
             GateGradientType gate_gradients = GateGradientType.GATE_OP,
             int? aggregation_method=null,
             bool colocate_gradients_with_ops = false, string name=null, Tensor grad_loss=null)
@@ -142,17 +142,17 @@ namespace Tensorflow
         /// <returns>
         /// An `Operation` that applies the specified gradients. If `global_step`
         /// was not None, that operation also increments `global_step`.</returns>
-        public Operation apply_gradients(Tuple<Tensor, ResourceVariable>[] grads_and_vars, IVariableV1 global_step = null, string name = null)
+        public Operation apply_gradients(Tuple<Tensor, IVariableV1>[] grads_and_vars, IVariableV1 global_step = null, string name = null)
         {
             // No DistributionStrategy case.
-            var converted_grads_and_vars = new List<(Tensor, ResourceVariable, _OptimizableVariable)>();
+            var converted_grads_and_vars = new List<(Tensor, IVariableV1, _OptimizableVariable)>();
             foreach (var (g, v) in grads_and_vars)
             {
                 if(g != null)
                 {
                     // Convert the grad to Tensor or IndexedSlices if necessary.
                     var gR = ops.convert_to_tensor_or_indexed_slices(g);
-                    var p = optimizer._get_processor(v);
+                    var p = optimizer._get_processor(v as ResourceVariable);
                     converted_grads_and_vars.Add((gR, v, p));
                 }
             }
@@ -230,7 +230,7 @@ namespace Tensorflow
         /// silently ignored).
         /// </summary>
         /// <param name="var_list"></param>
-        protected virtual void _create_slots(ResourceVariable[] var_list)
+        protected virtual void _create_slots(IVariableV1[] var_list)
         {
         }
@@ -369,8 +369,8 @@ namespace Tensorflow
         /// A list of (gradient, variable) pairs. Variable is always present, but
         /// gradient can be `None`.
         /// </returns>
-        public Tuple<Tensor, ResourceVariable>[] compute_gradients(Tensor loss,
-            List<ResourceVariable> var_list = null,
+        public Tuple<Tensor, IVariableV1>[] compute_gradients(Tensor loss,
+            List<IVariableV1> var_list = null,
             int? aggregation_method = null,
             GateGradientType gate_gradients = GateGradientType.GATE_OP,
             bool colocate_gradients_with_ops = false,
@@ -381,26 +381,13 @@ namespace Tensorflow
 
             if(var_list == null)
             {
-                var vars = ops.get_collection<ResourceVariable>(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES);
+                var vars = ops.get_collection<IVariableV1>(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES);
                 var tmp = variables.trainable_variables();
-                switch (tmp)
-                {
-                    case List<ResourceVariable> values:
-                        var_list = values.Concat(vars).ToList();
-                        break;
-                    /*case List<RefVariable> values:
-                        var_list = values.Concat(vars).ToList();
-                        break;
-                    case List<IVariableV1> values:
-                        var_list = values.Select(x => x as RefVariable).Concat(vars).ToList();
-                        break;*/
-                    default:
-                        throw new NotImplementedException("");
-                }
+                var_list = (tmp as List<IVariableV1>).Concat(vars).ToList();
             }
 
-            var_list = var_list.Concat(ops.get_collection<ResourceVariable>(tf.GraphKeys._STREAMING_MODEL_PORTS)).ToList();
-            var processors = var_list.Select(v => optimizer._get_processor(v)).ToList();
+            var_list = var_list.Concat(ops.get_collection<IVariableV1>(tf.GraphKeys._STREAMING_MODEL_PORTS)).ToList();
+            var processors = var_list.Select(v => optimizer._get_processor(v as ResourceVariable)).ToList();
             var var_refs = processors.Select(x => x.target()).ToArray();
 
             var grads = gradients_impl.gradients(new Tensor[] { loss }, var_refs, grad_ys: grad_loss == null ? null : new Tensor[] { grad_loss },
@@ -412,7 +399,7 @@ namespace Tensorflow
             grads = control_flow_ops.tuple(grads);
 
             var grads_and_vars = zip(grads, var_list)
-                .Select(x => new Tuple<Tensor, ResourceVariable>(x.Item1, x.Item2))
+                .Select(x => new Tuple<Tensor, IVariableV1>(x.Item1, x.Item2))
                 .ToArray();
 
             return grads_and_vars;
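
Note: the public training surface (minimize, compute_gradients, apply_gradients, _create_slots) is widened from the concrete ResourceVariable to the IVariableV1 interface, so graph-mode variables can travel the same path as eager ones; _get_processor still expects a ResourceVariable, hence the `v as ResourceVariable` casts. A minimal usage sketch (illustrative, assuming the standard TF.NET v0.x training API, with `loss` a scalar Tensor built elsewhere):

    // After this change, var_list may hold any IVariableV1; passing null still
    // collects the trainable variables from the graph's collections.
    var optimizer = tf.train.AdamOptimizer(0.01f);
    var train_op = optimizer.minimize(loss);   // an Operation to run in a Session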


src/TensorFlowNET.Core/Variables/ResourceVariable.cs (+3 -1)

@@ -155,7 +155,7 @@ namespace Tensorflow
                 _graph_element = value;
             });
 
-            ops.add_to_collections(collections, this);
+            ops.add_to_collections<IVariableV1>(collections, this);
         }
         else
         {
@@ -184,6 +184,8 @@ namespace Tensorflow
             var g = ops.get_default_graph();
             var prepend_name_scope = ops.prepend_name_scope(variable_def.VariableName, import_scope: import_scope);
             handle = g.as_graph_element(prepend_name_scope) as Tensor;
+            _handle_name = handle.name;
+            _name = handle.name;
             _shape = new TensorShape(handle.op.get_attr("shape") as TensorShapeProto);
             prepend_name_scope = ops.prepend_name_scope(variable_def.InitializerName, import_scope: import_scope);
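
Note: two independent fixes here. First, the variable now registers itself in graph collections under the IVariableV1 interface type, which keeps the get_collection<IVariableV1>(...) lookup in Optimizer.compute_gradients able to see it. Second, when a variable is rebuilt from a VariableDef, its name fields are populated from the imported handle. A toy model of why the typed add matters (all types below are stand-ins, not the real TF.NET ones):

    using System.Collections.Generic;

    interface IVariableV1 { }                    // stand-in
    class ResourceVariable : IVariableV1 { }     // stand-in

    class Graph
    {
        // Each collection is one List<T> stored under a string key.
        readonly Dictionary<string, object> collections = new Dictionary<string, object>();

        public void AddToCollection<T>(string name, T item)
        {
            if (collections.TryGetValue(name, out var o) && o is List<T> list)
                list.Add(item);
            else
                collections[name] = new List<T> { item };
        }

        public List<T> GetCollection<T>(string name)
            => collections.TryGetValue(name, out var o) && o is List<T> list
                ? list : new List<T>();
    }

    class Demo
    {
        static void Main()
        {
            var g = new Graph();

            // Implicit T = ResourceVariable: stored as List<ResourceVariable>,
            // so a lookup typed to the interface comes back empty (List<T> is invariant).
            g.AddToCollection("trainable", new ResourceVariable());
            System.Console.WriteLine(g.GetCollection<IVariableV1>("trainable").Count);   // 0

            // Explicit T = IVariableV1 keeps storage and lookup aligned.
            g.AddToCollection<IVariableV1>("trainable2", new ResourceVariable());
            System.Console.WriteLine(g.GetCollection<IVariableV1>("trainable2").Count);  // 1
        }
    }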

