
fix AdamOptimizer for Graph mode.

tags/v0.20
Oceania2018 committed 5 years ago
commit aa13352d74
6 changed files with 35 additions and 33 deletions
  1. src/TensorFlowNET.Core/Gradients/math_grad.cs  +17 -4
  2. src/TensorFlowNET.Core/Operations/gen_image_ops.cs  +1 -1
  3. src/TensorFlowNET.Core/Operations/math_ops.cs  +1 -1
  4. src/TensorFlowNET.Core/Training/AdamOptimizer.cs  +1 -1
  5. src/TensorFlowNET.Core/Training/Optimizer.cs  +12 -25
  6. src/TensorFlowNET.Core/Variables/ResourceVariable.cs  +3 -1
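Note: the common thread in this commit is widening the optimizer surface from the concrete ResourceVariable type to the IVariableV1 interface, and giving the Sum gradient a dedicated graph-mode path, so AdamOptimizer works without eager execution. Below is a minimal sketch of the scenario being fixed, assuming the v0.20-era TF.NET session API; the variable names, shapes, and the elided feed step are illustrative, not part of this commit.

    // Minimal graph-mode training sketch (assumed v0.20-era TF.NET API; illustrative).
    using static Tensorflow.Binding;

    var X = tf.placeholder(tf.float32);
    var Y = tf.placeholder(tf.float32);
    var W = tf.Variable(0.1f, name: "weight");
    var b = tf.Variable(0.0f, name: "bias");

    // reduce_sum is why the math_grad.cs change below matters in graph mode.
    var loss = tf.reduce_sum(tf.pow(tf.add(tf.multiply(X, W), b) - Y, 2f));
    var train_op = tf.train.AdamOptimizer(0.01f).minimize(loss);

    using (var sess = tf.Session())
    {
        sess.run(tf.global_variables_initializer());
        // sess.run(train_op, ...) with feed values for X and Y; feeding elided here.
    }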

src/TensorFlowNET.Core/Gradients/math_grad.cs  +17 -4

@@ -542,15 +542,28 @@ namespace Tensorflow.Gradients
             }
 
             input_shape = array_ops.shape(op.inputs[0]);
-            if (!op.get_attr<bool>("keep_dims"))
+
+            if (tf.executing_eagerly())
             {
+                if (!op.get_attr<bool>("keep_dims"))
+                {
+                    ops.colocate_with(input_shape);
+                    var output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]);
+                    // var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
+                    grad = gen_array_ops.reshape(grad, output_shape_kept_dims);
+                }
+
+                return new Tensor[] { gen_array_ops.broadcast_to(grad, input_shape), null };
+            }
+            else
+            {
                 ops.colocate_with(input_shape);
                 var output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]);
-                // var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
+                var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
                 grad = gen_array_ops.reshape(grad, output_shape_kept_dims);
-            }
 
-            return new Tensor[] { gen_array_ops.broadcast_to(grad, input_shape), null };
+                return new Tensor[] { gen_array_ops.tile(grad, tile_scaling), null };
+            }
         }
 
         [RegisterGradient("RealDiv")]
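In eager mode the shapes are concrete, so broadcast_to can rebuild the Sum gradient directly; the graph branch instead derives explicit tile multiples. A rough walk-through of that shape arithmetic in plain C# (these are hypothetical stand-ins for math_ops.reduced_shape and _safe_shape_div, not TF.NET calls):

    using System;

    // Gradient of reduce_sum over axis 1 for an input of shape [2, 3]:
    int[] inputShape = { 2, 3 };
    int[] axes = { 1 };

    // reduced_shape: reduced dimensions collapse to 1 -> [2, 1]
    int[] keptDims = (int[])inputShape.Clone();
    foreach (var a in axes) keptDims[a] = 1;

    // _safe_shape_div: elementwise input_shape / kept_dims -> tile multiples [1, 3]
    int[] tileScaling = new int[inputShape.Length];
    for (int i = 0; i < inputShape.Length; i++)
        tileScaling[i] = inputShape[i] / Math.Max(keptDims[i], 1);

    // The incoming grad (shape [2]) is reshaped to [2, 1], then tiled by [1, 3],
    // recovering a gradient with the original input shape [2, 3].
    Console.WriteLine(string.Join(",", tileScaling));   // prints 1,3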


src/TensorFlowNET.Core/Operations/gen_image_ops.cs  +1 -1

@@ -66,7 +66,7 @@ namespace Tensorflow
             int ratio = 1,
             bool fancy_upscaling = true,
             bool try_recover_truncated = false,
-            int acceptable_fraction = 1,
+            float acceptable_fraction = 1,
             string dct_method = "",
             string name = null)
         {
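acceptable_fraction is the minimum fraction of JPEG lines that must decode successfully before the op rejects the image, so meaningful values such as 0.5 lie strictly between 0 and 1; typed as int it could only ever be 0 or 1. A hedged call-site fragment (contents stands for a scalar string tensor of JPEG bytes obtained elsewhere):

    // With the float parameter, a half-truncated JPEG can now be tolerated.
    var image = gen_image_ops.decode_jpeg(contents,
        try_recover_truncated: true,
        acceptable_fraction: 0.5f);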


src/TensorFlowNET.Core/Operations/math_ops.cs  +1 -1

@@ -652,7 +652,7 @@ namespace Tensorflow
             }
             else
             {
-                if(x.rank > -1)
+                if (x.rank > -1 && tf.executing_eagerly())
                     return constant_op.constant(np.arange(x.rank));
 
                 var rank = array_ops.rank(x);
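This appears to be the reduction-axes helper: when every axis is reduced it needs range(0, rank). The static shortcut via np.arange(x.rank) is now taken only when executing eagerly; in graph mode the axes are always built from the array_ops.rank(x) op, keeping the whole gradient computation symbolic.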


src/TensorFlowNET.Core/Training/AdamOptimizer.cs  +1 -1

@@ -109,7 +109,7 @@ namespace Tensorflow.Train
             return control_flow_ops.group(new[] { var_update, m_t, v_t });
         }
 
-        protected override void _create_slots(ResourceVariable[] var_list)
+        protected override void _create_slots(IVariableV1[] var_list)
         {
             var first_var = var_list.OrderBy(x => x.Name).First();
             _create_non_slot_variable(initial_value: _beta1, name: "beta1_power", colocate_with: first_var);
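This override has to track the matching base-class change in Optimizer.cs below; the body compiles unchanged because Name is declared on the IVariableV1 interface, so OrderBy(x => x.Name) works for graph-mode variables as well as for ResourceVariable.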


src/TensorFlowNET.Core/Training/Optimizer.cs  +12 -25

@@ -107,7 +107,7 @@ namespace Tensorflow
/// </returns>
public Operation minimize(Tensor loss,
IVariableV1 global_step = null,
List<ResourceVariable> var_list=null,
List<IVariableV1> var_list=null,
GateGradientType gate_gradients = GateGradientType.GATE_OP,
int? aggregation_method=null,
bool colocate_gradients_with_ops = false, string name=null, Tensor grad_loss=null)
@@ -142,17 +142,17 @@ namespace Tensorflow
/// <returns>
/// An `Operation` that applies the specified gradients. If `global_step`
/// was not None, that operation also increments `global_step`.</returns>
public Operation apply_gradients(Tuple<Tensor, ResourceVariable>[] grads_and_vars, IVariableV1 global_step = null, string name = null)
public Operation apply_gradients(Tuple<Tensor, IVariableV1>[] grads_and_vars, IVariableV1 global_step = null, string name = null)
{
// No DistributionStrategy case.
var converted_grads_and_vars = new List<(Tensor, ResourceVariable, _OptimizableVariable)>();
var converted_grads_and_vars = new List<(Tensor, IVariableV1, _OptimizableVariable)>();
foreach (var (g, v) in grads_and_vars)
{
if(g != null)
{
// Convert the grad to Tensor or IndexedSlices if necessary.
var gR = ops.convert_to_tensor_or_indexed_slices(g);
var p = optimizer._get_processor(v);
var p = optimizer._get_processor(v as ResourceVariable);
converted_grads_and_vars.Add((gR, v, p));
}
}
@@ -230,7 +230,7 @@ namespace Tensorflow
/// silently ignored).
/// </summary>
/// <param name="var_list"></param>
protected virtual void _create_slots(ResourceVariable[] var_list)
protected virtual void _create_slots(IVariableV1[] var_list)
{
}
@@ -369,8 +369,8 @@ namespace Tensorflow
/// A list of (gradient, variable) pairs. Variable is always present, but
/// gradient can be `None`.
/// </returns>
public Tuple<Tensor, ResourceVariable>[] compute_gradients(Tensor loss,
List<ResourceVariable> var_list = null,
public Tuple<Tensor, IVariableV1>[] compute_gradients(Tensor loss,
List<IVariableV1> var_list = null,
int? aggregation_method = null,
GateGradientType gate_gradients = GateGradientType.GATE_OP,
bool colocate_gradients_with_ops = false,
@@ -381,26 +381,13 @@ namespace Tensorflow

if(var_list == null)
{
var vars = ops.get_collection<ResourceVariable>(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES);
var vars = ops.get_collection<IVariableV1>(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES);
var tmp = variables.trainable_variables();
switch (tmp)
{
case List<ResourceVariable> values:
var_list = values.Concat(vars).ToList();
break;
/*case List<RefVariable> values:
var_list = values.Concat(vars).ToList();
break;
case List<IVariableV1> values:
var_list = values.Select(x => x as RefVariable).Concat(vars).ToList();
break;*/
default:
throw new NotImplementedException("");
}
var_list = (tmp as List<IVariableV1>).Concat(vars).ToList();
}

var_list = var_list.Concat(ops.get_collection<ResourceVariable>(tf.GraphKeys._STREAMING_MODEL_PORTS)).ToList();
var processors = var_list.Select(v => optimizer._get_processor(v)).ToList();
var_list = var_list.Concat(ops.get_collection<IVariableV1>(tf.GraphKeys._STREAMING_MODEL_PORTS)).ToList();
var processors = var_list.Select(v => optimizer._get_processor(v as ResourceVariable)).ToList();
var var_refs = processors.Select(x => x.target()).ToArray();

var grads = gradients_impl.gradients(new Tensor[] { loss }, var_refs, grad_ys: grad_loss == null ? null : new Tensor[] { grad_loss },
@@ -412,7 +399,7 @@ namespace Tensorflow
grads = control_flow_ops.tuple(grads);

var grads_and_vars = zip(grads, var_list)
.Select(x => new Tuple<Tensor, ResourceVariable>(x.Item1, x.Item2))
.Select(x => new Tuple<Tensor, IVariableV1>(x.Item1, x.Item2))
.ToArray();

return grads_and_vars;
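With both halves of the pipeline now exchanging Tuple<Tensor, IVariableV1>, callers can split minimize into its two phases without casting. A hedged fragment (loss as built in the earlier sketch; the split is shown only to illustrate the new signatures):

    var optimizer = tf.train.AdamOptimizer(0.001f);
    // compute_gradients now returns Tuple<Tensor, IVariableV1>[] per the hunk above.
    Tuple<Tensor, IVariableV1>[] grads_and_vars = optimizer.compute_gradients(loss);
    var train_op = optimizer.apply_gradients(grads_and_vars);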


src/TensorFlowNET.Core/Variables/ResourceVariable.cs  +3 -1

@@ -155,7 +155,7 @@ namespace Tensorflow
                     _graph_element = value;
                 });
 
-                ops.add_to_collections(collections, this);
+                ops.add_to_collections<IVariableV1>(collections, this);
             }
             else
             {
@@ -184,6 +184,8 @@ namespace Tensorflow
             var g = ops.get_default_graph();
             var prepend_name_scope = ops.prepend_name_scope(variable_def.VariableName, import_scope: import_scope);
             handle = g.as_graph_element(prepend_name_scope) as Tensor;
+            _handle_name = handle.name;
+            _name = handle.name;
             _shape = new TensorShape(handle.op.get_attr("shape") as TensorShapeProto);
             prepend_name_scope = ops.prepend_name_scope(variable_def.InitializerName, import_scope: import_scope);
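Registering the variable under the interface type matters because compute_gradients above now calls ops.get_collection<IVariableV1>(...); a variable stored under the concrete ResourceVariable type would not be found by that lookup. The second hunk fills in _handle_name and _name when a variable is reconstructed from a VariableDef, which is presumably what keeps the Name-based ordering in AdamOptimizer._create_slots from seeing an empty name.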

