Browse Source

add apply_adam, _apply_dense for Adam. #271

tags/v0.9
Oceania2018 6 years ago
parent
commit
93eb56e5a3
8 changed files with 146 additions and 110 deletions
  1. +6
    -6
      src/TensorFlowNET.Core/Clustering/_InitializeClustersOpFactory.cs
  2. +1
    -1
      src/TensorFlowNET.Core/Gradients/array_grad.cs
  3. +5
    -5
      src/TensorFlowNET.Core/Gradients/gradients_util.cs
  4. +1
    -1
      src/TensorFlowNET.Core/Gradients/nn_grad.cs
  5. +89
    -95
      src/TensorFlowNET.Core/Tensors/Tensor.cs
  6. +20
    -1
      src/TensorFlowNET.Core/Train/AdamOptimizer.cs
  7. +1
    -1
      src/TensorFlowNET.Core/Train/Optimizer.cs
  8. +23
    -0
      src/TensorFlowNET.Core/Train/gen_training_ops.py.cs

+ 6
- 6
src/TensorFlowNET.Core/Clustering/_InitializeClustersOpFactory.cs View File

@@ -47,10 +47,10 @@ namespace Tensorflow.Clustering
_cluster_centers_updated = cluster_centers_updated; _cluster_centers_updated = cluster_centers_updated;
_cluster_centers_initialized = cluster_centers_initialized; _cluster_centers_initialized = cluster_centers_initialized;


_num_selected = array_ops.shape(_cluster_centers)[0];
_num_selected = array_ops.shape(_cluster_centers).slice(0);
_num_remaining = _num_clusters - _num_selected; _num_remaining = _num_clusters - _num_selected;


_num_data = math_ops.add_n(_inputs.Select(i => array_ops.shape(i)[0]).ToArray());
_num_data = math_ops.add_n(_inputs.Select(i => array_ops.shape(i).slice(0)).ToArray());
} }


private Tensor _initialize() private Tensor _initialize()
@@ -68,7 +68,7 @@ namespace Tensorflow.Clustering
}, },
() => () =>
{ {
return control_flow_ops.no_op().output[0];
return control_flow_ops.no_op().output.slice(0);
}); });
}); });
} }
@@ -90,7 +90,7 @@ namespace Tensorflow.Clustering
// Adds some centers and returns the number of centers remaining. // Adds some centers and returns the number of centers remaining.
var new_centers = _choose_initial_centers(); var new_centers = _choose_initial_centers();
if (_distance_metric == KMeans.COSINE_DISTANCE) if (_distance_metric == KMeans.COSINE_DISTANCE)
new_centers = nn_impl.l2_normalize(new_centers[0], axis: 1);
new_centers = nn_impl.l2_normalize(new_centers.slice(0), axis: 1);


// If cluster_centers is empty, it doesn't have the right shape for concat. // If cluster_centers is empty, it doesn't have the right shape for concat.
var all_centers = control_flow_ops.cond(math_ops.equal(_num_selected, 0), var all_centers = control_flow_ops.cond(math_ops.equal(_num_selected, 0),
@@ -99,12 +99,12 @@ namespace Tensorflow.Clustering


var a = state_ops.assign(_cluster_centers, all_centers, validate_shape: false); var a = state_ops.assign(_cluster_centers, all_centers, validate_shape: false);


return _num_clusters - array_ops.shape(a)[0];
return _num_clusters - array_ops.shape(a).slice(0);
} }


private Tensor _choose_initial_centers() private Tensor _choose_initial_centers()
{ {
return _greedy_batch_sampler()[0];
return _greedy_batch_sampler().slice(0);
} }


private Tensor _greedy_batch_sampler() private Tensor _greedy_batch_sampler()


+ 1
- 1
src/TensorFlowNET.Core/Gradients/array_grad.cs View File

@@ -156,7 +156,7 @@ namespace Tensorflow.Gradients
// For axis 0 gathers, build an appropriately shaped IndexedSlices. // For axis 0 gathers, build an appropriately shaped IndexedSlices.
if((int)axis_static == 0) if((int)axis_static == 0)
{ {
var params_tail_shape = params_shape[new NumSharp.Slice(start:1)];
var params_tail_shape = params_shape.slice(new NumSharp.Slice(start:1));
var values_shape = array_ops.concat(new[] { indices_size, params_tail_shape }, 0); var values_shape = array_ops.concat(new[] { indices_size, params_tail_shape }, 0);
var values = array_ops.reshape(grad, values_shape); var values = array_ops.reshape(grad, values_shape);
indices = array_ops.reshape(indices, indices_size); indices = array_ops.reshape(indices, indices_size);


+ 5
- 5
src/TensorFlowNET.Core/Gradients/gradients_util.cs View File

@@ -105,16 +105,16 @@ namespace Tensorflow
var has_out_grads = true; var has_out_grads = true;
if (has_out_grads && !stop_ops.Contains(op)) if (has_out_grads && !stop_ops.Contains(op))
{ {
// A grad_fn must be defined, either as a function or as None
// for ops that do not have gradients.
var grad_fn = ops.get_gradient_function(op);

if (is_func_call) if (is_func_call)
{ {


} }
else else
{ {
// A grad_fn must be defined, either as a function or as None
// for ops that do not have gradients.
var grad_fn = ops.get_gradient_function(op);

foreach (var (i, out_grad) in enumerate(out_grads)) foreach (var (i, out_grad) in enumerate(out_grads))
{ {
if (out_grad == null) if (out_grad == null)
@@ -322,7 +322,7 @@ namespace Tensorflow
else else
{ {
used = "add_n"; used = "add_n";
out_grads[i] = new List<Tensor> { _MultiDeviceAddN(out_grad.ToArray(), gradient_uid) };
return_grads[i] = _MultiDeviceAddN(out_grad.ToArray(), gradient_uid);
} }
} }
else else


+ 1
- 1
src/TensorFlowNET.Core/Gradients/nn_grad.cs View File

@@ -200,7 +200,7 @@ namespace Tensorflow.Gradients


var in_lastdim = array_ops.gather(math_ops.cast(in_shape, TF_DataType.TF_INT64), var in_lastdim = array_ops.gather(math_ops.cast(in_shape, TF_DataType.TF_INT64),
array_ops.size(in_shape) - 1); array_ops.size(in_shape) - 1);
var outerdim = array_ops.shape(ind_2d)[0];
var outerdim = array_ops.shape(ind_2d).slice(0);


// Compute linear indices(flattened to 1D). // Compute linear indices(flattened to 1D).
var cast1 = math_ops.cast(outerdim, TF_DataType.TF_INT64); var cast1 = math_ops.cast(outerdim, TF_DataType.TF_INT64);


+ 89
- 95
src/TensorFlowNET.Core/Tensors/Tensor.cs View File

@@ -224,116 +224,110 @@ namespace Tensorflow
} }
} }


public Tensor this[Slice slice]
public Tensor slice(Slice slice)
{ {
get
{
var slice_spec = new int[] { slice.Start.Value };
var begin = new List<int>();
var end = new List<int>();
var strides = new List<int>();
var slice_spec = new int[] { slice.Start.Value };
var begin = new List<int>();
var end = new List<int>();
var strides = new List<int>();


var index = 0;
var (new_axis_mask, shrink_axis_mask) = (0, 0);
var (begin_mask, end_mask) = (0, 0);
var ellipsis_mask = 0;
var index = 0;
var (new_axis_mask, shrink_axis_mask) = (0, 0);
var (begin_mask, end_mask) = (0, 0);
var ellipsis_mask = 0;


foreach (var s in slice_spec)
foreach (var s in slice_spec)
{
begin.Add(s);
if (slice.Stop.HasValue)
{ {
begin.Add(s);
if(slice.Stop.HasValue)
{
end.Add(slice.Stop.Value);
}
else
{
end.Add(0);
end_mask |= (1 << index);
}
strides.Add(slice.Step);

index += 1;
end.Add(slice.Stop.Value);
} }

return with(ops.name_scope(null, "strided_slice", new { begin, end, strides }), scope =>
else
{ {
string name = scope;
if (begin != null)
{
var (packed_begin, packed_end, packed_strides) =
(array_ops.stack(begin.ToArray()),
array_ops.stack(end.ToArray()),
array_ops.stack(strides.ToArray()));

return gen_array_ops.strided_slice(
this,
packed_begin,
packed_end,
packed_strides,
begin_mask: begin_mask,
end_mask: end_mask,
shrink_axis_mask: shrink_axis_mask,
new_axis_mask: new_axis_mask,
ellipsis_mask: ellipsis_mask,

name: name);
}

throw new NotImplementedException("");
});
end.Add(0);
end_mask |= (1 << index);
}
strides.Add(slice.Step);

index += 1;
} }

return with(ops.name_scope(null, "strided_slice", new { begin, end, strides }), scope =>
{
string name = scope;
if (begin != null)
{
var (packed_begin, packed_end, packed_strides) =
(array_ops.stack(begin.ToArray()),
array_ops.stack(end.ToArray()),
array_ops.stack(strides.ToArray()));

return gen_array_ops.strided_slice(
this,
packed_begin,
packed_end,
packed_strides,
begin_mask: begin_mask,
end_mask: end_mask,
shrink_axis_mask: shrink_axis_mask,
new_axis_mask: new_axis_mask,
ellipsis_mask: ellipsis_mask,

name: name);
}

throw new NotImplementedException("");
});
} }


public Tensor this[int start]
public Tensor slice(int start)
{ {
get
{
var slice_spec = new int[] { start };
var begin = new List<int>();
var end = new List<int>();
var strides = new List<int>();
var slice_spec = new int[] { start };
var begin = new List<int>();
var end = new List<int>();
var strides = new List<int>();

var index = 0;
var (new_axis_mask, shrink_axis_mask) = (0, 0);
var (begin_mask, end_mask) = (0, 0);
var ellipsis_mask = 0;


var index = 0;
var (new_axis_mask, shrink_axis_mask) = (0, 0);
var (begin_mask, end_mask) = (0, 0);
var ellipsis_mask = 0;
foreach (var s in slice_spec)
{
begin.Add(s);
end.Add(s + 1);
strides.Add(1);
shrink_axis_mask |= (1 << index);
index += 1;
}


foreach (var s in slice_spec)
return with(ops.name_scope(null, "strided_slice", new { begin, end, strides }), scope =>
{
string name = scope;
if (begin != null)
{ {
begin.Add(s);
end.Add(s + 1);
strides.Add(1);
shrink_axis_mask |= (1 << index);
index += 1;
var (packed_begin, packed_end, packed_strides) =
(array_ops.stack(begin.ToArray()),
array_ops.stack(end.ToArray()),
array_ops.stack(strides.ToArray()));

return gen_array_ops.strided_slice(
this,
packed_begin,
packed_end,
packed_strides,
begin_mask: begin_mask,
end_mask: end_mask,
shrink_axis_mask: shrink_axis_mask,
new_axis_mask: new_axis_mask,
ellipsis_mask: ellipsis_mask,

name: name);
} }


return with(ops.name_scope(null, "strided_slice", new { begin, end, strides }), scope =>
{
string name = scope;
if (begin != null)
{
var (packed_begin, packed_end, packed_strides) =
(array_ops.stack(begin.ToArray()),
array_ops.stack(end.ToArray()),
array_ops.stack(strides.ToArray()));

return gen_array_ops.strided_slice(
this,
packed_begin,
packed_end,
packed_strides,
begin_mask: begin_mask,
end_mask: end_mask,
shrink_axis_mask: shrink_axis_mask,
new_axis_mask: new_axis_mask,
ellipsis_mask: ellipsis_mask,

name: name);
}

throw new NotImplementedException("");
});
}
throw new NotImplementedException("");
});
} }


public override string ToString() public override string ToString()


+ 20
- 1
src/TensorFlowNET.Core/Train/AdamOptimizer.cs View File

@@ -16,7 +16,7 @@ namespace Tensorflow.Train
float _beta1; float _beta1;
float _beta2; float _beta2;
float _epsilon; float _epsilon;
Tensor _lr_t, _beta1_t, _beta2_t, _epsilon_t;
Tensor _beta1_t, _beta2_t, _epsilon_t;


public AdamOptimizer(float learning_rate, float beta1 = 0.9f, float beta2 = 0.999f, float epsilon = 1e-8f, bool use_locking = false, string name = "Adam") public AdamOptimizer(float learning_rate, float beta1 = 0.9f, float beta2 = 0.999f, float epsilon = 1e-8f, bool use_locking = false, string name = "Adam")
: base(learning_rate, use_locking, name) : base(learning_rate, use_locking, name)
@@ -34,6 +34,25 @@ namespace Tensorflow.Train
}); });
} }


/// <summary>
/// Builds the dense update for <paramref name="var"/> by delegating to
/// <c>gen_training_ops.apply_adam</c> and returning the operation behind its result.
/// </summary>
/// <param name="grad">Gradient tensor forwarded unchanged to <c>apply_adam</c>.</param>
/// <param name="var">Variable to update; its "m" and "v" slots are looked up via <c>get_slot</c>.</param>
/// <returns>The <c>op</c> of the tensor returned by <c>apply_adam</c>.</returns>
public override Operation _apply_dense(Tensor grad, RefVariable var)
{
    // Slots stored for this variable under the names "m" and "v".
    var slot_m = get_slot(var, "m");
    var slot_v = get_slot(var, "v");
    var (beta1_acc, beta2_acc) = _get_beta_accumulators();

    // Every scalar input below is cast to the variable's base dtype.
    var base_dtype = var.dtype.as_base_dtype();

    // The casts are created in the same order as apply_adam's parameters.
    var beta1_power_t = math_ops.cast(beta1_acc, base_dtype);
    var beta2_power_t = math_ops.cast(beta2_acc, base_dtype);
    var lr_t = math_ops.cast(_lr_t, base_dtype);
    var beta1_t = math_ops.cast(_beta1_t, base_dtype);
    var beta2_t = math_ops.cast(_beta2_t, base_dtype);
    var epsilon_t = math_ops.cast(_epsilon_t, base_dtype);

    var updated = gen_training_ops.apply_adam(
        var, slot_m, slot_v,
        beta1_power_t, beta2_power_t,
        lr_t, beta1_t, beta2_t, epsilon_t,
        grad,
        use_locking: _use_locking);

    return updated.op;
}

private Operation _apply_sparse_shared(Tensor grad, RefVariable var, Tensor indices, Func<RefVariable, Tensor, Tensor, Tensor> scatter_add) private Operation _apply_sparse_shared(Tensor grad, RefVariable var, Tensor indices, Func<RefVariable, Tensor, Tensor, Tensor> scatter_add)
{ {
var (beta1_power_v, beta2_power_v) = _get_beta_accumulators(); var (beta1_power_v, beta2_power_v) = _get_beta_accumulators();


+ 1
- 1
src/TensorFlowNET.Core/Train/Optimizer.cs View File

@@ -272,7 +272,7 @@ namespace Tensorflow
public virtual (Tensor, Tensor) _deduplicate_indexed_slices(Tensor values, Tensor indices) public virtual (Tensor, Tensor) _deduplicate_indexed_slices(Tensor values, Tensor indices)
{ {
var (unique_indices, new_index_positions) = array_ops.unique(indices); var (unique_indices, new_index_positions) = array_ops.unique(indices);
var shape = array_ops.shape(unique_indices)[0];
var shape = array_ops.shape(unique_indices).slice(0);
var summed_values = math_ops.unsorted_segment_sum(values, new_index_positions, shape); var summed_values = math_ops.unsorted_segment_sum(values, new_index_positions, shape);
return (summed_values, unique_indices); return (summed_values, unique_indices);
} }


+ 23
- 0
src/TensorFlowNET.Core/Train/gen_training_ops.py.cs View File

@@ -8,6 +8,29 @@ namespace Tensorflow
{ {
public static OpDefLibrary _op_def_lib = new OpDefLibrary(); public static OpDefLibrary _op_def_lib = new OpDefLibrary();


/// <summary>
/// Creates an "ApplyAdam" operation through <c>_op_def_lib._apply_op_helper</c>
/// and returns the first output of that operation.
/// </summary>
/// <param name="name">Name passed to <c>_apply_op_helper</c>; may be null.</param>
/// <returns>The first element of the created operation's <c>outputs</c>.</returns>
public static Tensor apply_adam(RefVariable var, RefVariable m, RefVariable v, Tensor beta1_power, Tensor beta2_power,
    Tensor lr, Tensor beta1, Tensor beta2, Tensor epsilon, Tensor grad,
    bool use_locking = false, bool use_nesterov = false, string name = null)
{
    // Member names of the anonymous object are inferred from the parameter identifiers.
    // NOTE(review): presumably _apply_op_helper matches them against the op definition's
    // input/attribute names, so names and order are kept exactly as they were - confirm.
    var op_args = new
    {
        var, m, v,
        beta1_power, beta2_power,
        lr, beta1, beta2, epsilon,
        grad,
        use_locking, use_nesterov
    };

    var adam_op = _op_def_lib._apply_op_helper("ApplyAdam", name, op_args);
    return adam_op.outputs[0];
}

public static Tensor apply_gradient_descent(RefVariable var, Tensor alpha, Tensor delta, bool use_locking = false, string name = null) public static Tensor apply_gradient_descent(RefVariable var, Tensor alpha, Tensor delta, bool use_locking = false, string name = null)
{ {
var _op = _op_def_lib._apply_op_helper("ApplyGradientDescent", name, new var _op = _op_def_lib._apply_op_helper("ApplyGradientDescent", name, new


Loading…
Cancel
Save