@@ -40,6 +40,8 @@ namespace Tensorflow.Gradients
     return end_value_index <= dim_index ? new Tensor[] { grad, null } : new Tensor[] { null, grad };

     var concat_dim = op.inputs[dim_index];
+    if (end_value_index == -1)
+        end_value_index = op.inputs.Length - 1;
     var input_values = op.inputs._inputs.Skip(start_value_index).Take(end_value_index - start_value_index).ToArray();

     var out_grads = new List<Tensor>();
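
The new guard lets callers pass `end_value_index = -1` to mean "slice up to, but not including, the last input", which matters for ConcatV2-style ops where the axis tensor comes last. A sketch of the arithmetic, with hypothetical stand-in values:

    using System;
    using System.Linq;

    class SliceDemo
    {
        static void Main()
        {
            // Stand-ins for op.inputs of a ConcatV2-style op: N value
            // tensors followed by the axis tensor in the last slot.
            var inputs = new[] { "x0", "x1", "x2", "axis" };
            int start_value_index = 0;
            int end_value_index = -1;                  // "everything before the axis"

            if (end_value_index == -1)
                end_value_index = inputs.Length - 1;   // resolves to 3

            var input_values = inputs
                .Skip(start_value_index)
                .Take(end_value_index - start_value_index)  // takes x0, x1, x2
                .ToArray();

            Console.WriteLine(string.Join(", ", input_values));
        }
    }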
@@ -43,12 +43,6 @@ namespace Tensorflow
     if (grad_ys == null)
         grad_ys = new Tensor[ys.Length];

-    var all = new List<Tensor>();
-    all.AddRange(ys);
-    all.AddRange(xs);
-    all.AddRange(stop_gradients);
-    all.AddRange(grad_ys);
-
     // Iterate over the collected ops.
    /**
     * grads: op => list of gradients received on each output endpoint of the
@@ -59,7 +53,8 @@ namespace Tensorflow
     **/
     var grads = new Dictionary<string, Tensor[][]>();

-    with(ops.name_scope(name, "gradients", values: all), scope =>
+    with(ops.name_scope(name, "gradients",
+        values: ys.Concat(xs).Concat(stop_gradients).Concat(grad_ys)), scope =>
     {
         string grad_scope = scope;
         // Get a uid for this call to gradients that can be used to help
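
The rewritten `name_scope` call passes exactly the tensors the old code collected; LINQ's `Concat` just chains the four sequences lazily instead of materializing a temporary `List<Tensor>`. A quick equivalence sketch:

    // Before: eagerly copies everything into one list.
    var all = new List<Tensor>();
    all.AddRange(ys);
    all.AddRange(xs);
    all.AddRange(stop_gradients);
    all.AddRange(grad_ys);

    // After: same elements in the same order, no intermediate list.
    IEnumerable<Tensor> values = ys.Concat(xs).Concat(stop_gradients).Concat(grad_ys);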
@@ -166,7 +161,7 @@ namespace Tensorflow
     }

     var inputs = _NonEagerInputs(op, xs).ToList();
-    foreach (var (t_in, in_grad) in Python.zip(inputs, in_grads))
+    foreach (var (t_in, in_grad) in zip(inputs, in_grads))
     {
         if (in_grad != null)
         {
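
Dropping the `Python.` qualifier works because this file already has `using static Tensorflow.Python;` in scope (the bare `with(...)` calls above resolve the same way). A self-contained sketch of the pattern, assuming the helper is generic over its element types, as its use with tensor lists here suggests:

    using System;
    using System.Collections.Generic;
    using static Tensorflow.Python;  // the directive that lets the bare zip(...) resolve

    class ZipDemo
    {
        static void Main()
        {
            var inputs = new List<int> { 1, 2, 3 };
            var grads = new List<string> { "a", "b", "c" };
            // zip pairs the i-th elements into tuples, mirroring Python's zip().
            foreach (var (t_in, in_grad) in zip(inputs, grads))
                Console.WriteLine($"{t_in}: {in_grad}");
        }
    }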
@@ -72,7 +72,8 @@ namespace Tensorflow
     private string _graph_key;
     public string graph_key => _graph_key;
     public string _last_loss_reduction;
+    public bool _is_loss_scaled_by_optimizer { get; set; }

     public Status Status { get; }

     /// <summary>
@@ -10,7 +10,15 @@ namespace Tensorflow
     {
         public Tensor[] _inputs;
         public int Length => _inputs.Length;
-        public Tensor this[int index] => _inputs[index];
+        public Tensor this[int index]
+        {
+            get
+            {
+                if (index == -1)
+                    index = _inputs.Length - 1;
+                return _inputs[index];
+            }
+        }

         public InputList(Tensor[] inputs)
         {
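
With the new indexer, `op.inputs[-1]` yields the last input tensor instead of throwing, which is exactly what `op.inputs[dim_index]` in the concat gradient above needs when `dim_index` is -1. A standalone sketch of the semantics (demo names are hypothetical):

    using System;

    class NegativeIndexDemo
    {
        static string At(string[] items, int index)
        {
            // Only -1 is special-cased; -2, -3, ... still throw,
            // unlike full Python-style negative indexing.
            if (index == -1)
                index = items.Length - 1;
            return items[index];
        }

        static void Main()
        {
            var inputs = new[] { "values0", "values1", "axis" };
            Console.WriteLine(At(inputs, 0));   // values0
            Console.WriteLine(At(inputs, -1));  // axis
        }
    }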
@@ -56,6 +56,7 @@ Removed global static graph instance.</PackageReleaseNotes>
   </ItemGroup>

   <ItemGroup>
+    <Folder Include="Distribute\" />
     <Folder Include="Keras\Initializers\" />
   </ItemGroup>
@@ -2,7 +2,6 @@
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
-using distribute_lib = Tensorflow.Distribute;
 using static Tensorflow.Python;

 namespace Tensorflow
@@ -82,7 +81,8 @@ namespace Tensorflow
     var grads_and_vars = compute_gradients(loss, var_list: var_list,
         gate_gradients: gate_gradients,
         aggregation_method: aggregation_method,
-        colocate_gradients_with_ops: colocate_gradients_with_ops);
+        colocate_gradients_with_ops: colocate_gradients_with_ops,
+        grad_loss: grad_loss);

     var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
     if (vars_with_grad.Length == 0)
@@ -232,30 +232,31 @@ namespace Tensorflow
     int? aggregation_method = null,
     GateGradientType gate_gradients = GateGradientType.GATE_OP,
     bool colocate_gradients_with_ops = false,
-    Tensor[] grad_loss = null)
+    Tensor grad_loss = null)
 {
+    // Scale loss if using a "mean" loss reduction and multiple replicas.
+    loss = _scale_loss(loss);
     int num_towers = 1;
-    if (distribute_lib.get_loss_reduction() == VariableAggregationType.MEAN)
-    {
-    }

     var tmp = variables.trainable_variables();
+    var vars = ops.get_collection<RefVariable>(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES);
     switch (tmp)
     {
         case List<RefVariable> values:
-            var_list = values;
+            var_list = values.Concat(vars).ToList();
             break;
         case List<VariableV1> values:
-            var_list = values.Select(x => x as RefVariable).ToList();
+            var_list = values.Select(x => x as RefVariable).Concat(vars).ToList();
             break;
     }

+    var_list = var_list.Concat(ops.get_collection<RefVariable>(ops.GraphKeys._STREAMING_MODEL_PORTS)).ToList();
     var processors = var_list.Select(v => optimizer._get_processor(v)).ToList();
     var var_refs = processors.Select(x => x.target()).ToArray();
-    var grads = gradients_impl.gradients(new Tensor[] { loss }, var_refs, grad_ys: grad_loss,
-        gate_gradients: (gate_gradients == GateGradientType.GATE_OP),
+    var grads = gradients_impl.gradients(new Tensor[] { loss }, var_refs, grad_ys: grad_loss == null ? null : new Tensor[] { grad_loss },
+        gate_gradients: gate_gradients == GateGradientType.GATE_OP,
         aggregation_method: aggregation_method,
         colocate_gradients_with_ops: colocate_gradients_with_ops);
@@ -269,6 +270,14 @@ namespace Tensorflow
     return grads_and_vars;
 }

+private Tensor _scale_loss(Tensor loss_value)
+{
+    ops.get_default_graph()._is_loss_scaled_by_optimizer = false;
+    // TODO
+    // if distribute_lib.get_loss_reduction() == ds_reduce_util.ReduceOp.MEAN:
+    return loss_value;
+}
+
 protected T _call_if_callable<T>(T param)
 {
     return param;
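
Taken together, these Optimizer changes make `grad_loss` a scalar `Tensor` that actually reaches the gradient computation: `minimize` now forwards it to `compute_gradients`, which wraps it as the single `grad_ys` entry matching the one-element `ys` array. A hypothetical usage sketch (the loss tensor and constants are illustrative, not from the patch):

    // Seed backprop with a custom upstream gradient instead of the
    // implicit all-ones tensor; here every dL/dw is effectively doubled.
    var optimizer = tf.train.AdamOptimizer(0.01f);
    var grad_loss = tf.constant(2.0f);
    var train_op = optimizer.minimize(loss, grad_loss: grad_loss);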
@@ -22,6 +22,16 @@ namespace Tensorflow
     /// </summary>
     public static string TRAINABLE_VARIABLES = "trainable_variables";

+    /// <summary>
+    /// Trainable resource-style variables.
+    /// </summary>
+    public static string TRAINABLE_RESOURCE_VARIABLES = "trainable_resource_variables";
+
+    /// <summary>
+    /// Key for streaming model ports.
+    /// </summary>
+    public static string _STREAMING_MODEL_PORTS = "streaming_model_ports";
+
     /// <summary>
     /// Key to collect losses
     /// </summary>
@@ -45,6 +45,11 @@ namespace Tensorflow
     return get_default_graph().get_collection(key, scope);
 }

+public static List<T> get_collection<T>(string key, string scope = null)
+{
+    return get_default_graph().get_collection<T>(key, scope);
+}
+
 public static object get_collection_ref(string key)
 {
     return get_default_graph().get_collection_ref(key);
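
This typed overload is what the Optimizer changes above call into. A sketch of the call sites it enables:

    // Typed lookups on the default graph; no casting of an untyped
    // collection result is needed.
    var resource_vars = ops.get_collection<RefVariable>(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES);
    var ports = ops.get_collection<RefVariable>(ops.GraphKeys._STREAMING_MODEL_PORTS);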
@@ -207,7 +207,7 @@ namespace TensorFlowNET.Examples
     Tensor predictions = null;
     with(tf.name_scope("output"), delegate
     {
-        logits = tf.layers.dense(h_pool_flat, keep_prob);
+        logits = tf.layers.dense(h_pool_flat, NUM_CLASS);
         predictions = tf.argmax(logits, -1, output_type: tf.int32);
     });
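
This fixes a genuine bug in the example: the second argument of `tf.layers.dense` is the layer's output width (units), so the old call sized the logits layer by the dropout keep probability rather than by the class count. The shapes the fix restores (feature_dim is a placeholder name):

    // h_pool_flat: [batch_size, feature_dim]   flattened pooled features
    // logits     : [batch_size, NUM_CLASS]     one score per class
    // predictions: [batch_size]                argmax over the class axis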
@@ -215,7 +215,8 @@ namespace TensorFlowNET.Examples
 {
     var sscel = tf.nn.sparse_softmax_cross_entropy_with_logits(logits: logits, labels: y);
     var loss = tf.reduce_mean(sscel);
-    var optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step: global_step);
+    var adam = tf.train.AdamOptimizer(learning_rate);
+    var optimizer = adam.minimize(loss, global_step: global_step);
 });

 with(tf.name_scope("accuracy"), delegate