Browse Source

Merge remote-tracking branch 'origin/Makina'

tags/v0.9
Arnav Das 6 years ago
parent
commit
baaff630c4
51 changed files with 1344 additions and 625 deletions
  1. BIN
      data/dbpedia_subset.zip
  2. BIN
      data/lstm_crf_ner.zip
  3. BIN
      graph/lstm_crf_ner.meta
  4. BIN
      graph/vd_cnn.meta
  5. +73
    -8
      src/TensorFlowNET.Core/Estimator/HyperParams.cs
  6. +12
    -3
      src/TensorFlowNET.Core/Framework/meta_graph.py.cs
  7. +16
    -0
      src/TensorFlowNET.Core/Gradients/math_grad.cs
  8. +2
    -0
      src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs
  9. +8
    -1
      src/TensorFlowNET.Core/Graphs/Graph.cs
  10. +2
    -0
      src/TensorFlowNET.Core/Operations/ControlFlows/ControlFlowContext.cs
  11. +66
    -4
      src/TensorFlowNET.Core/Operations/ControlFlows/WhileContext.cs
  12. +7
    -0
      src/TensorFlowNET.Core/Operations/gen_math_ops.cs
  13. +3
    -0
      src/TensorFlowNET.Core/Operations/math_ops.cs
  14. +19
    -5
      src/TensorFlowNET.Core/Sessions/BaseSession.cs
  15. +11
    -0
      src/TensorFlowNET.Core/Sessions/FeedDict.cs
  16. +1
    -1
      src/TensorFlowNET.Core/TensorFlowNET.Core.csproj
  17. +4
    -0
      src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs
  18. +4
    -0
      src/TensorFlowNET.Core/Tensors/Tensor.cs
  19. +3
    -0
      src/TensorFlowNET.Core/Train/Saving/BaseSaverBuilder.cs
  20. +1
    -0
      test/TensorFlowNET.Examples/AudioProcess/README.md
  21. +63
    -0
      test/TensorFlowNET.Examples/ImageProcess/ImageBackgroundRemoval.cs
  22. +0
    -0
      test/TensorFlowNET.Examples/ImageProcess/ImageRecognitionInception.cs
  23. +0
    -0
      test/TensorFlowNET.Examples/ImageProcess/InceptionArchGoogLeNet.cs
  24. +0
    -0
      test/TensorFlowNET.Examples/ImageProcess/ObjectDetection.cs
  25. +0
    -43
      test/TensorFlowNET.Examples/MetaGraph.cs
  26. +0
    -0
      test/TensorFlowNET.Examples/Models/KMeansClustering.cs
  27. +0
    -0
      test/TensorFlowNET.Examples/Models/LinearRegression.cs
  28. +0
    -0
      test/TensorFlowNET.Examples/Models/LogisticRegression.cs
  29. +0
    -0
      test/TensorFlowNET.Examples/Models/NaiveBayesClassifier.cs
  30. +0
    -0
      test/TensorFlowNET.Examples/Models/NearestNeighbor.cs
  31. +156
    -156
      test/TensorFlowNET.Examples/Models/NeuralNetXor.cs
  32. +1
    -0
      test/TensorFlowNET.Examples/Program.cs
  33. +0
    -94
      test/TensorFlowNET.Examples/Text/DataHelpers.cs
  34. +0
    -71
      test/TensorFlowNET.Examples/Text/NER/BiLstmCrfNer.cs
  35. +0
    -179
      test/TensorFlowNET.Examples/Text/TextClassificationTrain.cs
  36. +0
    -0
      test/TensorFlowNET.Examples/TextProcess/BinaryTextClassification.cs
  37. +163
    -0
      test/TensorFlowNET.Examples/TextProcess/DataHelpers.cs
  38. +39
    -0
      test/TensorFlowNET.Examples/TextProcess/NER/BiLstmCrfNer.cs
  39. +0
    -0
      test/TensorFlowNET.Examples/TextProcess/NER/CRF.cs
  40. +212
    -0
      test/TensorFlowNET.Examples/TextProcess/NER/LstmCrfNer.cs
  41. +0
    -0
      test/TensorFlowNET.Examples/TextProcess/NamedEntityRecognition.cs
  42. +289
    -0
      test/TensorFlowNET.Examples/TextProcess/TextClassificationTrain.cs
  43. +0
    -0
      test/TensorFlowNET.Examples/TextProcess/Word2Vec.cs
  44. +14
    -14
      test/TensorFlowNET.Examples/TextProcess/cnn_models/ITextClassificationModel.cs
  45. +0
    -0
      test/TensorFlowNET.Examples/TextProcess/cnn_models/VdCnn.cs
  46. +22
    -0
      test/TensorFlowNET.Examples/Utility/ArrayShuffling.cs
  47. +108
    -0
      test/TensorFlowNET.Examples/Utility/CoNLLDataset.cs
  48. +28
    -35
      test/TensorFlowNET.Examples/Utility/PbtxtParser.cs
  49. +0
    -8
      test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs
  50. +15
    -1
      test/TensorFlowNET.UnitTest/GraphTest.cs
  51. +2
    -2
      test/TensorFlowNET.UnitTest/control_flow_ops_test/WhileContextTestCase.cs

BIN
data/dbpedia_subset.zip View File


BIN
data/lstm_crf_ner.zip View File


BIN
graph/lstm_crf_ner.meta View File


BIN
graph/vd_cnn_untrained.meta → graph/vd_cnn.meta View File


+ 73
- 8
src/TensorFlowNET.Core/Estimator/HyperParams.cs View File

@@ -1,27 +1,92 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO;
using System.Text; using System.Text;


namespace Tensorflow.Estimator namespace Tensorflow.Estimator
{ {
public class HyperParams public class HyperParams
{ {
public string data_dir { get; set; }
public string result_dir { get; set; }
public string model_dir { get; set; }
public string eval_dir { get; set; }
/// <summary>
/// root dir
/// </summary>
public string data_root_dir { get; set; }

/// <summary>
/// results dir
/// </summary>
public string result_dir { get; set; } = "results";

/// <summary>
/// model dir
/// </summary>
public string model_dir { get; set; } = "model";

public string eval_dir { get; set; } = "eval";

public string test_dir { get; set; } = "test";


public int dim { get; set; } = 300; public int dim { get; set; } = 300;
public float dropout { get; set; } = 0.5f; public float dropout { get; set; } = 0.5f;
public int num_oov_buckets { get; set; } = 1; public int num_oov_buckets { get; set; } = 1;
public int epochs { get; set; } = 25; public int epochs { get; set; } = 25;
public int epoch_no_imprv { get; set; } = 3;
public int batch_size { get; set; } = 20; public int batch_size { get; set; } = 20;
public int buffer { get; set; } = 15000; public int buffer { get; set; } = 15000;
public int lstm_size { get; set; } = 100; public int lstm_size { get; set; } = 100;
public string lr_method { get; set; } = "adam";
public float lr { get; set; } = 0.001f;
public float lr_decay { get; set; } = 0.9f;

/// <summary>
/// lstm on chars
/// </summary>
public int hidden_size_char { get; set; } = 100;

/// <summary>
/// lstm on word embeddings
/// </summary>
public int hidden_size_lstm { get; set; } = 300;

/// <summary>
/// is clipping
/// </summary>
public bool clip { get; set; } = false;

public string filepath_dev { get; set; }
public string filepath_test { get; set; }
public string filepath_train { get; set; }

public string filepath_words { get; set; }
public string filepath_chars { get; set; }
public string filepath_tags { get; set; }
public string filepath_glove { get; set; }

public HyperParams(string dataDir)
{
data_root_dir = dataDir;

if (string.IsNullOrEmpty(data_root_dir))
throw new ValueError("Please specifiy the root data directory");

if (!Directory.Exists(data_root_dir))
Directory.CreateDirectory(data_root_dir);

result_dir = Path.Combine(data_root_dir, result_dir);
if (!Directory.Exists(result_dir))
Directory.CreateDirectory(result_dir);

model_dir = Path.Combine(result_dir, model_dir);
if (!Directory.Exists(model_dir))
Directory.CreateDirectory(model_dir);

test_dir = Path.Combine(result_dir, test_dir);
if (!Directory.Exists(test_dir))
Directory.CreateDirectory(test_dir);


public string words { get; set; }
public string chars { get; set; }
public string tags { get; set; }
public string glove { get; set; }
eval_dir = Path.Combine(result_dir, eval_dir);
if (!Directory.Exists(eval_dir))
Directory.CreateDirectory(eval_dir);
}
} }
} }

+ 12
- 3
src/TensorFlowNET.Core/Framework/meta_graph.py.cs View File

@@ -101,9 +101,18 @@ namespace Tensorflow
switch (col.Key) switch (col.Key)
{ {
case "cond_context": case "cond_context":
var proto = CondContextDef.Parser.ParseFrom(value);
var condContext = new CondContext().from_proto(proto, import_scope);
graph.add_to_collection(col.Key, condContext);
{
var proto = CondContextDef.Parser.ParseFrom(value);
var condContext = new CondContext().from_proto(proto, import_scope);
graph.add_to_collection(col.Key, condContext);
}
break;
case "while_context":
{
var proto = WhileContextDef.Parser.ParseFrom(value);
var whileContext = new WhileContext().from_proto(proto, import_scope);
graph.add_to_collection(col.Key, whileContext);
}
break; break;
default: default:
throw new NotImplementedException("import_scoped_meta_graph_with_return_elements"); throw new NotImplementedException("import_scoped_meta_graph_with_return_elements");


+ 16
- 0
src/TensorFlowNET.Core/Gradients/math_grad.cs View File

@@ -32,6 +32,22 @@ namespace Tensorflow.Gradients
return new Tensor[] { r1, r2 }; return new Tensor[] { r1, r2 };
} }


/// <summary>
/// Returns grad * exp(x).
/// </summary>
/// <param name="op"></param>
/// <param name="grads"></param>
/// <returns></returns>
public static Tensor[] _ExpGrad(Operation op, Tensor[] grads)
{
var grad = grads[0];
var y = op.outputs[0]; // y = e^x
return with(ops.control_dependencies(new Operation[] { grad }), dp => {
y = math_ops.conj(y);
return new Tensor[] { math_ops.mul_no_nan(y, grad) };
});
}

public static Tensor[] _IdGrad(Operation op, Tensor[] grads) public static Tensor[] _IdGrad(Operation op, Tensor[] grads)
{ {
return new Tensor[] { grads[0] }; return new Tensor[] { grads[0] };


+ 2
- 0
src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs View File

@@ -22,6 +22,8 @@ namespace Tensorflow
return math_grad._AddGrad(oper, out_grads); return math_grad._AddGrad(oper, out_grads);
case "BiasAdd": case "BiasAdd":
return nn_grad._BiasAddGrad(oper, out_grads); return nn_grad._BiasAddGrad(oper, out_grads);
case "Exp":
return math_grad._ExpGrad(oper, out_grads);
case "Identity": case "Identity":
return math_grad._IdGrad(oper, out_grads); return math_grad._IdGrad(oper, out_grads);
case "Log": case "Log":


+ 8
- 1
src/TensorFlowNET.Core/Graphs/Graph.cs View File

@@ -160,7 +160,14 @@ namespace Tensorflow
} }
else if (!name.Contains(":") & !allow_operation) else if (!name.Contains(":") & !allow_operation)
{ {
throw new NotImplementedException("_as_graph_element_locked");
// Looks like an Operation name but can't be an Operation.
if (_nodes_by_name.ContainsKey(name))
// Yep, it's an Operation name
throw new ValueError($"The name {name} refers to an Operation, not a {types_str}.");
else
throw new ValueError(
$"The name {name} looks like an (invalid) Operation name, not a {types_str}" +
" Tensor names must be of the form \"<op_name>:<output_index>\".");
} }
} }




+ 2
- 0
src/TensorFlowNET.Core/Operations/ControlFlows/ControlFlowContext.cs View File

@@ -198,6 +198,8 @@ namespace Tensorflow.Operations
{ {
case CtxtOneofCase.CondCtxt: case CtxtOneofCase.CondCtxt:
return new CondContext().from_proto(context_def.CondCtxt, import_scope: import_scope); return new CondContext().from_proto(context_def.CondCtxt, import_scope: import_scope);
case CtxtOneofCase.WhileCtxt:
return new WhileContext().from_proto(context_def.WhileCtxt, import_scope: import_scope);
} }


throw new NotImplementedException($"Unknown ControlFlowContextDef field: {context_def.CtxtCase}"); throw new NotImplementedException($"Unknown ControlFlowContextDef field: {context_def.CtxtCase}");


+ 66
- 4
src/TensorFlowNET.Core/Operations/ControlFlows/WhileContext.cs View File

@@ -2,14 +2,70 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Text; using System.Text;
using Tensorflow.Operations.ControlFlows; using Tensorflow.Operations.ControlFlows;
using static Tensorflow.Python;


namespace Tensorflow.Operations namespace Tensorflow.Operations
{ {
/// <summary>
/// Creates a `WhileContext`.
/// </summary>
public class WhileContext : ControlFlowContext public class WhileContext : ControlFlowContext
{ {
private bool _back_prop=true;
bool _back_prop=true;
GradLoopState _grad_state =null;
Tensor _maximum_iterations;
int _parallel_iterations;
bool _swap_memory;
Tensor _pivot_for_pred;
Tensor _pivot_for_body;
Tensor[] _loop_exits;
Tensor[] _loop_enters;


private GradLoopState _grad_state =null;
public WhileContext(int parallel_iterations = 10,
bool back_prop = true,
bool swap_memory = false,
string name = "while_context",
GradLoopState grad_state = null,
WhileContextDef context_def = null,
string import_scope = null)
{
if (context_def != null)
{
_init_from_proto(context_def, import_scope: import_scope);
}
else
{
}

_grad_state = grad_state;
}

private void _init_from_proto(WhileContextDef context_def, string import_scope = null)
{
var g = ops.get_default_graph();
_name = ops.prepend_name_scope(context_def.ContextName, import_scope);
if (!string.IsNullOrEmpty(context_def.MaximumIterationsName))
_maximum_iterations = g.as_graph_element(ops.prepend_name_scope(context_def.MaximumIterationsName, import_scope)) as Tensor;
_parallel_iterations = context_def.ParallelIterations;
_back_prop = context_def.BackProp;
_swap_memory = context_def.SwapMemory;
_pivot_for_pred = g.as_graph_element(ops.prepend_name_scope(context_def.PivotForPredName, import_scope)) as Tensor;
// We use this node to control constants created by the body lambda.
_pivot_for_body = g.as_graph_element(ops.prepend_name_scope(context_def.PivotForBodyName, import_scope)) as Tensor;
// The boolean tensor for loop termination condition.
_pivot = g.as_graph_element(ops.prepend_name_scope(context_def.PivotName, import_scope)) as Tensor;
// The list of exit tensors for loop variables.
_loop_exits = new Tensor[context_def.LoopExitNames.Count];
foreach (var (i, exit_name) in enumerate(context_def.LoopExitNames))
_loop_exits[i] = g.as_graph_element(ops.prepend_name_scope(exit_name, import_scope)) as Tensor;
// The list of enter tensors for loop variables.
_loop_enters = new Tensor[context_def.LoopEnterNames.Count];
foreach (var (i, enter_name) in enumerate(context_def.LoopEnterNames))
_loop_enters[i] = g.as_graph_element(ops.prepend_name_scope(enter_name, import_scope)) as Tensor;

__init__(values_def: context_def.ValuesDef, import_scope: import_scope);
}


public override WhileContext GetWhileContext() public override WhileContext GetWhileContext()
{ {
@@ -21,9 +77,15 @@ namespace Tensorflow.Operations


public override bool back_prop => _back_prop; public override bool back_prop => _back_prop;


public static WhileContext from_proto(object proto)
public WhileContext from_proto(WhileContextDef proto, string import_scope)
{ {
throw new NotImplementedException();
var ret = new WhileContext(context_def: proto, import_scope: import_scope);

ret.Enter();
foreach (var nested_def in proto.NestedContexts)
from_control_flow_context_def(nested_def, import_scope: import_scope);
ret.Exit();
return ret;
} }


public object to_proto() public object to_proto()


+ 7
- 0
src/TensorFlowNET.Core/Operations/gen_math_ops.cs View File

@@ -352,6 +352,13 @@ namespace Tensorflow
return _op.outputs[0]; return _op.outputs[0];
} }
public static Tensor mul_no_nan<Tx, Ty>(Tx x, Ty y, string name = null)
{
var _op = _op_def_lib._apply_op_helper("MulNoNan", name, args: new { x, y });
return _op.outputs[0];
}
public static Tensor real_div(Tensor x, Tensor y, string name = null) public static Tensor real_div(Tensor x, Tensor y, string name = null)
{ {
var _op = _op_def_lib._apply_op_helper("RealDiv", name, args: new { x, y }); var _op = _op_def_lib._apply_op_helper("RealDiv", name, args: new { x, y });


+ 3
- 0
src/TensorFlowNET.Core/Operations/math_ops.cs View File

@@ -71,6 +71,9 @@ namespace Tensorflow
public static Tensor multiply(Tensor x, Tensor y, string name = null) public static Tensor multiply(Tensor x, Tensor y, string name = null)
=> gen_math_ops.mul(x, y, name: name); => gen_math_ops.mul(x, y, name: name);


public static Tensor mul_no_nan(Tensor x, Tensor y, string name = null)
=> gen_math_ops.mul_no_nan(x, y, name: name);

/// <summary> /// <summary>
/// Computes the mean of elements across dimensions of a tensor. /// Computes the mean of elements across dimensions of a tensor.
/// Reduces `input_tensor` along the dimensions given in `axis`. /// Reduces `input_tensor` along the dimensions given in `axis`.


+ 19
- 5
src/TensorFlowNET.Core/Sessions/BaseSession.cs View File

@@ -1,5 +1,6 @@
using NumSharp; using NumSharp;
using System; using System;
using System.Collections;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
@@ -18,7 +19,7 @@ namespace Tensorflow


public BaseSession(string target = "", Graph graph = null) public BaseSession(string target = "", Graph graph = null)
{ {
if(graph is null)
if (graph is null)
{ {
_graph = ops.get_default_graph(); _graph = ops.get_default_graph();
} }
@@ -40,6 +41,13 @@ namespace Tensorflow
return _run(fetches, feed_dict); return _run(fetches, feed_dict);
} }


public virtual NDArray run(object fetches, Hashtable feed_dict = null)
{
var feed_items = feed_dict == null ? new FeedItem[0] :
feed_dict.Keys.OfType<object>().Select(key => new FeedItem(key, feed_dict[key])).ToArray();
return _run(fetches, feed_items);
}

private NDArray _run(object fetches, FeedItem[] feed_dict = null) private NDArray _run(object fetches, FeedItem[] feed_dict = null)
{ {
var feed_dict_tensor = new Dictionary<object, object>(); var feed_dict_tensor = new Dictionary<object, object>();
@@ -89,11 +97,17 @@ namespace Tensorflow
case byte[] val: case byte[] val:
feed_dict_tensor[subfeed_t] = (NDArray)val; feed_dict_tensor[subfeed_t] = (NDArray)val;
break; break;
case bool val:
feed_dict_tensor[subfeed_t] = (NDArray)val;
break;
case bool[] val:
feed_dict_tensor[subfeed_t] = (NDArray)val;
break;
default: default:
Console.WriteLine($"can't handle data type of subfeed_val"); Console.WriteLine($"can't handle data type of subfeed_val");
throw new NotImplementedException("_run subfeed"); throw new NotImplementedException("_run subfeed");
}
}
feed_map[subfeed_t.name] = (subfeed_t, subfeed_val); feed_map[subfeed_t.name] = (subfeed_t, subfeed_val);
} }
} }
@@ -132,9 +146,9 @@ namespace Tensorflow
/// </returns> /// </returns>
private NDArray[] _do_run(List<Operation> target_list, List<Tensor> fetch_list, Dictionary<object, object> feed_dict) private NDArray[] _do_run(List<Operation> target_list, List<Tensor> fetch_list, Dictionary<object, object> feed_dict)
{ {
var feeds = feed_dict.Select(x =>
var feeds = feed_dict.Select(x =>
{ {
if(x.Key is Tensor tensor)
if (x.Key is Tensor tensor)
{ {
switch (x.Value) switch (x.Value)
{ {


+ 11
- 0
src/TensorFlowNET.Core/Sessions/FeedDict.cs View File

@@ -0,0 +1,11 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
namespace Tensorflow.Sessions
{
public class FeedDict : Hashtable
{
}
}

+ 1
- 1
src/TensorFlowNET.Core/TensorFlowNET.Core.csproj View File

@@ -49,7 +49,7 @@ Add Word2Vec example.</PackageReleaseNotes>


<ItemGroup> <ItemGroup>
<PackageReference Include="Google.Protobuf" Version="3.7.0" /> <PackageReference Include="Google.Protobuf" Version="3.7.0" />
<PackageReference Include="NumSharp" Version="0.10.0" />
<PackageReference Include="NumSharp" Version="0.10.1" />
</ItemGroup> </ItemGroup>


<ItemGroup> <ItemGroup>


+ 4
- 0
src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs View File

@@ -55,6 +55,10 @@ namespace Tensorflow
var nd1 = nd.ravel(); var nd1 = nd.ravel();
switch (nd.dtype.Name) switch (nd.dtype.Name)
{ {
case "Boolean":
var boolVals = Array.ConvertAll(nd1.Data<bool>(), x => Convert.ToByte(x));
Marshal.Copy(boolVals, 0, dotHandle, nd.size);
break;
case "Int16": case "Int16":
Marshal.Copy(nd1.Data<short>(), 0, dotHandle, nd.size); Marshal.Copy(nd1.Data<short>(), 0, dotHandle, nd.size);
break; break;


+ 4
- 0
src/TensorFlowNET.Core/Tensors/Tensor.cs View File

@@ -191,6 +191,8 @@ namespace Tensorflow
return TF_DataType.TF_INT16; return TF_DataType.TF_INT16;
case "Int32": case "Int32":
return TF_DataType.TF_INT32; return TF_DataType.TF_INT32;
case "Int64":
return TF_DataType.TF_INT64;
case "Single": case "Single":
return TF_DataType.TF_FLOAT; return TF_DataType.TF_FLOAT;
case "Double": case "Double":
@@ -199,6 +201,8 @@ namespace Tensorflow
return TF_DataType.TF_UINT8; return TF_DataType.TF_UINT8;
case "String": case "String":
return TF_DataType.TF_STRING; return TF_DataType.TF_STRING;
case "Boolean":
return TF_DataType.TF_BOOL;
default: default:
throw new NotImplementedException("ToTFDataType error"); throw new NotImplementedException("ToTFDataType error");
} }


+ 3
- 0
src/TensorFlowNET.Core/Train/Saving/BaseSaverBuilder.cs View File

@@ -120,6 +120,9 @@ namespace Tensorflow
case List<CondContext> values: case List<CondContext> values:
foreach (var element in values) ; foreach (var element in values) ;
break; break;
case List<WhileContext> values:
foreach (var element in values) ;
break;
default: default:
throw new NotImplementedException("_build_internal.check_collection_list"); throw new NotImplementedException("_build_internal.check_collection_list");
} }


+ 1
- 0
test/TensorFlowNET.Examples/AudioProcess/README.md View File

@@ -0,0 +1 @@


+ 63
- 0
test/TensorFlowNET.Examples/ImageProcess/ImageBackgroundRemoval.cs View File

@@ -0,0 +1,63 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Tensorflow;
using TensorFlowNET.Examples.Utility;
using static Tensorflow.Python;

namespace TensorFlowNET.Examples.ImageProcess
{
/// <summary>
/// This example removes the background from an input image.
///
/// https://github.com/susheelsk/image-background-removal
/// </summary>
public class ImageBackgroundRemoval : IExample
{
public int Priority => 15;

public bool Enabled { get; set; } = true;
public bool ImportGraph { get; set; } = true;

public string Name => "Image Background Removal";

string dataDir = "deeplabv3";
string modelDir = "deeplabv3_mnv2_pascal_train_aug";
string modelName = "frozen_inference_graph.pb";

public bool Run()
{
PrepareData();

// import GraphDef from pb file
var graph = new Graph().as_default();
graph.Import(Path.Join(dataDir, modelDir, modelName));

Tensor output = graph.OperationByName("SemanticPredictions");

with(tf.Session(graph), sess =>
{
// Runs inference on a single image.
sess.run(output, new FeedItem(output, "[np.asarray(resized_image)]"));
});

return false;
}

public void PrepareData()
{
// get mobile_net_model file
string fileName = "deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz";
string url = $"http://download.tensorflow.org/models/{fileName}";
Web.Download(url, dataDir, fileName);
Compress.ExtractTGZ(Path.Join(dataDir, fileName), dataDir);

// xception_model, better accuracy
/*fileName = "deeplabv3_pascal_train_aug_2018_01_04.tar.gz";
url = $"http://download.tensorflow.org/models/{fileName}";
Web.Download(url, modelDir, fileName);
Compress.ExtractTGZ(Path.Join(modelDir, fileName), modelDir);*/
}
}
}

test/TensorFlowNET.Examples/ImageRecognitionInception.cs → test/TensorFlowNET.Examples/ImageProcess/ImageRecognitionInception.cs View File


test/TensorFlowNET.Examples/InceptionArchGoogLeNet.cs → test/TensorFlowNET.Examples/ImageProcess/InceptionArchGoogLeNet.cs View File


test/TensorFlowNET.Examples/ObjectDetection.cs → test/TensorFlowNET.Examples/ImageProcess/ObjectDetection.cs View File


+ 0
- 43
test/TensorFlowNET.Examples/MetaGraph.cs View File

@@ -1,43 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using Tensorflow;
using static Tensorflow.Python;

namespace TensorFlowNET.Examples
{
public class MetaGraph : IExample
{
public int Priority => 100;
public bool Enabled { get; set; } = false;
public string Name => "Meta Graph";
public bool ImportGraph { get; set; } = true;


public bool Run()
{
ImportMetaGraph("my-save-dir/");
return false;
}

private void ImportMetaGraph(string dir)
{
with(tf.Session(), sess =>
{
var new_saver = tf.train.import_meta_graph(dir + "my-model-10000.meta");
new_saver.restore(sess, dir + "my-model-10000");
var labels = tf.constant(0, dtype: tf.int32, shape: new int[] { 100 }, name: "labels");
var batch_size = tf.size(labels);
var logits = (tf.get_collection("logits") as List<ITensorOrOperation>)[0] as Tensor;
var loss = tf.losses.sparse_softmax_cross_entropy(labels: labels,
logits: logits);
});
}

public void PrepareData()
{
}
}
}

test/TensorFlowNET.Examples/KMeansClustering.cs → test/TensorFlowNET.Examples/Models/KMeansClustering.cs View File


test/TensorFlowNET.Examples/LinearRegression.cs → test/TensorFlowNET.Examples/Models/LinearRegression.cs View File


test/TensorFlowNET.Examples/LogisticRegression.cs → test/TensorFlowNET.Examples/Models/LogisticRegression.cs View File


test/TensorFlowNET.Examples/NaiveBayesClassifier.cs → test/TensorFlowNET.Examples/Models/NaiveBayesClassifier.cs View File


test/TensorFlowNET.Examples/NearestNeighbor.cs → test/TensorFlowNET.Examples/Models/NearestNeighbor.cs View File


test/TensorFlowNET.Examples/NeuralNetXor.cs → test/TensorFlowNET.Examples/Models/NeuralNetXor.cs View File

@@ -1,156 +1,156 @@
using System;
using System.Collections.Generic;
using System.Text;
using NumSharp;
using Tensorflow;
using TensorFlowNET.Examples.Utility;
using static Tensorflow.Python;
namespace TensorFlowNET.Examples
{
/// <summary>
/// Simple vanilla neural net solving the famous XOR problem
/// https://github.com/amygdala/tensorflow-workshop/blob/master/workshop_sections/getting_started/xor/README.md
/// </summary>
public class NeuralNetXor : IExample
{
public int Priority => 10;
public bool Enabled { get; set; } = true;
public string Name => "NN XOR";
public bool ImportGraph { get; set; } = false;
public int num_steps = 10000;
private NDArray data;
private (Operation, Tensor, Tensor) make_graph(Tensor features,Tensor labels, int num_hidden = 8)
{
var stddev = 1 / Math.Sqrt(2);
var hidden_weights = tf.Variable(tf.truncated_normal(new int []{2, num_hidden}, seed:1, stddev: (float) stddev ));
// Shape [4, num_hidden]
var hidden_activations = tf.nn.relu(tf.matmul(features, hidden_weights));
var output_weights = tf.Variable(tf.truncated_normal(
new[] {num_hidden, 1},
seed: 17,
stddev: (float) (1 / Math.Sqrt(num_hidden))
));
// Shape [4, 1]
var logits = tf.matmul(hidden_activations, output_weights);
// Shape [4]
var predictions = tf.sigmoid(tf.squeeze(logits));
var loss = tf.reduce_mean(tf.square(predictions - tf.cast(labels, tf.float32)), name:"loss");
var gs = tf.Variable(0, trainable: false, name: "global_step");
var train_op = tf.train.GradientDescentOptimizer(0.2f).minimize(loss, global_step: gs);
return (train_op, loss, gs);
}
public bool Run()
{
PrepareData();
float loss_value = 0;
if (ImportGraph)
loss_value = RunWithImportedGraph();
else
loss_value = RunWithBuiltGraph();
return loss_value < 0.0628;
}
private float RunWithImportedGraph()
{
var graph = tf.Graph().as_default();
tf.train.import_meta_graph("graph/xor.meta");
Tensor features = graph.get_operation_by_name("Placeholder");
Tensor labels = graph.get_operation_by_name("Placeholder_1");
Tensor loss = graph.get_operation_by_name("loss");
Tensor train_op = graph.get_operation_by_name("train_op");
Tensor global_step = graph.get_operation_by_name("global_step");
var init = tf.global_variables_initializer();
float loss_value = 0;
// Start tf session
with(tf.Session(graph), sess =>
{
sess.run(init);
var step = 0;
var y_ = np.array(new int[] { 1, 0, 0, 1 }, dtype: np.int32);
while (step < num_steps)
{
// original python:
//_, step, loss_value = sess.run(
// [train_op, gs, loss],
// feed_dict={features: xy, labels: y_}
// )
var result = sess.run(new ITensorOrOperation[] { train_op, global_step, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
loss_value = result[2];
step = result[1];
if (step % 1000 == 0)
Console.WriteLine($"Step {step} loss: {loss_value}");
}
Console.WriteLine($"Final loss: {loss_value}");
});
return loss_value;
}
private float RunWithBuiltGraph()
{
var graph = tf.Graph().as_default();
var features = tf.placeholder(tf.float32, new TensorShape(4, 2));
var labels = tf.placeholder(tf.int32, new TensorShape(4));
var (train_op, loss, gs) = make_graph(features, labels);
var init = tf.global_variables_initializer();
float loss_value = 0;
// Start tf session
with(tf.Session(graph), sess =>
{
sess.run(init);
var step = 0;
var y_ = np.array(new int[] { 1, 0, 0, 1 }, dtype: np.int32);
while (step < num_steps)
{
var result = sess.run(new ITensorOrOperation[] { train_op, gs, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
loss_value = result[2];
step = result[1];
if (step % 1000 == 0)
Console.WriteLine($"Step {step} loss: {loss_value}");
}
Console.WriteLine($"Final loss: {loss_value}");
});
return loss_value;
}
public void PrepareData()
{
data = new float[,]
{
{1, 0 },
{1, 1 },
{0, 0 },
{0, 1 }
};
if (ImportGraph)
{
// download graph meta data
string url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/graph/xor.meta";
Web.Download(url, "graph", "xor.meta");
}
}
}
}
using System;
using System.Collections.Generic;
using System.Text;
using NumSharp;
using Tensorflow;
using TensorFlowNET.Examples.Utility;
using static Tensorflow.Python;
namespace TensorFlowNET.Examples
{
/// <summary>
/// Simple vanilla neural net solving the famous XOR problem
/// https://github.com/amygdala/tensorflow-workshop/blob/master/workshop_sections/getting_started/xor/README.md
/// </summary>
public class NeuralNetXor : IExample
{
public int Priority => 10;
public bool Enabled { get; set; } = true;
public string Name => "NN XOR";
public bool ImportGraph { get; set; } = false;
public int num_steps = 10000;
private NDArray data;
private (Operation, Tensor, Tensor) make_graph(Tensor features,Tensor labels, int num_hidden = 8)
{
var stddev = 1 / Math.Sqrt(2);
var hidden_weights = tf.Variable(tf.truncated_normal(new int []{2, num_hidden}, seed:1, stddev: (float) stddev ));
// Shape [4, num_hidden]
var hidden_activations = tf.nn.relu(tf.matmul(features, hidden_weights));
var output_weights = tf.Variable(tf.truncated_normal(
new[] {num_hidden, 1},
seed: 17,
stddev: (float) (1 / Math.Sqrt(num_hidden))
));
// Shape [4, 1]
var logits = tf.matmul(hidden_activations, output_weights);
// Shape [4]
var predictions = tf.sigmoid(tf.squeeze(logits));
var loss = tf.reduce_mean(tf.square(predictions - tf.cast(labels, tf.float32)), name:"loss");
var gs = tf.Variable(0, trainable: false, name: "global_step");
var train_op = tf.train.GradientDescentOptimizer(0.2f).minimize(loss, global_step: gs);
return (train_op, loss, gs);
}
public bool Run()
{
PrepareData();
float loss_value = 0;
if (ImportGraph)
loss_value = RunWithImportedGraph();
else
loss_value = RunWithBuiltGraph();
return loss_value < 0.0628;
}
private float RunWithImportedGraph()
{
var graph = tf.Graph().as_default();
tf.train.import_meta_graph("graph/xor.meta");
Tensor features = graph.get_operation_by_name("Placeholder");
Tensor labels = graph.get_operation_by_name("Placeholder_1");
Tensor loss = graph.get_operation_by_name("loss");
Tensor train_op = graph.get_operation_by_name("train_op");
Tensor global_step = graph.get_operation_by_name("global_step");
var init = tf.global_variables_initializer();
float loss_value = 0;
// Start tf session
with(tf.Session(graph), sess =>
{
sess.run(init);
var step = 0;
var y_ = np.array(new int[] { 1, 0, 0, 1 }, dtype: np.int32);
while (step < num_steps)
{
// original python:
//_, step, loss_value = sess.run(
// [train_op, gs, loss],
// feed_dict={features: xy, labels: y_}
// )
var result = sess.run(new ITensorOrOperation[] { train_op, global_step, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
loss_value = result[2];
step = result[1];
if (step % 1000 == 0)
Console.WriteLine($"Step {step} loss: {loss_value}");
}
Console.WriteLine($"Final loss: {loss_value}");
});
return loss_value;
}
private float RunWithBuiltGraph()
{
var graph = tf.Graph().as_default();
var features = tf.placeholder(tf.float32, new TensorShape(4, 2));
var labels = tf.placeholder(tf.int32, new TensorShape(4));
var (train_op, loss, gs) = make_graph(features, labels);
var init = tf.global_variables_initializer();
float loss_value = 0;
// Start tf session
with(tf.Session(graph), sess =>
{
sess.run(init);
var step = 0;
var y_ = np.array(new int[] { 1, 0, 0, 1 }, dtype: np.int32);
while (step < num_steps)
{
var result = sess.run(new ITensorOrOperation[] { train_op, gs, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
loss_value = result[2];
step = result[1];
if (step % 1000 == 0)
Console.WriteLine($"Step {step} loss: {loss_value}");
}
Console.WriteLine($"Final loss: {loss_value}");
});
return loss_value;
}
public void PrepareData()
{
data = new float[,]
{
{1, 0 },
{1, 1 },
{0, 0 },
{0, 1 }
};
if (ImportGraph)
{
// download graph meta data
string url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/graph/xor.meta";
Web.Download(url, "graph", "xor.meta");
}
}
}
}

+ 1
- 0
test/TensorFlowNET.Examples/Program.cs View File

@@ -64,6 +64,7 @@ namespace TensorFlowNET.Examples
disabled.ForEach(x => Console.WriteLine($"{x} is Disabled!", Color.Tan)); disabled.ForEach(x => Console.WriteLine($"{x} is Disabled!", Color.Tan));
errors.ForEach(x => Console.WriteLine($"{x} is Failed!", Color.Red)); errors.ForEach(x => Console.WriteLine($"{x} is Failed!", Color.Red));


Console.Write("Please [Enter] to quit.");
Console.ReadLine(); Console.ReadLine();
} }
} }


+ 0
- 94
test/TensorFlowNET.Examples/Text/DataHelpers.cs View File

@@ -1,94 +0,0 @@
using NumSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace TensorFlowNET.Examples.CnnTextClassification
{
public class DataHelpers
{
private const string TRAIN_PATH = "text_classification/dbpedia_csv/train.csv";
private const string TEST_PATH = "text_classification/dbpedia_csv/test.csv";

public static (int[][], int[], int) build_char_dataset(string step, string model, int document_max_len, int? limit = null)
{
if (model != "vd_cnn")
throw new NotImplementedException(model);
string alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’'\"/|_#$%ˆ&*˜‘+=<>()[]{} ";
/*if (step == "train")
df = pd.read_csv(TRAIN_PATH, names =["class", "title", "content"]);*/
var char_dict = new Dictionary<string, int>();
char_dict["<pad>"] = 0;
char_dict["<unk>"] = 1;
foreach (char c in alphabet)
char_dict[c.ToString()] = char_dict.Count;

var contents = File.ReadAllLines(TRAIN_PATH);
var size = limit == null ? contents.Length : limit.Value;

var x = new int[size][];
var y = new int[size];
for (int i = 0; i < size; i++)
{
string[] parts = contents[i].ToLower().Split(",\"").ToArray();
string content = parts[2];
content = content.Substring(0, content.Length - 1);
x[i] = new int[document_max_len];
for (int j = 0; j < document_max_len; j++)
{
if (j >= content.Length)
x[i][j] = char_dict["<pad>"];
else
x[i][j] = char_dict.ContainsKey(content[j].ToString()) ? char_dict[content[j].ToString()] : char_dict["<unk>"];
}
y[i] = int.Parse(parts[0]);
}

return (x, y, alphabet.Length + 2);
}

/// <summary>
/// Loads MR polarity data from files, splits the data into words and generates labels.
/// Returns split sentences and labels.
/// </summary>
/// <param name="positive_data_file"></param>
/// <param name="negative_data_file"></param>
/// <returns></returns>
public static (string[], NDArray) load_data_and_labels(string positive_data_file, string negative_data_file)
{
Directory.CreateDirectory("CnnTextClassification");
Utility.Web.Download(positive_data_file, "CnnTextClassification", "rt -polarity.pos");
Utility.Web.Download(negative_data_file, "CnnTextClassification", "rt-polarity.neg");

// Load data from files
var positive_examples = File.ReadAllLines("CnnTextClassification/rt-polarity.pos")
.Select(x => x.Trim())
.ToArray();

var negative_examples = File.ReadAllLines("CnnTextClassification/rt-polarity.neg")
.Select(x => x.Trim())
.ToArray();

var x_text = new List<string>();
x_text.AddRange(positive_examples);
x_text.AddRange(negative_examples);
x_text = x_text.Select(x => clean_str(x)).ToList();

var positive_labels = positive_examples.Select(x => new int[2] { 0, 1 }).ToArray();
var negative_labels = negative_examples.Select(x => new int[2] { 1, 0 }).ToArray();
var y = np.concatenate(new int[][][] { positive_labels, negative_labels });
return (x_text.ToArray(), y);
}

private static string clean_str(string str)
{
str = Regex.Replace(str, @"[^A-Za-z0-9(),!?\'\`]", " ");
str = Regex.Replace(str, @"\'s", " \'s");
return str;
}
}
}

+ 0
- 71
test/TensorFlowNET.Examples/Text/NER/BiLstmCrfNer.cs View File

@@ -1,71 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Tensorflow;
using static Tensorflow.Python;

namespace TensorFlowNET.Examples
{
/// <summary>
/// Bidirectional LSTM-CRF Models for Sequence Tagging
/// https://github.com/guillaumegenthial/tf_ner/tree/master/models/lstm_crf
/// </summary>
public class BiLstmCrfNer : IExample
{
public int Priority => 101;

public bool Enabled { get; set; } = true;
public bool ImportGraph { get; set; } = false;

public string Name => "bi-LSTM + CRF NER";
HyperParams @params = new HyperParams();

public bool Run()
{
PrepareData();
return false;
}

public void PrepareData()
{
if (!Directory.Exists(HyperParams.DATADIR))
Directory.CreateDirectory(HyperParams.DATADIR);

if (!Directory.Exists(@params.RESULTDIR))
Directory.CreateDirectory(@params.RESULTDIR);

if (!Directory.Exists(@params.MODELDIR))
Directory.CreateDirectory(@params.MODELDIR);

if (!Directory.Exists(@params.EVALDIR))
Directory.CreateDirectory(@params.EVALDIR);
}

private class HyperParams
{
public const string DATADIR = "BiLstmCrfNer";
public string RESULTDIR = Path.Combine(DATADIR, "results");
public string MODELDIR;
public string EVALDIR;

public int dim = 300;
public float dropout = 0.5f;
public int num_oov_buckets = 1;
public int epochs = 25;
public int batch_size = 20;
public int buffer = 15000;
public int lstm_size = 100;
public string words = Path.Combine(DATADIR, "vocab.words.txt");
public string chars = Path.Combine(DATADIR, "vocab.chars.txt");
public string tags = Path.Combine(DATADIR, "vocab.tags.txt");
public string glove = Path.Combine(DATADIR, "glove.npz");

public HyperParams()
{
MODELDIR = Path.Combine(RESULTDIR, "model");
EVALDIR = Path.Combine(MODELDIR, "eval");
}
}
}
}

+ 0
- 179
test/TensorFlowNET.Examples/Text/TextClassificationTrain.cs View File

@@ -1,179 +0,0 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using NumSharp;
using Tensorflow;
using Tensorflow.Keras.Engine;
using TensorFlowNET.Examples.Text.cnn_models;
using TensorFlowNET.Examples.TextClassification;
using TensorFlowNET.Examples.Utility;
using static Tensorflow.Python;

namespace TensorFlowNET.Examples.CnnTextClassification
{
/// <summary>
/// https://github.com/dongjun-Lee/text-classification-models-tf
/// </summary>
public class TextClassificationTrain : IExample
{
public int Priority => 100;
public bool Enabled { get; set; } = false;
public string Name => "Text Classification";
public int? DataLimit = null;
public bool ImportGraph { get; set; } = true;

private string dataDir = "text_classification";
private string dataFileName = "dbpedia_csv.tar.gz";

public string model_name = "vd_cnn"; // word_cnn | char_cnn | vd_cnn | word_rnn | att_rnn | rcnn

private const int CHAR_MAX_LEN = 1014;
private const int NUM_CLASS = 2;
private const int BATCH_SIZE = 64;
private const int NUM_EPOCHS = 10;
protected float loss_value = 0;

public bool Run()
{
PrepareData();
return with(tf.Session(), sess =>
{
if (ImportGraph)
return RunWithImportedGraph(sess);
else
return RunWithBuiltGraph(sess);
});
}

protected virtual bool RunWithImportedGraph(Session sess)
{
var graph = tf.Graph().as_default();
Console.WriteLine("Building dataset...");
var (x, y, alphabet_size) = DataHelpers.build_char_dataset("train", model_name, CHAR_MAX_LEN, DataLimit);

var (train_x, valid_x, train_y, valid_y) = train_test_split(x, y, test_size: 0.15f);

var meta_file = model_name + "_untrained.meta";
tf.train.import_meta_graph(Path.Join("graph", meta_file));

//sess.run(tf.global_variables_initializer()); // not necessary here, has already been done before meta graph export

var train_batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS);
var num_batches_per_epoch = (len(train_x) - 1); // BATCH_SIZE + 1
double max_accuracy = 0;

Tensor is_training = graph.get_operation_by_name("is_training");
Tensor model_x = graph.get_operation_by_name("x");
Tensor model_y = graph.get_operation_by_name("y");
Tensor loss = graph.get_operation_by_name("Variable");
Tensor accuracy = graph.get_operation_by_name("accuracy/accuracy");

foreach (var (x_batch, y_batch) in train_batches)
{
var train_feed_dict = new Hashtable
{
[model_x] = x_batch,
[model_y] = y_batch,
[is_training] = true,
};

//_, step, loss = sess.run([model.optimizer, model.global_step, model.loss], feed_dict = train_feed_dict)
}

return false;
}

protected virtual bool RunWithBuiltGraph(Session session)
{
Console.WriteLine("Building dataset...");
var (x, y, alphabet_size) = DataHelpers.build_char_dataset("train", model_name, CHAR_MAX_LEN, DataLimit);

var (train_x, valid_x, train_y, valid_y) = train_test_split(x, y, test_size: 0.15f);

ITextClassificationModel model = null;
switch (model_name) // word_cnn | char_cnn | vd_cnn | word_rnn | att_rnn | rcnn
{
case "word_cnn":
case "char_cnn":
case "word_rnn":
case "att_rnn":
case "rcnn":
throw new NotImplementedException();
break;
case "vd_cnn":
model=new VdCnn(alphabet_size, CHAR_MAX_LEN, NUM_CLASS);
break;
}
// todo train the model
return false;
}

private (int[][], int[][], int[], int[]) train_test_split(int[][] x, int[] y, float test_size = 0.3f)
{
int len = x.Length;
int classes = y.Distinct().Count();
int samples = len / classes;
int train_size = int.Parse((samples * (1 - test_size)).ToString());

var train_x = new List<int[]>();
var valid_x = new List<int[]>();
var train_y = new List<int>();
var valid_y = new List<int>();

for (int i = 0; i < classes; i++)
{
for (int j = 0; j < samples; j++)
{
int idx = i * samples + j;
if (idx < train_size + samples * i)
{
train_x.Add(x[idx]);
train_y.Add(y[idx]);
}
else
{
valid_x.Add(x[idx]);
valid_y.Add(y[idx]);
}
}
}

return (train_x.ToArray(), valid_x.ToArray(), train_y.ToArray(), valid_y.ToArray());
}

private IEnumerable<(NDArray, NDArray)> batch_iter(int[][] raw_inputs, int[] raw_outputs, int batch_size, int num_epochs)
{
var inputs = np.array(raw_inputs);
var outputs = np.array(raw_outputs);

var num_batches_per_epoch = (len(inputs) - 1); // batch_size + 1
foreach (var epoch in range(num_epochs))
{
foreach (var batch_num in range(num_batches_per_epoch))
{
var start_index = batch_num * batch_size;
var end_index = Math.Min((batch_num + 1) * batch_size, len(inputs));
yield return (inputs[$"{start_index}:{end_index}"], outputs[$"{start_index}:{end_index}"]);
}
}
}

public void PrepareData()
{
string url = "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz";
Web.Download(url, dataDir, dataFileName);
Compress.ExtractTGZ(Path.Join(dataDir, dataFileName), dataDir);

if (ImportGraph)
{
// download graph meta data
var meta_file = model_name + "_untrained.meta";
url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/graph/" + meta_file;
Web.Download(url, "graph", meta_file);
}
}
}
}

test/TensorFlowNET.Examples/Text/BinaryTextClassification.cs → test/TensorFlowNET.Examples/TextProcess/BinaryTextClassification.cs View File


+ 163
- 0
test/TensorFlowNET.Examples/TextProcess/DataHelpers.cs View File

@@ -0,0 +1,163 @@
using NumSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using TensorFlowNET.Examples.Utility;

namespace TensorFlowNET.Examples
{
public class DataHelpers
{

public static (int[][], int[], int) build_char_dataset(string path, string model, int document_max_len, int? limit = null, bool shuffle=true)
{
if (model != "vd_cnn")
throw new NotImplementedException(model);
string alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’'\"/|_#$%ˆ&*˜‘+=<>()[]{} ";
/*if (step == "train")
df = pd.read_csv(TRAIN_PATH, names =["class", "title", "content"]);*/
var char_dict = new Dictionary<string, int>();
char_dict["<pad>"] = 0;
char_dict["<unk>"] = 1;
foreach (char c in alphabet)
char_dict[c.ToString()] = char_dict.Count;
var contents = File.ReadAllLines(path);
if (shuffle)
new Random(17).Shuffle(contents);
//File.WriteAllLines("text_classification/dbpedia_csv/train_6400.csv", contents.Take(6400));
var size = limit == null ? contents.Length : limit.Value;

var x = new int[size][];
var y = new int[size];
var tenth = size / 10;
var percent = 0;
for (int i = 0; i < size; i++)
{
if ((i + 1) % tenth == 0)
{
percent += 10;
Console.WriteLine($"\t{percent}%");
}

string[] parts = contents[i].ToLower().Split(",\"").ToArray();
string content = parts[2];
content = content.Substring(0, content.Length - 1);
var a = new int[document_max_len];
for (int j = 0; j < document_max_len; j++)
{
if (j >= content.Length)
a[j] = char_dict["<pad>"];
else
a[j] = char_dict.ContainsKey(content[j].ToString()) ? char_dict[content[j].ToString()] : char_dict["<unk>"];
}
x[i] = a;
y[i] = int.Parse(parts[0]);
}

return (x, y, alphabet.Length + 2);
}

/// <summary>
/// Loads MR polarity data from files, splits the data into words and generates labels.
/// Returns split sentences and labels.
/// </summary>
/// <param name="positive_data_file"></param>
/// <param name="negative_data_file"></param>
/// <returns></returns>
public static (string[], NDArray) load_data_and_labels(string positive_data_file, string negative_data_file)
{
Directory.CreateDirectory("CnnTextClassification");
Utility.Web.Download(positive_data_file, "CnnTextClassification", "rt -polarity.pos");
Utility.Web.Download(negative_data_file, "CnnTextClassification", "rt-polarity.neg");

// Load data from files
var positive_examples = File.ReadAllLines("CnnTextClassification/rt-polarity.pos")
.Select(x => x.Trim())
.ToArray();

var negative_examples = File.ReadAllLines("CnnTextClassification/rt-polarity.neg")
.Select(x => x.Trim())
.ToArray();

var x_text = new List<string>();
x_text.AddRange(positive_examples);
x_text.AddRange(negative_examples);
x_text = x_text.Select(x => clean_str(x)).ToList();

var positive_labels = positive_examples.Select(x => new int[2] { 0, 1 }).ToArray();
var negative_labels = negative_examples.Select(x => new int[2] { 1, 0 }).ToArray();
var y = np.concatenate(new int[][][] { positive_labels, negative_labels });
return (x_text.ToArray(), y);
}

private static string clean_str(string str)
{
str = Regex.Replace(str, @"[^A-Za-z0-9(),!?\'\`]", " ");
str = Regex.Replace(str, @"\'s", " \'s");
return str;
}

/// <summary>
/// Padding
/// </summary>
/// <param name="sequences"></param>
/// <param name="pad_tok">the char to pad with</param>
/// <returns>a list of list where each sublist has same length</returns>
public static (int[][], int[]) pad_sequences(int[][] sequences, int pad_tok = 0)
{
int max_length = sequences.Select(x => x.Length).Max();
return _pad_sequences(sequences, pad_tok, max_length);
}

public static (int[][][], int[][]) pad_sequences(int[][][] sequences, int pad_tok = 0)
{
int max_length_word = sequences.Select(x => x.Select(w => w.Length).Max()).Max();
int[][][] sequence_padded;
var sequence_length = new int[sequences.Length][];
for (int i = 0; i < sequences.Length; i++)
{
// all words are same length now
var (sp, sl) = _pad_sequences(sequences[i], pad_tok, max_length_word);
sequence_length[i] = sl;
}

int max_length_sentence = sequences.Select(x => x.Length).Max();
(sequence_padded, _) = _pad_sequences(sequences, np.repeat(pad_tok, max_length_word).Data<int>(), max_length_sentence);
(sequence_length, _) = _pad_sequences(sequence_length, 0, max_length_sentence);

return (sequence_padded, sequence_length);
}

private static (int[][], int[]) _pad_sequences(int[][] sequences, int pad_tok, int max_length)
{
var sequence_length = new int[sequences.Length];
for (int i = 0; i < sequences.Length; i++)
{
sequence_length[i] = sequences[i].Length;
Array.Resize(ref sequences[i], max_length);
}

return (sequences, sequence_length);
}

private static (int[][][], int[]) _pad_sequences(int[][][] sequences, int[] pad_tok, int max_length)
{
var sequence_length = new int[sequences.Length];
for (int i = 0; i < sequences.Length; i++)
{
sequence_length[i] = sequences[i].Length;
Array.Resize(ref sequences[i], max_length);
for (int j = 0; j < max_length - sequence_length[i]; j++)
{
sequences[i][max_length - j - 1] = new int[pad_tok.Length];
Array.Copy(pad_tok, sequences[i][max_length - j - 1], pad_tok.Length);
}
}

return (sequences, sequence_length);
}
}
}

+ 39
- 0
test/TensorFlowNET.Examples/TextProcess/NER/BiLstmCrfNer.cs View File

@@ -0,0 +1,39 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Tensorflow;
using Tensorflow.Estimator;
using static Tensorflow.Python;

namespace TensorFlowNET.Examples
{
/// <summary>
/// Bidirectional LSTM-CRF Models for Sequence Tagging
/// https://github.com/guillaumegenthial/tf_ner/tree/master/models/lstm_crf
/// </summary>
public class BiLstmCrfNer : IExample
{
public int Priority => 101;

public bool Enabled { get; set; } = true;
public bool ImportGraph { get; set; } = false;

public string Name => "bi-LSTM + CRF NER";

public bool Run()
{
PrepareData();
return false;
}

public void PrepareData()
{
var hp = new HyperParams("BiLstmCrfNer");
hp.filepath_words = Path.Combine(hp.data_root_dir, "vocab.words.txt");
hp.filepath_chars = Path.Combine(hp.data_root_dir, "vocab.chars.txt");
hp.filepath_tags = Path.Combine(hp.data_root_dir, "vocab.tags.txt");
hp.filepath_glove = Path.Combine(hp.data_root_dir, "glove.npz");
}
}
}

test/TensorFlowNET.Examples/Text/NER/CRF.cs → test/TensorFlowNET.Examples/TextProcess/NER/CRF.cs View File


+ 212
- 0
test/TensorFlowNET.Examples/TextProcess/NER/LstmCrfNer.cs View File

@@ -0,0 +1,212 @@
using NumSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Tensorflow;
using Tensorflow.Estimator;
using TensorFlowNET.Examples.Utility;
using static Tensorflow.Python;
using static TensorFlowNET.Examples.DataHelpers;

namespace TensorFlowNET.Examples.Text.NER
{
/// <summary>
/// A NER model using Tensorflow (LSTM + CRF + chars embeddings).
/// State-of-the-art performance (F1 score between 90 and 91).
///
/// https://github.com/guillaumegenthial/sequence_tagging
/// </summary>
public class LstmCrfNer : IExample
{
public int Priority => 14;

public bool Enabled { get; set; } = true;
public bool ImportGraph { get; set; } = true;

public string Name => "LSTM + CRF NER";

HyperParams hp;
int nwords, nchars, ntags;
CoNLLDataset dev, train;

Tensor word_ids_tensor;
Tensor sequence_lengths_tensor;
Tensor char_ids_tensor;
Tensor word_lengths_tensor;
Tensor labels_tensor;
Tensor dropout_tensor;
Tensor lr_tensor;
Operation train_op;
Tensor loss;
Tensor merged;

public bool Run()
{
PrepareData();
var graph = tf.Graph().as_default();

tf.train.import_meta_graph("graph/lstm_crf_ner.meta");

float loss_value = 0f;

//add_summary();
word_ids_tensor = graph.OperationByName("word_ids");
sequence_lengths_tensor = graph.OperationByName("sequence_lengths");
char_ids_tensor = graph.OperationByName("char_ids");
word_lengths_tensor = graph.OperationByName("word_lengths");
labels_tensor = graph.OperationByName("labels");
dropout_tensor = graph.OperationByName("dropout");
lr_tensor = graph.OperationByName("lr");
train_op = graph.OperationByName("train_step/Adam");
loss = graph.OperationByName("Mean");
//merged = graph.OperationByName("Merge/MergeSummary");

var init = tf.global_variables_initializer();

with(tf.Session(), sess =>
{
sess.run(init);

foreach (var epoch in range(hp.epochs))
{
Console.Write($"Epoch {epoch + 1} out of {hp.epochs}, ");
loss_value = run_epoch(sess, train, dev, epoch);
print($"train loss: {loss_value}");
}
});

return loss_value < 0.1;
}

private float run_epoch(Session sess, CoNLLDataset train, CoNLLDataset dev, int epoch)
{
NDArray results = null;

// iterate over dataset
var batches = minibatches(train, hp.batch_size);
foreach (var(words, labels) in batches)
{
var (fd, _) = get_feed_dict(words, labels, hp.lr, hp.dropout);
results = sess.run(new ITensorOrOperation[] { train_op, loss }, feed_dict: fd);
}

return results[1];
}

private IEnumerable<((int[][], int[])[], int[][])> minibatches(CoNLLDataset data, int minibatch_size)
{
var x_batch = new List<(int[][], int[])>();
var y_batch = new List<int[]>();
foreach(var (x, y) in data.GetItems())
{
if (len(y_batch) == minibatch_size)
{
yield return (x_batch.ToArray(), y_batch.ToArray());
x_batch.Clear();
y_batch.Clear();
}

var x3 = (x.Select(x1 => x1.Item1).ToArray(), x.Select(x2 => x2.Item2).ToArray());
x_batch.Add(x3);
y_batch.Add(y);
}

if (len(y_batch) > 0)
yield return (x_batch.ToArray(), y_batch.ToArray());
}

/// <summary>
/// Given some data, pad it and build a feed dictionary
/// </summary>
/// <param name="words">
/// list of sentences. A sentence is a list of ids of a list of
/// words. A word is a list of ids
/// </param>
/// <param name="labels">list of ids</param>
/// <param name="lr">learning rate</param>
/// <param name="dropout">keep prob</param>
private (FeedItem[], int[]) get_feed_dict((int[][], int[])[] words, int[][] labels, float lr = 0f, float dropout = 0f)
{
int[] sequence_lengths;
int[][] word_lengths;
int[][] word_ids;
int[][][] char_ids;

if (true) // use_chars
{
(char_ids, word_ids) = (words.Select(x => x.Item1).ToArray(), words.Select(x => x.Item2).ToArray());
(word_ids, sequence_lengths) = pad_sequences(word_ids, pad_tok: 0);
(char_ids, word_lengths) = pad_sequences(char_ids, pad_tok: 0);
}

// build feed dictionary
var feeds = new List<FeedItem>();
feeds.Add(new FeedItem(word_ids_tensor, np.array(word_ids)));
feeds.Add(new FeedItem(sequence_lengths_tensor, np.array(sequence_lengths)));
if(true) // use_chars
{
feeds.Add(new FeedItem(char_ids_tensor, np.array(char_ids)));
feeds.Add(new FeedItem(word_lengths_tensor, np.array(word_lengths)));
}

(labels, _) = pad_sequences(labels, 0);
feeds.Add(new FeedItem(labels_tensor, np.array(labels)));

feeds.Add(new FeedItem(lr_tensor, lr));

feeds.Add(new FeedItem(dropout_tensor, dropout));

return (feeds.ToArray(), sequence_lengths);
}

public void PrepareData()
{
hp = new HyperParams("LstmCrfNer")
{
epochs = 50,
dropout = 0.5f,
batch_size = 20,
lr_method = "adam",
lr = 0.001f,
lr_decay = 0.9f,
clip = false,
epoch_no_imprv = 3,
hidden_size_char = 100,
hidden_size_lstm = 300
};
hp.filepath_dev = hp.filepath_test = hp.filepath_train = Path.Combine(hp.data_root_dir, "test.txt");

// Loads vocabulary, processing functions and embeddings
hp.filepath_words = Path.Combine(hp.data_root_dir, "words.txt");
hp.filepath_tags = Path.Combine(hp.data_root_dir, "tags.txt");
hp.filepath_chars = Path.Combine(hp.data_root_dir, "chars.txt");

string url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/data/lstm_crf_ner.zip";
Web.Download(url, hp.data_root_dir, "lstm_crf_ner.zip");
Compress.UnZip(Path.Combine(hp.data_root_dir, "lstm_crf_ner.zip"), hp.data_root_dir);

// 1. vocabulary
/*vocab_tags = load_vocab(hp.filepath_tags);

nwords = vocab_words.Count;
nchars = vocab_chars.Count;
ntags = vocab_tags.Count;*/

// 2. get processing functions that map str -> id
dev = new CoNLLDataset(hp.filepath_dev, hp);
train = new CoNLLDataset(hp.filepath_train, hp);

// download graph meta data
var meta_file = "lstm_crf_ner.meta";
var meta_path = Path.Combine("graph", meta_file);
url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/graph/" + meta_file;
Web.Download(url, "graph", meta_file);

}
}
}

test/TensorFlowNET.Examples/NamedEntityRecognition.cs → test/TensorFlowNET.Examples/TextProcess/NamedEntityRecognition.cs View File


+ 289
- 0
test/TensorFlowNET.Examples/TextProcess/TextClassificationTrain.cs View File

@@ -0,0 +1,289 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using NumSharp;
using Tensorflow;
using Tensorflow.Keras.Engine;
using Tensorflow.Sessions;
using TensorFlowNET.Examples.Text.cnn_models;
using TensorFlowNET.Examples.TextClassification;
using TensorFlowNET.Examples.Utility;
using static Tensorflow.Python;

namespace TensorFlowNET.Examples.CnnTextClassification
{
/// <summary>
/// https://github.com/dongjun-Lee/text-classification-models-tf
/// </summary>
public class TextClassificationTrain : IExample
{
public int Priority => 100;
public bool Enabled { get; set; } = false;
public string Name => "Text Classification";
public int? DataLimit = null;
public bool ImportGraph { get; set; } = true;
public bool UseSubset = true; // <----- set this true to use a limited subset of dbpedia

private string dataDir = "text_classification";
private string dataFileName = "dbpedia_csv.tar.gz";

public string model_name = "vd_cnn"; // word_cnn | char_cnn | vd_cnn | word_rnn | att_rnn | rcnn

private const string TRAIN_PATH = "text_classification/dbpedia_csv/train.csv";
private const string SUBSET_PATH = "text_classification/dbpedia_csv/dbpedia_6400.csv";
private const string TEST_PATH = "text_classification/dbpedia_csv/test.csv";

private const int CHAR_MAX_LEN = 1014;
private const int WORD_MAX_LEN = 1014;
private const int NUM_CLASS = 14;
private const int BATCH_SIZE = 64;
private const int NUM_EPOCHS = 10;
protected float loss_value = 0;

public bool Run()
{
PrepareData();
var graph = tf.Graph().as_default();
return with(tf.Session(graph), sess =>
{
if (ImportGraph)
return RunWithImportedGraph(sess, graph);
else
return RunWithBuiltGraph(sess, graph);
});
}

protected virtual bool RunWithImportedGraph(Session sess, Graph graph)
{
var stopwatch = Stopwatch.StartNew();
Console.WriteLine("Building dataset...");
var path = UseSubset ? SUBSET_PATH : TRAIN_PATH;
var (x, y, alphabet_size) = DataHelpers.build_char_dataset(path, model_name, CHAR_MAX_LEN, DataLimit = null, shuffle:!UseSubset);
Console.WriteLine("\tDONE ");

var (train_x, valid_x, train_y, valid_y) = train_test_split(x, y, test_size: 0.15f);
Console.WriteLine("Training set size: " + train_x.len);
Console.WriteLine("Test set size: " + valid_x.len);

Console.WriteLine("Import graph...");
var meta_file = model_name + ".meta";
tf.train.import_meta_graph(Path.Join("graph", meta_file));
Console.WriteLine("\tDONE " + stopwatch.Elapsed);

sess.run(tf.global_variables_initializer());
var train_batches = batch_iter(train_x, train_y, BATCH_SIZE, NUM_EPOCHS);
var num_batches_per_epoch = (len(train_x) - 1) / BATCH_SIZE + 1;
double max_accuracy = 0;

Tensor is_training = graph.get_tensor_by_name("is_training:0");
Tensor model_x = graph.get_tensor_by_name("x:0");
Tensor model_y = graph.get_tensor_by_name("y:0");
Tensor loss = graph.get_tensor_by_name("loss/value:0");
Tensor optimizer = graph.get_tensor_by_name("loss/optimizer:0");
Tensor global_step = graph.get_tensor_by_name("global_step:0");
Tensor accuracy = graph.get_tensor_by_name("accuracy/value:0");
stopwatch = Stopwatch.StartNew();
int i = 0;
foreach (var (x_batch, y_batch, total) in train_batches)
{
i++;
var train_feed_dict = new FeedDict
{
[model_x] = x_batch,
[model_y] = y_batch,
[is_training] = true,
};
//Console.WriteLine("x: " + x_batch.ToString() + "\n");
//Console.WriteLine("y: " + y_batch.ToString());
// original python:
//_, step, loss = sess.run([model.optimizer, model.global_step, model.loss], feed_dict = train_feed_dict)
var result = sess.run(new ITensorOrOperation[] { optimizer, global_step, loss }, train_feed_dict);
loss_value = result[2];
var step = (int)result[1];
if (step % 10 == 0 || step < 10)
{
var estimate = TimeSpan.FromSeconds((stopwatch.Elapsed.TotalSeconds / i) * total);
Console.WriteLine($"Training on batch {i}/{total}. Estimated training time: {estimate}");
Console.WriteLine($"Step {step} loss: {loss_value}");
}

if (step % 100 == 0)
{
// # Test accuracy with validation data for each epoch.
var valid_batches = batch_iter(valid_x, valid_y, BATCH_SIZE, 1);
var (sum_accuracy, cnt) = (0.0f, 0);
foreach (var (valid_x_batch, valid_y_batch, total_validation_batches) in valid_batches)
{
var valid_feed_dict = new FeedDict
{
[model_x] = valid_x_batch,
[model_y] = valid_y_batch,
[is_training] = false
};
var result1 = sess.run(accuracy, valid_feed_dict);
float accuracy_value = result1;
sum_accuracy += accuracy_value;
cnt += 1;
}

var valid_accuracy = sum_accuracy / cnt;

print($"\nValidation Accuracy = {valid_accuracy}\n");

// # Save model
// if valid_accuracy > max_accuracy:
// max_accuracy = valid_accuracy
// saver.save(sess, "{0}/{1}.ckpt".format(args.model, args.model), global_step = step)
// print("Model is saved.\n")
}
}

return false;
}

protected virtual bool RunWithBuiltGraph(Session session, Graph graph)
{
Console.WriteLine("Building dataset...");
var (x, y, alphabet_size) = DataHelpers.build_char_dataset("train", model_name, CHAR_MAX_LEN, DataLimit);

var (train_x, valid_x, train_y, valid_y) = train_test_split(x, y, test_size: 0.15f);

ITextClassificationModel model = null;
switch (model_name) // word_cnn | char_cnn | vd_cnn | word_rnn | att_rnn | rcnn
{
case "word_cnn":
case "char_cnn":
case "word_rnn":
case "att_rnn":
case "rcnn":
throw new NotImplementedException();
break;
case "vd_cnn":
model = new VdCnn(alphabet_size, CHAR_MAX_LEN, NUM_CLASS);
break;
}
// todo train the model
return false;
}
// TODO: this originally is an SKLearn utility function. it randomizes train and test which we don't do here
private (NDArray, NDArray, NDArray, NDArray) train_test_split(NDArray x, NDArray y, float test_size = 0.3f)
{
Console.WriteLine("Splitting in Training and Testing data...");
int len = x.shape[0];
//int classes = y.Data<int>().Distinct().Count();
//int samples = len / classes;
int train_size = (int)Math.Round(len * (1 - test_size));
var train_x = x[new Slice(stop: train_size), new Slice()];
var valid_x = x[new Slice(start: train_size + 1), new Slice()];
var train_y = y[new Slice(stop: train_size)];
var valid_y = y[new Slice(start: train_size + 1)];
Console.WriteLine("\tDONE");
return (train_x, valid_x, train_y, valid_y);
}

//private (int[][], int[][], int[], int[]) train_test_split(int[][] x, int[] y, float test_size = 0.3f)
//{
// Console.WriteLine("Splitting in Training and Testing data...");
// var stopwatch = Stopwatch.StartNew();
// int len = x.Length;
// int train_size = int.Parse((len * (1 - test_size)).ToString());
// var random = new Random(17);

// // we collect indices of labels
// var labels = new Dictionary<int, HashSet<int>>();
// var shuffled_indices = random.Shuffle<int>(range(len).ToArray());
// foreach (var i in shuffled_indices)
// {
// var label = y[i];
// if (!labels.ContainsKey(i))
// labels[label] = new HashSet<int>();
// labels[label].Add(i);
// }

// var train_x = new int[train_size][];
// var valid_x = new int[len - train_size][];
// var train_y = new int[train_size];
// var valid_y = new int[len - train_size];
// FillWithShuffledLabels(x, y, train_x, train_y, random, labels);
// FillWithShuffledLabels(x, y, valid_x, valid_y, random, labels);

// Console.WriteLine("\tDONE " + stopwatch.Elapsed);
// return (train_x, valid_x, train_y, valid_y);
//}

private static void FillWithShuffledLabels(int[][] x, int[] y, int[][] shuffled_x, int[] shuffled_y, Random random, Dictionary<int, HashSet<int>> labels)
{
int i = 0;
var label_keys = labels.Keys.ToArray();
while (i < shuffled_x.Length)
{
var key = label_keys[random.Next(label_keys.Length)];
var set = labels[key];
var index = set.First();
if (set.Count == 0)
{
labels.Remove(key); // remove the set as it is empty
label_keys = labels.Keys.ToArray();
}
shuffled_x[i] = x[index];
shuffled_y[i] = y[index];
i++;
}
}

private IEnumerable<(NDArray, NDArray, int)> batch_iter(NDArray inputs, NDArray outputs, int batch_size, int num_epochs)
{
var num_batches_per_epoch = (len(inputs) - 1) / batch_size + 1;
var total_batches = num_batches_per_epoch * num_epochs;
foreach (var epoch in range(num_epochs))
{
foreach (var batch_num in range(num_batches_per_epoch))
{
var start_index = batch_num * batch_size;
var end_index = Math.Min((batch_num + 1) * batch_size, len(inputs));
if (end_index <= start_index)
break;
yield return (inputs[new Slice(start_index, end_index)], outputs[new Slice(start_index, end_index)], total_batches);
}
}
}

public void PrepareData()
{
if (UseSubset)
{
var url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/data/dbpedia_subset.zip";
Web.Download(url, dataDir, "dbpedia_subset.zip");
Compress.UnZip(Path.Combine(dataDir, "dbpedia_subset.zip"), Path.Combine(dataDir, "dbpedia_csv"));
}
else
{
string url = "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz";
Web.Download(url, dataDir, dataFileName);
Compress.ExtractTGZ(Path.Join(dataDir, dataFileName), dataDir);
}

if (ImportGraph)
{
// download graph meta data
var meta_file = model_name + ".meta";
var meta_path = Path.Combine("graph", meta_file);
if (File.GetLastWriteTime(meta_path) < new DateTime(2019, 05, 11))
{
// delete old cached file which contains errors
Console.WriteLine("Discarding cached file: " + meta_path);
File.Delete(meta_path);
}
var url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/graph/" + meta_file;
Web.Download(url, "graph", meta_file);
}
}
}
}

test/TensorFlowNET.Examples/Text/Word2Vec.cs → test/TensorFlowNET.Examples/TextProcess/Word2Vec.cs View File


test/TensorFlowNET.Examples/Text/cnn_models/ITextClassificationModel.cs → test/TensorFlowNET.Examples/TextProcess/cnn_models/ITextClassificationModel.cs View File

@@ -1,14 +1,14 @@
using System;
using System.Collections.Generic;
using System.Text;
using Tensorflow;
namespace TensorFlowNET.Examples.Text.cnn_models
{
interface ITextClassificationModel
{
Tensor is_training { get; }
Tensor x { get;}
Tensor y { get; }
}
}
using System;
using System.Collections.Generic;
using System.Text;
using Tensorflow;
namespace TensorFlowNET.Examples.Text.cnn_models
{
interface ITextClassificationModel
{
Tensor is_training { get; }
Tensor x { get;}
Tensor y { get; }
}
}

test/TensorFlowNET.Examples/Text/cnn_models/VdCnn.cs → test/TensorFlowNET.Examples/TextProcess/cnn_models/VdCnn.cs View File


+ 22
- 0
test/TensorFlowNET.Examples/Utility/ArrayShuffling.cs View File

@@ -0,0 +1,22 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace TensorFlowNET.Examples.Utility
{
public static class ArrayShuffling
{
public static T[] Shuffle<T>(this Random rng, T[] array)
{
int n = array.Length;
while (n > 1)
{
int k = rng.Next(n--);
T temp = array[n];
array[n] = array[k];
array[k] = temp;
}
return array;
}
}
}

+ 108
- 0
test/TensorFlowNET.Examples/Utility/CoNLLDataset.cs View File

@@ -0,0 +1,108 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Tensorflow.Estimator;

namespace TensorFlowNET.Examples.Utility
{
public class CoNLLDataset
{
static Dictionary<string, int> vocab_chars;
static Dictionary<string, int> vocab_words;
static Dictionary<string, int> vocab_tags;

HyperParams _hp;
string _path;

public CoNLLDataset(string path, HyperParams hp)
{
if (vocab_chars == null)
vocab_chars = load_vocab(hp.filepath_chars);

if (vocab_words == null)
vocab_words = load_vocab(hp.filepath_words);

if (vocab_tags == null)
vocab_tags = load_vocab(hp.filepath_tags);

_path = path;
}

private (int[], int) processing_word(string word)
{
var char_ids = word.ToCharArray().Select(x => vocab_chars[x.ToString()]).ToArray();

// 1. preprocess word
if (true) // lowercase
word = word.ToLower();
if (false) // isdigit
word = "$NUM$";

// 2. get id of word
int id = vocab_words.GetValueOrDefault(word, vocab_words["$UNK$"]);
return (char_ids, id);
}

private int processing_tag(string word)
{
// 1. preprocess word
if (false) // lowercase
word = word.ToLower();
if (false) // isdigit
word = "$NUM$";

// 2. get id of word
int id = vocab_tags.GetValueOrDefault(word, -1);

return id;
}

private Dictionary<string, int> load_vocab(string filename)
{
var dict = new Dictionary<string, int>();
int i = 0;
File.ReadAllLines(filename)
.Select(x => dict[x] = i++)
.Count();
return dict;
}

public IEnumerable<((int[], int)[], int[])> GetItems()
{
var lines = File.ReadAllLines(_path);

int niter = 0;
var words = new List<(int[], int)>();
var tags = new List<int>();

foreach (var l in lines)
{
string line = l.Trim();
if (string.IsNullOrEmpty(line) || line.StartsWith("-DOCSTART-"))
{
if (words.Count > 0)
{
niter++;
yield return (words.ToArray(), tags.ToArray());
words.Clear();
tags.Clear();
}
}
else
{
var ls = line.Split(' ');
// process word
var word = processing_word(ls[0]);
var tag = processing_tag(ls[1]);

words.Add(word);
tags.Add(tag);
}
}
}
}
}

+ 28
- 35
test/TensorFlowNET.Examples/Utility/PbtxtParser.cs View File

@@ -23,52 +23,45 @@ namespace TensorFlowNET.Examples.Utility
string line; string line;
string newText = "{\"items\":["; string newText = "{\"items\":[";


try
using (System.IO.StreamReader reader = new System.IO.StreamReader(filePath))
{ {
using (System.IO.StreamReader reader = new System.IO.StreamReader(filePath))

while ((line = reader.ReadLine()) != null)
{ {
string newline = string.Empty;


while ((line = reader.ReadLine()) != null)
if (line.Contains("{"))
{ {
string newline = string.Empty;

if (line.Contains("{"))
{
newline = line.Replace("item", "").Trim();
//newText += line.Insert(line.IndexOf("=") + 1, "\"") + "\",";
newText += newline;
}
else if (line.Contains("}"))
{
newText = newText.Remove(newText.Length - 1);
newText += line;
newText += ",";
}
else
{
newline = line.Replace(":", "\":").Trim();
newline = "\"" + newline;// newline.Insert(0, "\"");
newline += ",";

newText += newline;
}

newline = line.Replace("item", "").Trim();
//newText += line.Insert(line.IndexOf("=") + 1, "\"") + "\",";
newText += newline;
}
else if (line.Contains("}"))
{
newText = newText.Remove(newText.Length - 1);
newText += line;
newText += ",";
} }
else
{
newline = line.Replace(":", "\":").Trim();
newline = "\"" + newline;// newline.Insert(0, "\"");
newline += ",";


newText = newText.Remove(newText.Length - 1);
newText += "]}";
newText += newline;
}


reader.Close();
} }


PbtxtItems items = JsonConvert.DeserializeObject<PbtxtItems>(newText);
newText = newText.Remove(newText.Length - 1);
newText += "]}";


return items;
}
catch (Exception ex)
{
return null;
reader.Close();
} }

PbtxtItems items = JsonConvert.DeserializeObject<PbtxtItems>(newText);

return items;
} }
} }
} }

+ 0
- 8
test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs View File

@@ -61,14 +61,6 @@ namespace TensorFlowNET.ExamplesTests
new LogisticRegression() { Enabled = true, training_epochs=10, train_size = 500, validation_size = 100, test_size = 100 }.Run(); new LogisticRegression() { Enabled = true, training_epochs=10, train_size = 500, validation_size = 100, test_size = 100 }.Run();
} }
[Ignore]
[TestMethod]
public void MetaGraph()
{
tf.Graph().as_default();
new MetaGraph() { Enabled = true }.Run();
}
[Ignore] [Ignore]
[TestMethod] [TestMethod]
public void NaiveBayesClassifier() public void NaiveBayesClassifier()


+ 15
- 1
test/TensorFlowNET.UnitTest/GraphTest.cs View File

@@ -5,6 +5,7 @@ using System.Runtime.InteropServices;
using System.Text; using System.Text;
using Tensorflow; using Tensorflow;
using Buffer = Tensorflow.Buffer; using Buffer = Tensorflow.Buffer;
using static Tensorflow.Python;


namespace TensorFlowNET.UnitTest namespace TensorFlowNET.UnitTest
{ {
@@ -417,6 +418,19 @@ namespace TensorFlowNET.UnitTest


} }



public void ImportGraphMeta()
{
var dir = "my-save-dir/";
with(tf.Session(), sess =>
{
var new_saver = tf.train.import_meta_graph(dir + "my-model-10000.meta");
new_saver.restore(sess, dir + "my-model-10000");
var labels = tf.constant(0, dtype: tf.int32, shape: new int[] { 100 }, name: "labels");
var batch_size = tf.size(labels);
var logits = (tf.get_collection("logits") as List<ITensorOrOperation>)[0] as Tensor;
var loss = tf.losses.sparse_softmax_cross_entropy(labels: labels,
logits: logits);
});
}
} }
} }

+ 2
- 2
test/TensorFlowNET.UnitTest/control_flow_ops_test/WhileContextTestCase.cs View File

@@ -25,10 +25,10 @@ namespace TensorFlowNET.UnitTest.control_flow_ops_test
foreach (Operation op in sess.graph.get_operations()) foreach (Operation op in sess.graph.get_operations())
{ {
var control_flow_context = op._get_control_flow_context(); var control_flow_context = op._get_control_flow_context();
if (control_flow_context != null)
/*if (control_flow_context != null)
self.assertProtoEquals(control_flow_context.to_proto(), self.assertProtoEquals(control_flow_context.to_proto(),
WhileContext.from_proto( WhileContext.from_proto(
control_flow_context.to_proto()).to_proto());
control_flow_context.to_proto()).to_proto(), "");*/
} }
}); });
} }


Loading…
Cancel
Save