Browse Source

fix keras.preprocessing.sequence

tags/v0.8.0
haiping008 6 years ago
parent
commit
031670b168
12 changed files with 216 additions and 28 deletions
  1. +5
    -5
      TensorFlow.NET.sln
  2. +15
    -0
      src/TensorFlowNET.Core/APIs/keras.preprocessing.cs
  3. +17
    -0
      src/TensorFlowNET.Core/Keras/Engine/Layer.cs
  4. +10
    -0
      src/TensorFlowNET.Core/Keras/Engine/Model.cs
  5. +10
    -0
      src/TensorFlowNET.Core/Keras/Engine/Network.cs
  6. +24
    -0
      src/TensorFlowNET.Core/Keras/Engine/Sequential.cs
  7. +11
    -0
      src/TensorFlowNET.Core/Keras/Preprocessing.cs
  8. +77
    -0
      src/TensorFlowNET.Core/Keras/Sequence.cs
  9. +1
    -1
      src/TensorFlowNET.Core/TensorFlowNET.Core.csproj
  10. +10
    -0
      src/TensorFlowNET.Core/Train/Checkpointable/CheckpointableBase.cs
  11. +1
    -1
      test/TensorFlowNET.Examples/TensorFlowNET.Examples.csproj
  12. +35
    -21
      test/TensorFlowNET.Examples/TextClassificationWithMovieReviews.cs

+ 5
- 5
TensorFlow.NET.sln View File

@@ -13,7 +13,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Utility", "sr
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Visualization", "TensorFlowNET.Visualization\TensorFlowNET.Visualization.csproj", "{4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "KerasNET.Core", "..\Keras.NET\src\KerasNET.Core\KerasNET.Core.csproj", "{E2F0C39C-D706-4CF5-AE00-81FB447F949D}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NumSharp.Core", "..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj", "{0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -41,10 +41,10 @@ Global
{4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4BB2ABD1-635E-41E4-B534-CB5B6A2D754D}.Release|Any CPU.Build.0 = Release|Any CPU
{E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E2F0C39C-D706-4CF5-AE00-81FB447F949D}.Release|Any CPU.Build.0 = Release|Any CPU
{0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0AB4662E-7E3C-455F-BF0C-23D56CBE74F3}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE


+ 15
- 0
src/TensorFlowNET.Core/APIs/keras.preprocessing.cs View File

@@ -0,0 +1,15 @@
using System;
using System.Collections.Generic;
using System.Text;
using Tensorflow.Keras;
using Tensorflow.Keras.Engine;

namespace Tensorflow
{
/// <summary>
/// Static entry points that mirror the layout of the Python <c>keras</c> module,
/// so callers can write <c>keras.preprocessing.sequence...</c> or <c>keras.Sequential()</c>.
/// </summary>
public static partial class keras
{
    /// <summary>
    /// Sequence helper instance created once when the class is initialised.
    /// </summary>
    public static Sequence sequence = new Sequence();

    /// <summary>
    /// Preprocessing helpers; a new <see cref="Preprocessing"/> object is returned on every access.
    /// </summary>
    public static Preprocessing preprocessing
    {
        get { return new Preprocessing(); }
    }

    /// <summary>
    /// Creates a new, empty <see cref="Tensorflow.Keras.Engine.Sequential"/> model.
    /// </summary>
    /// <returns>A freshly constructed model instance.</returns>
    public static Sequential Sequential()
    {
        return new Sequential();
    }
}
}

+ 17
- 0
src/TensorFlowNET.Core/Keras/Engine/Layer.cs View File

@@ -0,0 +1,17 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Tensorflow.Keras.Engine
{
/// <summary>
/// Base layer class.
/// A layer is a class implementing common neural network operations, such
/// as convolution, batch norm, etc. These operations require managing weights,
/// losses, updates, and inter-layer connectivity.
/// </summary>
/// <remarks>
/// Placeholder: the class derives from <see cref="CheckpointableBase"/> and
/// declares no members of its own yet.
/// </remarks>
public class Layer : CheckpointableBase
{

}
}

+ 10
- 0
src/TensorFlowNET.Core/Keras/Engine/Model.cs View File

@@ -0,0 +1,10 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Tensorflow.Keras.Engine
{
/// <summary>
/// Model type of the Keras engine. Currently an empty placeholder that only
/// inherits from <see cref="Network"/>; it is internal to this assembly.
/// </summary>
internal class Model : Network
{
}
}

+ 10
- 0
src/TensorFlowNET.Core/Keras/Engine/Network.cs View File

@@ -0,0 +1,10 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Tensorflow.Keras.Engine
{
/// <summary>
/// Network type of the Keras engine. Currently an empty placeholder that only
/// inherits from <see cref="Layer"/> and adds no members of its own.
/// </summary>
public class Network : Layer
{
}
}

+ 24
- 0
src/TensorFlowNET.Core/Keras/Engine/Sequential.cs View File

@@ -0,0 +1,24 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Tensorflow.Keras.Engine
{
/// <summary>
/// Sequential model stub. Derives from <see cref="Network"/> and implements
/// <c>IPython</c> so it can take part in Python-style <c>with</c> blocks.
/// The context-manager hooks are not implemented yet.
/// </summary>
public class Sequential : Network, IPython
{
    /// <summary>
    /// Releases resources held by the model. The model owns nothing yet, so this
    /// is intentionally a no-op. It must not throw: Dispose is normally invoked
    /// from <c>using</c>/<c>finally</c> cleanup paths, where an exception would
    /// mask the original failure (see analyzer rule CA1065).
    /// </summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// Context-manager entry hook. Not implemented yet.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void __enter__()
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Context-manager exit hook. Not implemented yet.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void __exit__()
    {
        throw new NotImplementedException();
    }
}
}

+ 11
- 0
src/TensorFlowNET.Core/Keras/Preprocessing.cs View File

@@ -0,0 +1,11 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Tensorflow.Keras
{
/// <summary>
/// Groups the Keras preprocessing helpers, mirroring the Python
/// <c>keras.preprocessing</c> module.
/// </summary>
public class Preprocessing
{
    /// <summary>
    /// Sequence preprocessing helpers; a new <see cref="Sequence"/> object is
    /// returned on every access.
    /// </summary>
    public Sequence sequence
    {
        get { return new Sequence(); }
    }
}
}

+ 77
- 0
src/TensorFlowNET.Core/Keras/Sequence.cs View File

@@ -0,0 +1,77 @@
using NumSharp.Core;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Tensorflow.Keras
{
/// <summary>
/// Sequence preprocessing helpers, mirroring the Python
/// <c>keras.preprocessing.sequence</c> module.
/// </summary>
public class Sequence
{
    /// <summary>
    /// Pads (and, when needed, truncates) sequences to the same length.
    /// https://keras.io/preprocessing/sequence/
    /// https://faroit.github.io/keras-docs/1.2.0/preprocessing/sequence/
    /// </summary>
    /// <param name="sequences">
    /// Array whose elements are the individual sequences. Only elements that are
    /// <c>int[]</c> can currently be padded; anything else ends in
    /// <see cref="NotImplementedException"/>.
    /// </param>
    /// <param name="maxlen">
    /// Int, length of every output row. When null, the longest sequence length
    /// found in <paramref name="sequences"/> is used.
    /// </param>
    /// <param name="dtype">
    /// Type of the output sequences. Currently ignored: the result is always int32.
    /// </param>
    /// <param name="padding">String, 'pre' or 'post': pad before or after each sequence.</param>
    /// <param name="truncating">
    /// String, 'pre' or 'post': remove values from the beginning or the end of
    /// sequences longer than <paramref name="maxlen"/>.
    /// </param>
    /// <param name="value">Padding value. Defaults to the integer 0.</param>
    /// <returns>An int32 array with one row per input sequence and <paramref name="maxlen"/> columns.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="padding"/> or <paramref name="truncating"/> is neither 'pre' nor 'post'.
    /// </exception>
    /// <exception cref="NotImplementedException">
    /// The array dtype or an element type is not supported.
    /// </exception>
    public NDArray pad_sequences(NDArray sequences,
        int? maxlen = null,
        string dtype = "int32",
        string padding = "pre",
        string truncating = "pre",
        object value = null)
    {
        if (padding != "pre" && padding != "post")
        {
            throw new ArgumentException($"Padding type \"{padding}\" not understood", nameof(padding));
        }

        if (truncating != "pre" && truncating != "post")
        {
            throw new ArgumentException($"Truncating type \"{truncating}\" not understood", nameof(truncating));
        }

        // Measure every sequence so that maxlen can be inferred when the caller omits it.
        int[] length = new int[sequences.size];
        switch (sequences.dtype.Name)
        {
            case "Object":
                for (int i = 0; i < sequences.size; i++)
                {
                    switch (sequences.Data<object>(i))
                    {
                        // int[] is the only element type the fill loop below can pad,
                        // so its length must be counted here; otherwise an omitted
                        // maxlen would always resolve to 0.
                        case int[] ints:
                            length[i] = ints.Length;
                            break;
                        case string text:
                            length[i] = Regex.Matches(text, ",").Count;
                            break;
                    }
                }
                break;
            case "Int32":
                for (int i = 0; i < sequences.size; i++)
                {
                    length[i] = Regex.Matches(sequences.Data<object>(i).ToString(), ",").Count;
                }
                break;
            default:
                throw new NotImplementedException($"pad_sequences: {sequences.dtype.Name}");
        }

        if (maxlen == null)
        {
            maxlen = length.Max();
        }

        // The output array is int32, so the default pad value is an int rather than a float.
        if (value == null)
        {
            value = 0;
        }

        int rows = sequences.size;
        int width = maxlen.Value;
        var nd = new NDArray(np.int32, new Shape(rows, width));
        for (int i = 0; i < rows; i++)
        {
            switch (sequences[i])
            {
                case int[] data:
                {
                    // Number of source values that survive truncation.
                    int kept = Math.Min(data.Length, width);
                    // 'pre' truncation keeps the tail of the sequence, 'post' keeps the head.
                    int srcStart = truncating == "pre" ? data.Length - kept : 0;
                    // 'pre' padding right-aligns the kept values, 'post' left-aligns them.
                    int dstStart = padding == "pre" ? width - kept : 0;

                    for (int j = 0; j < width; j++)
                    {
                        int k = j - dstStart;
                        nd[i, j] = (k >= 0 && k < kept) ? data[srcStart + k] : value;
                    }
                    break;
                }
                default:
                    throw new NotImplementedException("pad_sequences");
            }
        }

        return nd;
    }
}
}

+ 1
- 1
src/TensorFlowNET.Core/TensorFlowNET.Core.csproj View File

@@ -52,7 +52,7 @@ Docs: https://tensorflownet.readthedocs.io</Description>
</ItemGroup>

<ItemGroup>
<Folder Include="APIs\Keras\" />
<ProjectReference Include="..\..\..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj" />
</ItemGroup>

</Project>

+ 10
- 0
src/TensorFlowNET.Core/Train/Checkpointable/CheckpointableBase.cs View File

@@ -0,0 +1,10 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Tensorflow
{
/// <summary>
/// Base type that <c>Tensorflow.Keras.Engine.Layer</c> derives from.
/// Currently an empty placeholder with no members.
/// </summary>
public class CheckpointableBase
{
}
}

+ 1
- 1
test/TensorFlowNET.Examples/TensorFlowNET.Examples.csproj View File

@@ -11,7 +11,7 @@
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\Keras.NET\src\KerasNET.Core\KerasNET.Core.csproj" />
<ProjectReference Include="..\..\..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj" />
<ProjectReference Include="..\..\src\TensorFlowNET.Core\TensorFlowNET.Core.csproj" />
<ProjectReference Include="..\..\src\TensorFlowNET.Utility\TensorFlowNET.Utility.csproj" />
</ItemGroup>


+ 35
- 21
test/TensorFlowNET.Examples/TextClassificationWithMovieReviews.cs View File

@@ -1,12 +1,10 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Tensorflow;
using NumSharp.Core;
using Newtonsoft.Json;
using System.Linq;
using Keras;
using System.Text.RegularExpressions;

namespace TensorFlowNET.Examples
@@ -24,11 +22,21 @@ namespace TensorFlowNET.Examples

// A dictionary mapping words to an integer index
var word_index = GetWordIndex();
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
value: word_index["<PAD>"],
padding: "post",
maxlen: 256);

test_data = keras.preprocessing.sequence.pad_sequences(test_data,
value: word_index["<PAD>"],
padding: "post",
maxlen: 256);

// input shape is the vocabulary count used for the movie reviews (10,000 words)
int vocab_size = 10000;

var model = keras.Sequential();
}

private ((NDArray, NDArray), (NDArray, NDArray)) PrepareData()
@@ -46,42 +54,48 @@ namespace TensorFlowNET.Examples
var x_train = ReadData(Path.Join(dir, "x_train.txt"));
var labels_train = ReadData(Path.Join(dir, "y_train.txt"));
var indices_train = ReadData(Path.Join(dir, "indices_train.txt"));
// x_train = x_train[indices_train];
// labels_train = labels_train[indices_train];
x_train = x_train[indices_train];
labels_train = labels_train[indices_train];

var x_test = ReadData(Path.Join(dir, "x_test.txt"));
var labels_test = ReadData(Path.Join(dir, "y_test.txt"));
var indices_test = ReadData(Path.Join(dir, "indices_test.txt"));
// x_test = x_test[indices_test];
// labels_test = labels_test[indices_test];
x_test = x_test[indices_test];
labels_test = labels_test[indices_test];

// not completed
/*var xs = x_train.hstack(x_test);
var xs = x_train.hstack(x_test);
var labels = labels_train.hstack(labels_test);

var idx = x_train.size;
var y_train = labels_train;
var y_test = labels_test;

return ((x_train, y_train), (x_test, y_test));*/

throw new NotImplementedException();
return ((x_train, y_train), (x_test, y_test));
}

private int[][] ReadData(string file)
private NDArray ReadData(string file)
{
var lines = new List<int[]>();
var lines = File.ReadAllLines(file);
var nd = new NDArray(lines[0].StartsWith("[") ? typeof(object) : np.int32, new Shape(lines.Length));

foreach(var line in File.ReadAllLines(file))
if (lines[0].StartsWith("["))
{
var matches = Regex.Matches(line, @"\d+,*");
var data = new int[matches.Count];
for (int i = 0; i < data.Length; i++)
data[i] = Convert.ToInt32(matches[i].Value.Trim(','));
lines.Add(data.ToArray());
for (int i = 0; i < lines.Length; i++)
{
var matches = Regex.Matches(lines[i], @"\d+\s*");
var data = new int[matches.Count];
for (int j = 0; j < data.Length; j++)
data[j] = Convert.ToInt32(matches[j].Value);
nd[i] = data.ToArray();
}
}

return lines.ToArray();
else
{
for (int i = 0; i < lines.Length; i++)
nd[i] = Convert.ToInt32(lines[i]);
}
return nd;
}

private Dictionary<string, int> GetWordIndex()


Loading…
Cancel
Save