@@ -15,6 +15,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Visualization | |||||
EndProject | EndProject | ||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NumSharp.Core", "..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj", "{E8340C61-12C1-4BEE-A340-403E7C1ACD82}" | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NumSharp.Core", "..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj", "{E8340C61-12C1-4BEE-A340-403E7C1ACD82}" | ||||
EndProject | EndProject | ||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "scikit-learn", "..\scikit-learn.net\src\scikit-learn\scikit-learn.csproj", "{199DDAD8-4A6F-43B3-A560-C0393619E304}" | |||||
EndProject | |||||
Global | Global | ||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||||
Debug|Any CPU = Debug|Any CPU | Debug|Any CPU = Debug|Any CPU | ||||
@@ -45,6 +47,10 @@ Global | |||||
{E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Debug|Any CPU.Build.0 = Debug|Any CPU | {E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||||
{E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Release|Any CPU.ActiveCfg = Release|Any CPU | {E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||||
{E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Release|Any CPU.Build.0 = Release|Any CPU | {E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Release|Any CPU.Build.0 = Release|Any CPU | ||||
{199DDAD8-4A6F-43B3-A560-C0393619E304}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | |||||
{199DDAD8-4A6F-43B3-A560-C0393619E304}.Debug|Any CPU.Build.0 = Debug|Any CPU | |||||
{199DDAD8-4A6F-43B3-A560-C0393619E304}.Release|Any CPU.ActiveCfg = Release|Any CPU | |||||
{199DDAD8-4A6F-43B3-A560-C0393619E304}.Release|Any CPU.Build.0 = Release|Any CPU | |||||
EndGlobalSection | EndGlobalSection | ||||
GlobalSection(SolutionProperties) = preSolution | GlobalSection(SolutionProperties) = preSolution | ||||
HideSolutionNode = FALSE | HideSolutionNode = FALSE | ||||
@@ -1,6 +1,7 @@ | |||||
using System; | using System; | ||||
using System.Collections.Generic; | using System.Collections.Generic; | ||||
using System.IO; | using System.IO; | ||||
using System.Linq; | |||||
using System.Net; | using System.Net; | ||||
using System.Text; | using System.Text; | ||||
using System.Threading; | using System.Threading; | ||||
@@ -10,24 +11,31 @@ namespace TensorFlowNET.Utility | |||||
{ | { | ||||
public class Web | public class Web | ||||
{ | { | ||||
/// <summary>
/// Downloads <paramref name="url"/> into the directory <paramref name="destDir"/>,
/// creating the directory if needed, and prints progress dots to the console
/// once per second while the transfer runs.
/// </summary>
/// <param name="url">Address of the resource to download.</param>
/// <param name="destDir">Directory the file is written to.</param>
/// <param name="destFileName">
/// Name of the file inside <paramref name="destDir"/>. When null, the last
/// '/'-separated segment of <paramref name="url"/> is used.
/// </param>
/// <returns>
/// true when the file was downloaded; false when the destination file already
/// existed and nothing was downloaded.
/// </returns>
/// <exception cref="WebException">The download failed.</exception>
public static bool Download(string url, string destDir, string destFileName)
{
    // URL segments are always separated by '/', whatever the host OS uses for
    // file paths (Path.DirectorySeparatorChar is '\' on Windows).
    if (destFileName == null)
        destFileName = url.Split('/').Last();

    Directory.CreateDirectory(destDir);
    string relativeFilePath = Path.Combine(destDir, destFileName);

    if (File.Exists(relativeFilePath))
    {
        Console.WriteLine($"{relativeFilePath} already exists.");
        return false;
    }

    using (var wc = new WebClient())
    {
        Console.WriteLine($"Downloading {relativeFilePath}");
        var download = Task.Run(() => wc.DownloadFile(url, relativeFilePath));
        while (!download.IsCompleted)
        {
            Thread.Sleep(1000);
            Console.Write(".");
        }

        Console.WriteLine("");

        try
        {
            // Observe the task so a failed transfer is reported to the caller
            // instead of being announced as a successful download.
            download.GetAwaiter().GetResult();
        }
        catch
        {
            // Remove any partial file; otherwise the next call would see it
            // and report "already exists" without retrying.
            if (File.Exists(relativeFilePath))
                File.Delete(relativeFilePath);
            throw;
        }
    }

    Console.WriteLine($"Downloaded {relativeFilePath}");
    return true;
}
@@ -1,58 +0,0 @@ | |||||
using NumSharp.Core; | |||||
using System; | |||||
using System.Collections.Generic; | |||||
using System.Linq; | |||||
using System.Text; | |||||
using Tensorflow; | |||||
namespace TensorFlowNET.Examples.CnnTextClassification | |||||
{ | |||||
/// <summary>
/// Work-in-progress example for training a CNN text classifier on the
/// rt-polarity positive/negative data files. Only the start of the
/// preprocessing step exists; <see cref="preprocess"/> always ends by
/// throwing <see cref="NotImplementedException"/>.
/// </summary>
public class CnnTextTrain : Python, IExample
{
    // Percentage of the training data to use for validation
    private float dev_sample_percentage = 0.1f;
    // Data source for the positive data.
    private string positive_data_file = "https://raw.githubusercontent.com/dennybritz/cnn-text-classification-tf/master/data/rt-polaritydata/rt-polarity.pos";
    // Data source for the negative data.
    private string negative_data_file = "https://raw.githubusercontent.com/dennybritz/cnn-text-classification-tf/master/data/rt-polaritydata/rt-polarity.neg";

    // Dimensionality of character embedding (default: 128)
    private int embedding_dim = 128;
    // Comma-separated filter sizes (default: '3,4,5')
    private string filter_sizes = "3,4,5";
    // Number of filters per filter size (default: 128)
    private int num_filters = 128;
    // Dropout keep probability (default: 0.5)
    private float dropout_keep_prob = 0.5f;
    // L2 regularization lambda (default: 0.0)
    private float l2_reg_lambda = 0.0f;

    // Batch Size (default: 64)
    private int batch_size = 64;
    // Number of training epochs (default: 200)
    private int num_epochs = 200;
    // Evaluate model on dev set after this many steps (default: 100)
    private int evaluate_every = 100;
    // Save model after this many steps (default: 100)
    private int checkpoint_every = 100;
    // Number of checkpoints to store (default: 5)
    private int num_checkpoints = 5;

    // Allow device soft device placement
    private bool allow_soft_placement = true;
    // Log placement of ops on devices
    private bool log_device_placement = false;

    /// <summary>
    /// Entry point of the example; currently only runs the preprocessing step.
    /// </summary>
    public void Run()
    {
        var (x_train, y_train, vocab_processor, x_dev, y_dev) = preprocess();
    }

    /// <summary>
    /// Loads the positive/negative sentences and starts building the vocabulary.
    /// </summary>
    /// <returns>Nothing yet: the method always throws.</returns>
    /// <exception cref="NotImplementedException">Always thrown; the remaining steps are not implemented.</exception>
    public (NDArray, NDArray, NDArray, NDArray, NDArray) preprocess()
    {
        var (x_text, y) = DataHelpers.load_data_and_labels(positive_data_file, negative_data_file);

        // Build vocabulary: the longest document (in space-separated words) sets the document length.
        int max_document_length = x_text.Select(x => x.Split(' ').Length).Max();
        var vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length);

        throw new NotImplementedException("");
    }
}
} |
@@ -1,16 +0,0 @@ | |||||
using System; | |||||
using System.Collections.Generic; | |||||
using System.Text; | |||||
using Tensorflow; | |||||
namespace TensorFlowNET.Examples.CnnTextClassification | |||||
{ | |||||
/// <summary>
/// Convolutional neural network for text classification, after
/// https://github.com/dennybritz/cnn-text-classification-tf.
/// The class declares no members of its own yet.
/// </summary>
public class TextCNN : Python
{
}
} |
@@ -85,15 +85,14 @@ namespace TensorFlowNET.Examples | |||||
// get model file | // get model file | ||||
string url = "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip"; | string url = "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip"; | ||||
string zipFile = Path.Join(dir, "inception5h.zip"); | |||||
Utility.Web.Download(url, zipFile); | |||||
Utility.Web.Download(url, dir, "inception5h.zip"); | |||||
Utility.Compress.UnZip(zipFile, dir); | |||||
Utility.Compress.UnZip(Path.Join(dir, "inception5h.zip"), dir); | |||||
// download sample picture | // download sample picture | ||||
string pic = Path.Join(dir, "img", "grace_hopper.jpg"); | |||||
Directory.CreateDirectory(Path.Join(dir, "img")); | Directory.CreateDirectory(Path.Join(dir, "img")); | ||||
Utility.Web.Download($"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/grace_hopper.jpg", pic); | |||||
url = $"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/grace_hopper.jpg"; | |||||
Utility.Web.Download(url, Path.Join(dir, "img"), "grace_hopper.jpg"); | |||||
} | } | ||||
} | } | ||||
} | } |
@@ -90,14 +90,14 @@ namespace TensorFlowNET.Examples | |||||
// get model file | // get model file | ||||
string url = "https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz"; | string url = "https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz"; | ||||
string zipFile = Path.Join(dir, $"{pbFile}.tar.gz"); | |||||
Utility.Web.Download(url, zipFile); | |||||
Utility.Web.Download(url, dir, $"{pbFile}.tar.gz"); | |||||
Utility.Compress.ExtractTGZ(zipFile, dir); | |||||
Utility.Compress.ExtractTGZ(Path.Join(dir, $"{pbFile}.tar.gz"), dir); | |||||
// download sample picture | // download sample picture | ||||
string pic = "grace_hopper.jpg"; | string pic = "grace_hopper.jpg"; | ||||
Utility.Web.Download($"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/{pic}", Path.Join(dir, pic)); | |||||
url = $"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/{pic}"; | |||||
Utility.Web.Download(url, dir, pic); | |||||
} | } | ||||
} | } | ||||
} | } |
@@ -12,6 +12,7 @@ | |||||
<ItemGroup> | <ItemGroup> | ||||
<ProjectReference Include="..\..\..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj" /> | <ProjectReference Include="..\..\..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj" /> | ||||
<ProjectReference Include="..\..\..\scikit-learn.net\src\scikit-learn\scikit-learn.csproj" /> | |||||
<ProjectReference Include="..\..\src\TensorFlowNET.Core\TensorFlowNET.Core.csproj" /> | <ProjectReference Include="..\..\src\TensorFlowNET.Core\TensorFlowNET.Core.csproj" /> | ||||
<ProjectReference Include="..\..\src\TensorFlowNET.Utility\TensorFlowNET.Utility.csproj" /> | <ProjectReference Include="..\..\src\TensorFlowNET.Utility\TensorFlowNET.Utility.csproj" /> | ||||
</ItemGroup> | </ItemGroup> | ||||
@@ -10,6 +10,44 @@ namespace TensorFlowNET.Examples.CnnTextClassification | |||||
{ | { | ||||
public class DataHelpers | public class DataHelpers | ||||
{ | { | ||||
private const string TRAIN_PATH = "text_classification/dbpedia_csv/train.csv"; | |||||
private const string TEST_PATH = "text_classification/dbpedia_csv/test.csv"; | |||||
/// <summary>
/// Reads a DBpedia CSV file and encodes the content column of every row as a
/// fixed-length sequence of character indices.
/// </summary>
/// <param name="step">"train" reads TRAIN_PATH; any other value reads TEST_PATH.</param>
/// <param name="model">Not used by this method.</param>
/// <param name="document_max_len">Length of every encoded row; shorter content is padded, longer content is cut off.</param>
/// <returns>
/// The encoded rows, the integer parsed from the first column of each row,
/// and the vocabulary size (alphabet length plus the pad and unknown entries).
/// </returns>
/// <exception cref="InvalidDataException">A row does not split into at least three columns.</exception>
public static (int[][], int[], int) build_char_dataset(string step, string model, int document_max_len)
{
    // Indices 0 and 1 are reserved for "<pad>" and "<unk>"; alphabet characters start at 2.
    const int PAD_INDEX = 0;
    const int UNK_INDEX = 1;

    string alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’'\"/|_#$%ˆ&*˜‘+=<>()[]{} ";

    var char_dict = new Dictionary<char, int>();
    foreach (char c in alphabet)
        char_dict[c] = char_dict.Count + 2;

    // Honor the requested split instead of always loading the training file.
    string path = step == "train" ? TRAIN_PATH : TEST_PATH;
    var contents = File.ReadAllLines(path);

    var x = new int[contents.Length][];
    var y = new int[contents.Length];

    for (int i = 0; i < contents.Length; i++)
    {
        // Invariant lower-casing keeps the mapping onto the ASCII alphabet
        // independent of the machine's culture (e.g. Turkish dotless i).
        string[] parts = contents[i].ToLowerInvariant().Split(",\"");
        if (parts.Length < 3)
            throw new InvalidDataException($"Line {i + 1} of {path} does not have the expected three columns.");

        // Drop the last character of the content column (the closing quote in a well-formed row).
        string content = parts[2];
        if (content.Length > 0)
            content = content.Substring(0, content.Length - 1);

        x[i] = new int[document_max_len];
        for (int j = 0; j < document_max_len; j++)
        {
            if (j >= content.Length)
                x[i][j] = PAD_INDEX;
            else
                x[i][j] = char_dict.TryGetValue(content[j], out int index) ? index : UNK_INDEX;
        }

        y[i] = int.Parse(parts[0], System.Globalization.CultureInfo.InvariantCulture);
    }

    return (x, y, alphabet.Length + 2);
}
/// <summary> | /// <summary> | ||||
/// Loads MR polarity data from files, splits the data into words and generates labels. | /// Loads MR polarity data from files, splits the data into words and generates labels. | ||||
/// Returns split sentences and labels. | /// Returns split sentences and labels. | ||||
@@ -20,8 +58,8 @@ namespace TensorFlowNET.Examples.CnnTextClassification | |||||
public static (string[], NDArray) load_data_and_labels(string positive_data_file, string negative_data_file) | public static (string[], NDArray) load_data_and_labels(string positive_data_file, string negative_data_file) | ||||
{ | { | ||||
Directory.CreateDirectory("CnnTextClassification"); | Directory.CreateDirectory("CnnTextClassification"); | ||||
Utility.Web.Download(positive_data_file, "CnnTextClassification/rt-polarity.pos"); | |||||
Utility.Web.Download(negative_data_file, "CnnTextClassification/rt-polarity.neg"); | |||||
Utility.Web.Download(positive_data_file, "CnnTextClassification", "rt -polarity.pos"); | |||||
Utility.Web.Download(negative_data_file, "CnnTextClassification", "rt-polarity.neg"); | |||||
// Load data from files | // Load data from files | ||||
var positive_examples = File.ReadAllLines("CnnTextClassification/rt-polarity.pos") | var positive_examples = File.ReadAllLines("CnnTextClassification/rt-polarity.pos") |
@@ -0,0 +1,37 @@ | |||||
using NumSharp.Core; | |||||
using System; | |||||
using System.Collections.Generic; | |||||
using System.IO; | |||||
using System.Linq; | |||||
using System.Text; | |||||
using Tensorflow; | |||||
using TensorFlowNET.Utility; | |||||
namespace TensorFlowNET.Examples.CnnTextClassification | |||||
{ | |||||
/// <summary>
/// Example that fetches the DBpedia CSV archive, builds a character-level
/// dataset from it and passes the result to train_test_split.
/// Based on https://github.com/dongjun-Lee/text-classification-models-tf
/// </summary>
public class TextClassificationTrain : Python, IExample
{
    // Fixed length every document is encoded to.
    private const int CHAR_MAX_LEN = 1014;

    // Directory the archive is downloaded to and extracted in.
    private string dataDir = "text_classification";
    // File name of the downloaded archive inside dataDir.
    private string dataFileName = "dbpedia_csv.tar.gz";

    /// <summary>
    /// Downloads the data, builds the training dataset and calls
    /// train_test_split on it with a test size of 0.15.
    /// </summary>
    public void Run()
    {
        download_dbpedia();

        Console.WriteLine("Building dataset...");
        var (x, y, alphabet_size) = DataHelpers.build_char_dataset(
            "train",
            "vdcnn",
            CHAR_MAX_LEN);

        var (train_x, valid_x, train_y, valid_y) = train_test_split(
            x,
            y,
            test_size: 0.15);
    }

    /// <summary>
    /// Downloads the DBpedia archive into dataDir and extracts it there.
    /// </summary>
    public void download_dbpedia()
    {
        string archivePath = Path.Join(dataDir, dataFileName);

        Web.Download(
            "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz",
            dataDir,
            dataFileName);
        Compress.ExtractTGZ(archivePath, dataDir);
    }
}
} |
@@ -46,9 +46,8 @@ namespace TensorFlowNET.Examples | |||||
// get model file | // get model file | ||||
string url = $"https://github.com/SciSharp/TensorFlow.NET/raw/master/data/{dataFile}"; | string url = $"https://github.com/SciSharp/TensorFlow.NET/raw/master/data/{dataFile}"; | ||||
string zipFile = Path.Join(dir, $"imdb.zip"); | |||||
Utility.Web.Download(url, zipFile); | |||||
Utility.Compress.UnZip(zipFile, dir); | |||||
Utility.Web.Download(url, dir, "imdb.zip"); | |||||
Utility.Compress.UnZip(Path.Join(dir, $"imdb.zip"), dir); | |||||
// prepare training dataset | // prepare training dataset | ||||
var x_train = ReadData(Path.Join(dir, "x_train.txt")); | var x_train = ReadData(Path.Join(dir, "x_train.txt")); | ||||