@@ -15,6 +15,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Visualization
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NumSharp.Core", "..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj", "{E8340C61-12C1-4BEE-A340-403E7C1ACD82}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "scikit-learn", "..\scikit-learn.net\src\scikit-learn\scikit-learn.csproj", "{199DDAD8-4A6F-43B3-A560-C0393619E304}"
EndProject
Global
    GlobalSection(SolutionConfigurationPlatforms) = preSolution
        Debug|Any CPU = Debug|Any CPU
@@ -45,6 +47,10 @@ Global
        {E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Debug|Any CPU.Build.0 = Debug|Any CPU
        {E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Release|Any CPU.ActiveCfg = Release|Any CPU
        {E8340C61-12C1-4BEE-A340-403E7C1ACD82}.Release|Any CPU.Build.0 = Release|Any CPU
        {199DDAD8-4A6F-43B3-A560-C0393619E304}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
        {199DDAD8-4A6F-43B3-A560-C0393619E304}.Debug|Any CPU.Build.0 = Debug|Any CPU
        {199DDAD8-4A6F-43B3-A560-C0393619E304}.Release|Any CPU.ActiveCfg = Release|Any CPU
        {199DDAD8-4A6F-43B3-A560-C0393619E304}.Release|Any CPU.Build.0 = Release|Any CPU
    EndGlobalSection
    GlobalSection(SolutionProperties) = preSolution
        HideSolutionNode = FALSE
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
@@ -10,24 +11,31 @@ namespace TensorFlowNET.Utility
{
    public class Web
    {
        public static bool Download(string url, string file)
        public static bool Download(string url, string destDir, string destFileName)
        {
            if (File.Exists(file))
            if (destFileName == null)
                destFileName = url.Split('/').Last();

            Directory.CreateDirectory(destDir);
            string relativeFilePath = Path.Combine(destDir, destFileName);

            if (File.Exists(relativeFilePath))
            {
                Console.WriteLine($"{file} already exists.");
                Console.WriteLine($"{relativeFilePath} already exists.");
                return false;
            }

            var wc = new WebClient();
            Console.WriteLine($"Downloading {file}");
            var download = Task.Run(() => wc.DownloadFile(url, file));
            Console.WriteLine($"Downloading {relativeFilePath}");
            var download = Task.Run(() => wc.DownloadFile(url, relativeFilePath));
            while (!download.IsCompleted)
            {
                Thread.Sleep(1000);
                Console.Write(".");
            }
            Console.WriteLine("");
            Console.WriteLine($"Downloaded {file}");
            Console.WriteLine($"Downloaded {relativeFilePath}");
            return true;
        }
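For reference, a minimal usage sketch of the reworked Download signature (the console program below is illustrative and not part of the diff): the caller passes a destination directory and an optional file name, the directory is created if missing, and a null file name is derived from the last URL segment.

    using TensorFlowNET.Utility;

    class DownloadDemo
    {
        static void Main()
        {
            // Explicit file name: saved as models/inception5h.zip
            Web.Download("https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip",
                         "models", "inception5h.zip");

            // Null file name: derived from the URL, so this also saves models/inception5h.zip
            // (and returns false because the file already exists from the call above).
            Web.Download("https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip",
                         "models", null);
        }
    }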
@@ -1,58 +0,0 @@
using NumSharp.Core;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Tensorflow;

namespace TensorFlowNET.Examples.CnnTextClassification
{
    public class CnnTextTrain : Python, IExample
    {
        // Percentage of the training data to use for validation
        private float dev_sample_percentage = 0.1f;
        // Data source for the positive data.
        private string positive_data_file = "https://raw.githubusercontent.com/dennybritz/cnn-text-classification-tf/master/data/rt-polaritydata/rt-polarity.pos";
        // Data source for the negative data.
        private string negative_data_file = "https://raw.githubusercontent.com/dennybritz/cnn-text-classification-tf/master/data/rt-polaritydata/rt-polarity.neg";
        // Dimensionality of character embedding (default: 128)
        private int embedding_dim = 128;
        // Comma-separated filter sizes (default: '3,4,5')
        private string filter_sizes = "3,4,5";
        // Number of filters per filter size (default: 128)
        private int num_filters = 128;
        // Dropout keep probability (default: 0.5)
        private float dropout_keep_prob = 0.5f;
        // L2 regularization lambda (default: 0.0)
        private float l2_reg_lambda = 0.0f;
        // Batch Size (default: 64)
        private int batch_size = 64;
        // Number of training epochs (default: 200)
        private int num_epochs = 200;
        // Evaluate model on dev set after this many steps (default: 100)
        private int evaluate_every = 100;
        // Save model after this many steps (default: 100)
        private int checkpoint_every = 100;
        // Number of checkpoints to store (default: 5)
        private int num_checkpoints = 5;
        // Allow soft device placement
        private bool allow_soft_placement = true;
        // Log placement of ops on devices
        private bool log_device_placement = false;

        public void Run()
        {
            var (x_train, y_train, vocab_processor, x_dev, y_dev) = preprocess();
        }

        public (NDArray, NDArray, NDArray, NDArray, NDArray) preprocess()
        {
            var (x_text, y) = DataHelpers.load_data_and_labels(positive_data_file, negative_data_file);
            // Build vocabulary
            int max_document_length = x_text.Select(x => x.Split(' ').Length).Max();
            var vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
            throw new NotImplementedException("");
        }
    }
}
@@ -1,16 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
using Tensorflow;

namespace TensorFlowNET.Examples.CnnTextClassification
{
    /// <summary>
    /// Convolutional Neural Network for Text Classification
    /// https://github.com/dennybritz/cnn-text-classification-tf
    /// </summary>
    public class TextCNN : Python
    {
    }
}
@@ -85,15 +85,14 @@ namespace TensorFlowNET.Examples
            // get model file
            string url = "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip";
            string zipFile = Path.Join(dir, "inception5h.zip");
            Utility.Web.Download(url, zipFile);
            Utility.Web.Download(url, dir, "inception5h.zip");
            Utility.Compress.UnZip(zipFile, dir);
            Utility.Compress.UnZip(Path.Join(dir, "inception5h.zip"), dir);

            // download sample picture
            string pic = Path.Join(dir, "img", "grace_hopper.jpg");
            Directory.CreateDirectory(Path.Join(dir, "img"));
            Utility.Web.Download($"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/grace_hopper.jpg", pic);
            url = $"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/grace_hopper.jpg";
            Utility.Web.Download(url, Path.Join(dir, "img"), "grace_hopper.jpg");
        }
    }
}
@@ -90,14 +90,14 @@ namespace TensorFlowNET.Examples
            // get model file
            string url = "https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz";
            string zipFile = Path.Join(dir, $"{pbFile}.tar.gz");
            Utility.Web.Download(url, zipFile);
            Utility.Web.Download(url, dir, $"{pbFile}.tar.gz");
            Utility.Compress.ExtractTGZ(zipFile, dir);
            Utility.Compress.ExtractTGZ(Path.Join(dir, $"{pbFile}.tar.gz"), dir);

            // download sample picture
            string pic = "grace_hopper.jpg";
            Utility.Web.Download($"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/{pic}", Path.Join(dir, pic));
            url = $"https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/examples/label_image/data/{pic}";
            Utility.Web.Download(url, dir, pic);
        }
    }
}
@@ -12,6 +12,7 @@
  <ItemGroup>
    <ProjectReference Include="..\..\..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj" />
    <ProjectReference Include="..\..\..\scikit-learn.net\src\scikit-learn\scikit-learn.csproj" />
    <ProjectReference Include="..\..\src\TensorFlowNET.Core\TensorFlowNET.Core.csproj" />
    <ProjectReference Include="..\..\src\TensorFlowNET.Utility\TensorFlowNET.Utility.csproj" />
  </ItemGroup>
@@ -10,6 +10,44 @@ namespace TensorFlowNET.Examples.CnnTextClassification
{
    public class DataHelpers
    {
        private const string TRAIN_PATH = "text_classification/dbpedia_csv/train.csv";
        private const string TEST_PATH = "text_classification/dbpedia_csv/test.csv";

        public static (int[][], int[], int) build_char_dataset(string step, string model, int document_max_len)
        {
            string alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/|_#$%^&*~`+=<>()[]{} ";
            /*if (step == "train")
                df = pd.read_csv(TRAIN_PATH, names =["class", "title", "content"]);*/

            // Map every alphabet character to an index; 0 and 1 are reserved for padding and unknown.
            var char_dict = new Dictionary<string, int>();
            char_dict["<pad>"] = 0;
            char_dict["<unk>"] = 1;
            foreach (char c in alphabet)
                char_dict[c.ToString()] = char_dict.Count;

            var contents = File.ReadAllLines(step == "train" ? TRAIN_PATH : TEST_PATH);
            var x = new int[contents.Length][];
            var y = new int[contents.Length];

            for (int i = 0; i < contents.Length; i++)
            {
                string[] parts = contents[i].ToLower().Split(new[] { ",\"" }, StringSplitOptions.None);
                string content = parts[2];
                content = content.Substring(0, content.Length - 1);
                x[i] = new int[document_max_len];
                for (int j = 0; j < document_max_len; j++)
                {
                    if (j >= content.Length)
                        x[i][j] = char_dict["<pad>"];
                    else
                        x[i][j] = char_dict.ContainsKey(content[j].ToString()) ? char_dict[content[j].ToString()] : char_dict["<unk>"];
                }
                y[i] = int.Parse(parts[0]);
            }

            return (x, y, alphabet.Length + 2);
        }
        /// <summary>
        /// Loads MR polarity data from files, splits the data into words and generates labels.
        /// Returns split sentences and labels.
@@ -20,8 +58,8 @@ namespace TensorFlowNET.Examples.CnnTextClassification
        public static (string[], NDArray) load_data_and_labels(string positive_data_file, string negative_data_file)
        {
            Directory.CreateDirectory("CnnTextClassification");
            Utility.Web.Download(positive_data_file, "CnnTextClassification/rt-polarity.pos");
            Utility.Web.Download(negative_data_file, "CnnTextClassification/rt-polarity.neg");
            Utility.Web.Download(positive_data_file, "CnnTextClassification", "rt-polarity.pos");
            Utility.Web.Download(negative_data_file, "CnnTextClassification", "rt-polarity.neg");

            // Load data from files
            var positive_examples = File.ReadAllLines("CnnTextClassification/rt-polarity.pos")
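To illustrate the parsing step in build_char_dataset above, here is a small stand-alone sketch (the sample row and the demo class name are made up for illustration): a dbpedia-style line is split on the `,"` separator into class label, title and content, the trailing quote is stripped, and characters are mapped through the same <pad>/<unk> dictionary.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    class CharEncodingDemo
    {
        static void Main()
        {
            // Hypothetical dbpedia_csv row: class index, quoted title, quoted content.
            string line = "1,\"E. D. Abbott Ltd\",\"Abbott of Farnham E D Abbott Limited was a British coachbuilder.\"";

            string[] parts = line.ToLower().Split(new[] { ",\"" }, StringSplitOptions.None);
            string label = parts[0];                              // "1"
            string content = parts[2];
            content = content.Substring(0, content.Length - 1);   // strip trailing quote

            // Same reserved indices as build_char_dataset: 0 = <pad>, 1 = <unk>.
            string alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/|_#$%^&*~`+=<>()[]{} ";
            var char_dict = new Dictionary<string, int> { ["<pad>"] = 0, ["<unk>"] = 1 };
            foreach (char c in alphabet)
                char_dict[c.ToString()] = char_dict.Count;

            // Encode the first ten characters of the content.
            int[] encoded = content.Take(10)
                .Select(c => char_dict.TryGetValue(c.ToString(), out int id) ? id : char_dict["<unk>"])
                .ToArray();

            Console.WriteLine($"label={label}, first 10 char ids: {string.Join(",", encoded)}");
        }
    }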
@@ -0,0 +1,37 @@
using NumSharp.Core;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Tensorflow;
using TensorFlowNET.Utility;

namespace TensorFlowNET.Examples.CnnTextClassification
{
    /// <summary>
    /// https://github.com/dongjun-Lee/text-classification-models-tf
    /// </summary>
    public class TextClassificationTrain : Python, IExample
    {
        private string dataDir = "text_classification";
        private string dataFileName = "dbpedia_csv.tar.gz";
        private const int CHAR_MAX_LEN = 1014;

        public void Run()
        {
            download_dbpedia();
            Console.WriteLine("Building dataset...");
            var (x, y, alphabet_size) = DataHelpers.build_char_dataset("train", "vdcnn", CHAR_MAX_LEN);
            var (train_x, valid_x, train_y, valid_y) = train_test_split(x, y, test_size: 0.15);
        }

        public void download_dbpedia()
        {
            string url = "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz";
            Web.Download(url, dataDir, dataFileName);
            Compress.ExtractTGZ(Path.Join(dataDir, dataFileName), dataDir);
        }
    }
}
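Run above calls train_test_split, which is not defined in this diff; it presumably comes from the referenced scikit-learn.net project or the Python base class. As a rough idea of the expected behaviour only, a hypothetical stand-alone helper could look like the sketch below; the name, signature and fixed seed are assumptions, not the actual API.

    using System;
    using System.Linq;

    static class SplitHelper
    {
        // Hypothetical helper: shuffles the samples and holds out `test_size` of them for validation.
        public static (int[][] train_x, int[][] valid_x, int[] train_y, int[] valid_y)
            train_test_split(int[][] x, int[] y, double test_size = 0.15)
        {
            var rnd = new Random(42);
            // Shuffle indices so the validation split is not just the tail of the file.
            int[] order = Enumerable.Range(0, x.Length).OrderBy(_ => rnd.Next()).ToArray();

            int trainCount = (int)(x.Length * (1 - test_size));
            var trainIdx = order.Take(trainCount).ToArray();
            var validIdx = order.Skip(trainCount).ToArray();

            return (trainIdx.Select(i => x[i]).ToArray(),
                    validIdx.Select(i => x[i]).ToArray(),
                    trainIdx.Select(i => y[i]).ToArray(),
                    validIdx.Select(i => y[i]).ToArray());
        }
    }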
@@ -46,9 +46,8 @@ namespace TensorFlowNET.Examples
            // get model file
            string url = $"https://github.com/SciSharp/TensorFlow.NET/raw/master/data/{dataFile}";
            string zipFile = Path.Join(dir, $"imdb.zip");
            Utility.Web.Download(url, zipFile);
            Utility.Compress.UnZip(zipFile, dir);
            Utility.Web.Download(url, dir, "imdb.zip");
            Utility.Compress.UnZip(Path.Join(dir, $"imdb.zip"), dir);

            // prepare training dataset
            var x_train = ReadData(Path.Join(dir, "x_train.txt"));