using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using Tensorflow.Keras.Utils;
using Tensorflow.NumPy;

namespace Tensorflow.Keras.Datasets
{
    /// <summary>
    /// This is a dataset of 25,000 movie reviews from IMDB, labeled by sentiment
    /// (positive/negative). Reviews have been preprocessed, and each review is
    /// encoded as a list of word indexes (integers).
    /// </summary>
    public class Imdb
    {
        string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/";
        string file_name = "imdb.npz";
        string dest_folder = "imdb";

        /// <summary>
        /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/).
        /// Reads <c>imdb_train.txt</c> and <c>imdb_test.txt</c> from the download
        /// directory, parses each line into a label and a sequence of word indexes,
        /// and pads every sequence to <paramref name="maxlen"/>.
        /// </summary>
        /// <param name="path">Not used by the current implementation.</param>
        /// <param name="num_words">Not used by the current implementation.</param>
        /// <param name="skip_top">Not used by the current implementation.</param>
        /// <param name="maxlen">Length passed to <c>pad_sequences</c>; must be supplied (the default -1 is rejected).</param>
        /// <param name="seed">Not used by the current implementation.</param>
        /// <param name="start_char">Not used by the current implementation.</param>
        /// <param name="oov_char">Not used by the current implementation.</param>
        /// <param name="index_from">Not used by the current implementation.</param>
        /// <returns>A <c>DatasetPass</c> whose <c>Train</c> and <c>Test</c> members each hold (padded sequences, labels).</returns>
        /// <exception cref="InvalidArgumentError">Thrown when <paramref name="maxlen"/> is left at -1.</exception>
        public DatasetPass load_data(string path = "imdb.npz",
            int num_words = -1,
            int skip_top = 0,
            int maxlen = -1,
            int seed = 113,
            int start_char = 1,
            int oov_char= 2,
            int index_from = 3)
        {
            if (maxlen == -1)
            {
                throw new InvalidArgumentError("maxlen must be assigned.");
            }

            var dst = Download();

            // Each split is read from its own file. Previously the contents of
            // imdb_test.txt were read and discarded, so the "test" split was
            // silently built from the training lines.
            var (x_train, y_train) = LoadSplit(Path.Combine(dst, "imdb_train.txt"), maxlen);
            var (x_test, y_test) = LoadSplit(Path.Combine(dst, "imdb_test.txt"), maxlen);

            return new DatasetPass
            {
                Train = (x_train, y_train),
                Test = (x_test, y_test)
            };
        }

        /// <summary>
        /// Reads one split file and converts it into (padded sequences, labels).
        /// The first character of every line is parsed as the label; the text
        /// starting at index 2 is parsed as the review's list of word indexes.
        /// </summary>
        (NDArray, NDArray) LoadSplit(string filePath, int maxlen)
        {
            var lines = File.ReadAllLines(filePath);
            var reviews = new string[lines.Length];
            var labels = np.zeros(new int[] { lines.Length }, np.int64);

            for (int i = 0; i < lines.Length; i++)
            {
                labels[i] = long.Parse(lines[i].Substring(0, 1), CultureInfo.InvariantCulture);
                reviews[i] = lines[i].Substring(2);
            }

            var padded = keras.preprocessing.sequence.pad_sequences(PraseData(reviews), maxlen: maxlen);
            return (padded, labels);
        }

        /// <summary>
        /// Returns the "x_train.npy" and "x_test.npy" entries of an .npz archive.
        /// </summary>
        (NDArray, NDArray) LoadX(byte[] bytes)
        {
            // NOTE(review): np.Load_Npz may require an explicit generic type
            // argument here - confirm against its declaration.
            var y = np.Load_Npz(bytes);
            return (y["x_train.npy"], y["x_test.npy"]);
        }

        /// <summary>
        /// Returns the "y_train.npy" and "y_test.npy" entries of an .npz archive.
        /// </summary>
        (NDArray, NDArray) LoadY(byte[] bytes)
        {
            // NOTE(review): np.Load_Npz may require an explicit generic type
            // argument here - confirm against its declaration.
            var y = np.Load_Npz(bytes);
            return (y["y_train.npy"], y["y_test.npy"]);
        }

        /// <summary>
        /// Ensures the dataset directory exists under the system temp path,
        /// downloads <c>origin_folder + file_name</c> into it and returns the directory.
        /// </summary>
        /// <returns>The directory the dataset file was downloaded to.</returns>
        string Download()
        {
            var dst = Path.Combine(Path.GetTempPath(), dest_folder);
            Directory.CreateDirectory(dst);

            // NOTE(review): this fetches "imdb.npz", while load_data reads
            // imdb_train.txt / imdb_test.txt from the same directory. How those
            // text files get there is not visible in this file - confirm.
            Web.Download(origin_folder + file_name, dst, file_name);

            return dst;
        }

        /// <summary>
        /// Parses textual integer lists such as <c>"[1, 14, 22]"</c> into integer arrays.
        /// Brackets and spaces are stripped and the remainder is split on commas.
        /// </summary>
        /// <param name="x">One bracketed, comma-separated list of integers per element.</param>
        /// <returns>One <c>int[]</c> per input element, in input order.</returns>
        /// <exception cref="FormatException">Thrown when an entry is not a valid integer.</exception>
        protected IEnumerable<int[]> PraseData(string[] x)
        {
            var data_list = new List<int[]>(x.Length);

            foreach (var list_string in x)
            {
                var cleaned_list_string = list_string.Replace("[", "").Replace("]", "").Replace(" ", "");

                // An empty review ("[]") has no tokens; splitting it would give
                // a single empty string and make int.Parse throw.
                if (cleaned_list_string.Length == 0)
                {
                    data_list.Add(Array.Empty<int>());
                    continue;
                }

                string[] number_strings = cleaned_list_string.Split(',');
                int[] numbers = new int[number_strings.Length];
                for (int j = 0; j < number_strings.Length; j++)
                {
                    numbers[j] = int.Parse(number_strings[j], CultureInfo.InvariantCulture);
                }

                data_list.Add(numbers);
            }

            return data_list;
        }
    }
}