From 83fe6f9f2127336910a264907886e6e4abab6ba3 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Sun, 1 Jul 2018 18:28:09 +0800 Subject: [PATCH 01/13] combine controller and trainer --- fastNLP/action/action.py | 11 -- fastNLP/action/trainer.py | 174 ++++++++++++++++++++++++-------- fastNLP/loader/config_loader.py | 3 +- fastNLP/models/base_model.py | 95 ----------------- 4 files changed, 134 insertions(+), 149 deletions(-) diff --git a/fastNLP/action/action.py b/fastNLP/action/action.py index c85a74df..5512c7b1 100644 --- a/fastNLP/action/action.py +++ b/fastNLP/action/action.py @@ -1,4 +1,3 @@ -from saver.logger import Logger class Action(object): @@ -8,16 +7,6 @@ class Action(object): def __init__(self): super(Action, self).__init__() - self.logger = Logger("logger_output.txt") - - def load_config(self, args): - raise NotImplementedError - - def load_dataset(self, args): - raise NotImplementedError - - def log(self, string): - self.logger.log(string) def batchify(self, batch_size, X, Y=None): """ diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index b3640ba2..79f14df3 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -1,36 +1,56 @@ from collections import namedtuple -from .action import Action -from .tester import Tester +import numpy as np +import torch +from fastNLP.action.action import Action +from fastNLP.action.tester import Tester -class Trainer(Action): - """ - Trainer is a common training pipeline shared among all models. + +class BaseTrainer(Action): + """Base trainer for all trainers. + Trainer receives a model and data, and then performs training. + + Subclasses must implement the following abstract methods: + - prepare_input + - mode + - define_optimizer + - data_forward + - grad_backward + - get_loss """ TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", "log_validation", "batch_size"]) def __init__(self, train_args): """ - :param train_args: namedtuple + training parameters """ - super(Trainer, self).__init__() + super(BaseTrainer, self).__init__() self.n_epochs = train_args.epochs self.validate = train_args.validate - self.save_when_better = train_args.save_when_better - self.log_per_step = train_args.log_per_step - self.log_validation = train_args.log_validation self.batch_size = train_args.batch_size + self.model = None def train(self, network, train_data, dev_data=None): - """ - :param network: the models controller + """General training loop. + :param network: a model :param train_data: raw data for training :param dev_data: raw data for validation - This method will call all the base methods of network (implemented in models.base_model). + + The method is framework independent. + Work by calling the following methods: + - prepare_input + - mode + - define_optimizer + - data_forward + - get_loss + - grad_backward + - update + Subclasses must implement these methods with a specific framework. """ - train_x, train_y = network.prepare_input(train_data) + self.model = network + train_x, train_y = self.prepare_input(train_data) iterations, train_batch_generator = self.batchify(self.batch_size, train_x, train_y) @@ -39,55 +59,125 @@ class Trainer(Action): evaluator = Tester(test_args) best_loss = 1e10 - loss_history = list() for epoch in range(self.n_epochs): - network.mode(test=False) # turn on the train mode + self.mode(test=False) # turn on the train mode - network.define_optimizer() + self.define_optimizer() for step in range(iterations): batch_x, batch_y = train_batch_generator.__next__() - prediction = network.data_forward(batch_x) - - loss = network.get_loss(prediction, batch_y) - network.grad_backward() + prediction = self.data_forward(network, batch_x) - if step % self.log_per_step == 0: - print("step ", step) - loss_history.append(loss) - self.log(self.make_log(epoch, step, loss)) + loss = self.get_loss(prediction, batch_y) + self.grad_backward(loss) + self.update() - #################### evaluate over dev set ################### if self.validate: if dev_data is None: raise RuntimeError("No validation data provided.") - # give all controls to tester evaluator.test(network, dev_data) - - if self.log_validation: - self.log(self.make_valid_log(epoch, evaluator.loss)) if evaluator.loss < best_loss: best_loss = evaluator.loss - if self.save_when_better: - self.save_model(network) # finish training - def make_log(self, *args): - return "make a log" + def prepare_input(self, data): + """ + Perform data transformation from raw input to vector/matrix inputs. + :param data: raw inputs + :return (X, Y): tuple, input features and labels + """ + raise NotImplementedError - def make_valid_log(self, *args): - return "make a valid log" + def mode(self, test=False): + """ + Tell the network to be trained or not. + :param test: bool + """ + raise NotImplementedError - def save_model(self, model): - model.save() + def define_optimizer(self): + """ + Define framework-specific optimizer specified by the models. + """ + raise NotImplementedError - def load_data(self, data_name): - print("load data") + def update(self): + """ + Perform weight update on a model. - def load_config(self, args): + For PyTorch, just call optimizer to update. + """ raise NotImplementedError - def load_dataset(self, args): + def data_forward(self, network, *x): + """ + Forward pass of the data. + :param network: a model + :param x: input feature matrix and label vector + :return: output by the models + + For PyTorch, just do "network(*x)" + """ raise NotImplementedError + + def grad_backward(self, loss): + """ + Compute gradient with link rules. + :param loss: a scalar where back-prop starts + + For PyTorch, just do "loss.backward()" + """ + raise NotImplementedError + + def get_loss(self, predict, truth): + """ + Compute loss given prediction and ground truth. + :param predict: prediction label vector + :param truth: ground truth label vector + :return: a scalar + """ + raise NotImplementedError + + +class ToyTrainer(BaseTrainer): + """A simple trainer for a PyTorch model.""" + + def __init__(self, train_args): + super(ToyTrainer, self).__init__(train_args) + self.test_mode = False + self.weight = np.random.rand(5, 1) + self.bias = np.random.rand() + self._loss = 0 + self._optimizer = None + + def prepare_input(self, data): + return data[:, :-1], data[:, -1] + + def mode(self, test=False): + self.model.mode(test) + + def data_forward(self, network, *x): + return np.matmul(x, self.weight) + self.bias + + def grad_backward(self, loss): + loss.backward() + + def get_loss(self, pred, truth): + self._loss = np.mean(np.square(pred - truth)) + return self._loss + + def define_optimizer(self): + self._optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01) + + def update(self): + self._optimizer.step() + + +if __name__ == "__name__": + Config = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", + "log_validation", "batch_size"]) + train_config = Config(epochs=5, validate=True, save_when_better=True, log_per_step=10, log_validation=True, + batch_size=32) + trainer = ToyTrainer(train_config) diff --git a/fastNLP/loader/config_loader.py b/fastNLP/loader/config_loader.py index fa1d446d..0f40ec51 100644 --- a/fastNLP/loader/config_loader.py +++ b/fastNLP/loader/config_loader.py @@ -1,4 +1,4 @@ -from loader.base_loader import BaseLoader +from fastNLP.loader.base_loader import BaseLoader class ConfigLoader(BaseLoader): @@ -11,3 +11,4 @@ class ConfigLoader(BaseLoader): @staticmethod def parse(string): raise NotImplementedError + diff --git a/fastNLP/models/base_model.py b/fastNLP/models/base_model.py index 1a2782c3..9249e2e3 100644 --- a/fastNLP/models/base_model.py +++ b/fastNLP/models/base_model.py @@ -1,4 +1,3 @@ -import numpy as np import torch @@ -30,100 +29,6 @@ class BaseModel(torch.nn.Module): raise NotImplementedError -class BaseController(object): - """Base Controller for all controllers. - This class and its subclasses are actually "controllers" of the PyTorch models. - They act as an interface between Trainer and the PyTorch models. - This controller provides the following methods to be called by Trainer. - - prepare_input - - mode - - define_optimizer - - data_forward - - grad_backward - - get_loss - """ - - def __init__(self): - """ - Define PyTorch model parameters here. - """ - pass - - def prepare_input(self, data): - """ - Perform data transformation from raw input to vector/matrix inputs. - :param data: raw inputs - :return (X, Y): tuple, input features and labels - """ - raise NotImplementedError - - def mode(self, test=False): - """ - Tell the network to be trained or not, required by PyTorch. - :param test: bool - """ - raise NotImplementedError - - def define_optimizer(self): - """ - Define PyTorch optimizer specified by the models. - """ - raise NotImplementedError - - def data_forward(self, *x): - """ - Forward pass of the data. - :param x: input feature matrix and label vector - :return: output by the models - """ - # required by PyTorch nn - raise NotImplementedError - - def grad_backward(self): - """ - Perform gradient descent to update the models parameters. - """ - raise NotImplementedError - - def get_loss(self, pred, truth): - """ - Compute loss given models prediction and ground truth. Loss function specified by the models. - :param pred: prediction label vector - :param truth: ground truth label vector - :return: a scalar - """ - raise NotImplementedError - - -class ToyController(BaseController): - """This is for code testing.""" - - def __init__(self): - super(ToyController, self).__init__() - self.test_mode = False - self.weight = np.random.rand(5, 1) - self.bias = np.random.rand() - self._loss = 0 - - def prepare_input(self, data): - return data[:, :-1], data[:, -1] - - def mode(self, test=False): - self.test_mode = test - - def data_forward(self, x): - return np.matmul(x, self.weight) + self.bias - - def grad_backward(self): - print("loss gradient backward") - - def get_loss(self, pred, truth): - self._loss = np.mean(np.square(pred - truth)) - return self._loss - - def define_optimizer(self): - pass - class Vocabulary(object): """A look-up table that allows you to access `Lexeme` objects. The `Vocab` From 1426fc3582ed5d99f5471bd1136f9706c17bac19 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Sun, 1 Jul 2018 19:50:07 +0800 Subject: [PATCH 02/13] refactor word_seg model & its test --- fastNLP/action/tester.py | 2 +- fastNLP/action/trainer.py | 89 +++++++++++++++++++++++++- fastNLP/models/char_language_model.py | 7 ++- fastNLP/models/word_seg_model.py | 91 +-------------------------- test/test_word_seg.py | 21 +++---- 5 files changed, 104 insertions(+), 106 deletions(-) diff --git a/fastNLP/action/tester.py b/fastNLP/action/tester.py index 0be1b010..7f660bb0 100644 --- a/fastNLP/action/tester.py +++ b/fastNLP/action/tester.py @@ -2,7 +2,7 @@ from collections import namedtuple import numpy as np -from fastNLP.action import Action +from fastNLP.action.action import Action class Tester(Action): diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 79f14df3..0bbcccd7 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -111,7 +111,7 @@ class BaseTrainer(Action): """ raise NotImplementedError - def data_forward(self, network, *x): + def data_forward(self, network, x): """ Forward pass of the data. :param network: a model @@ -158,7 +158,7 @@ class ToyTrainer(BaseTrainer): def mode(self, test=False): self.model.mode(test) - def data_forward(self, network, *x): + def data_forward(self, network, x): return np.matmul(x, self.weight) + self.bias def grad_backward(self, loss): @@ -175,6 +175,91 @@ class ToyTrainer(BaseTrainer): self._optimizer.step() +class WordSegTrainer(BaseTrainer): + """ + reserve for changes + """ + + def __init__(self, train_args): + super(WordSegTrainer, self).__init__(train_args) + self.id2word = None + self.word2id = None + self.id2tag = None + self.tag2id = None + + self.lstm_batch_size = 8 + self.lstm_seq_len = 32 # Trainer batch_size == lstm_batch_size * lstm_seq_len + self.hidden_dim = 100 + self.lstm_num_layers = 2 + self.vocab_size = 100 + self.word_emb_dim = 100 + + self.hidden = (self.to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)), + self.to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim))) + + self.optimizer = None + self._loss = None + + self.USE_GPU = False + + def to_var(self, x): + if torch.cuda.is_available() and self.USE_GPU: + x = x.cuda() + return torch.autograd.Variable(x) + + def prepare_input(self, data): + """ + perform word indices lookup to convert strings into indices + :param data: list of string, each string contains word + space + [B, M, E, S] + :return + """ + word_list = [] + tag_list = [] + for line in data: + if len(line) > 2: + tokens = line.split("#") + word_list.append(tokens[0]) + tag_list.append(tokens[2][0]) + self.id2word = list(set(word_list)) + self.word2id = {word: idx for idx, word in enumerate(self.id2word)} + self.id2tag = list(set(tag_list)) + self.tag2id = {tag: idx for idx, tag in enumerate(self.id2tag)} + words = np.array([self.word2id[w] for w in word_list]).reshape(-1, 1) + tags = np.array([self.tag2id[t] for t in tag_list]).reshape(-1, 1) + return words, tags + + def mode(self, test=False): + if test: + self.model.eval() + else: + self.model.train() + + def data_forward(self, network, x): + """ + :param network: a PyTorch model + :param x: sequence of length [batch_size], word indices + :return: + """ + x = x.reshape(self.lstm_batch_size, self.lstm_seq_len) + output, self.hidden = network(x, self.hidden) + return output + + def define_optimizer(self): + self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85) + + def get_loss(self, predict, truth): + self._loss = torch.nn.CrossEntropyLoss(predict, truth) + return self._loss + + def grad_backward(self, network): + self.model.zero_grad() + self._loss.backward() + torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2) + + def update(self): + self.optimizer.step() + + if __name__ == "__name__": Config = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", "log_validation", "batch_size"]) diff --git a/fastNLP/models/char_language_model.py b/fastNLP/models/char_language_model.py index 9a6997b9..27a83903 100644 --- a/fastNLP/models/char_language_model.py +++ b/fastNLP/models/char_language_model.py @@ -6,11 +6,16 @@ import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim -from model.base_model import BaseModel from torch.autograd import Variable +from fastNLP.models.base_model import BaseModel + USE_GPU = True +""" + To be deprecated. +""" + class CharLM(BaseModel): """ diff --git a/fastNLP/models/word_seg_model.py b/fastNLP/models/word_seg_model.py index 58d7186d..969c7ff7 100644 --- a/fastNLP/models/word_seg_model.py +++ b/fastNLP/models/word_seg_model.py @@ -1,95 +1,6 @@ -import numpy as np -import torch import torch.nn as nn -import torch.optim as optim -from torch.autograd import Variable -from fastNLP.models.base_model import BaseModel, BaseController - -USE_GPU = True - - -def to_var(x): - if torch.cuda.is_available() and USE_GPU: - x = x.cuda() - return Variable(x) - - -class WordSegModel(BaseController): - """ - Model controller for WordSeg - """ - - def __init__(self): - super(WordSegModel, self).__init__() - self.id2word = None - self.word2id = None - self.id2tag = None - self.tag2id = None - - self.lstm_batch_size = 8 - self.lstm_seq_len = 32 # Trainer batch_size == lstm_batch_size * lstm_seq_len - self.hidden_dim = 100 - self.lstm_num_layers = 2 - self.vocab_size = 100 - self.word_emb_dim = 100 - - self.model = WordSeg(self.hidden_dim, self.lstm_num_layers, self.vocab_size, self.word_emb_dim) - self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)), - to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim))) - - self.optimizer = None - self._loss = None - - def prepare_input(self, data): - """ - perform word indices lookup to convert strings into indices - :param data: list of string, each string contains word + space + [B, M, E, S] - :return - """ - word_list = [] - tag_list = [] - for line in data: - if len(line) > 2: - tokens = line.split("#") - word_list.append(tokens[0]) - tag_list.append(tokens[2][0]) - self.id2word = list(set(word_list)) - self.word2id = {word: idx for idx, word in enumerate(self.id2word)} - self.id2tag = list(set(tag_list)) - self.tag2id = {tag: idx for idx, tag in enumerate(self.id2tag)} - words = np.array([self.word2id[w] for w in word_list]).reshape(-1, 1) - tags = np.array([self.tag2id[t] for t in tag_list]).reshape(-1, 1) - return words, tags - - def mode(self, test=False): - if test: - self.model.eval() - else: - self.model.train() - - def data_forward(self, x): - """ - :param x: sequence of length [batch_size], word indices - :return: - """ - x = x.reshape(self.lstm_batch_size, self.lstm_seq_len) - output, self.hidden = self.model(x, self.hidden) - return output - - def define_optimizer(self): - self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85) - - def get_loss(self, pred, truth): - - self._loss = nn.CrossEntropyLoss(pred, truth) - return self._loss - - def grad_backward(self): - self.model.zero_grad() - self._loss.backward() - torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2) - self.optimizer.step() +from fastNLP.models.base_model import BaseModel class WordSeg(BaseModel): diff --git a/test/test_word_seg.py b/test/test_word_seg.py index bf693350..fca75356 100644 --- a/test/test_word_seg.py +++ b/test/test_word_seg.py @@ -1,23 +1,20 @@ -from loader.base_loader import BaseLoader -from model.word_seg_model import WordSegModel +from fastNLP.action.tester import Tester +from fastNLP.action.trainer import WordSegTrainer +from fastNLP.loader.base_loader import BaseLoader +from fastNLP.models.word_seg_model import WordSeg -from fastNLP.action import Tester -from fastNLP.action.trainer import Trainer - -def test_charlm(): - train_config = Trainer.TrainConfig(epochs=5, validate=False, save_when_better=False, +def test_wordseg(): + train_config = WordSegTrainer.TrainConfig(epochs=5, validate=False, save_when_better=False, log_per_step=10, log_validation=False, batch_size=254) - trainer = Trainer(train_config) + trainer = WordSegTrainer(train_config) - model = WordSegModel() + model = WordSeg(100, 2, 1000) train_data = BaseLoader("load_train", "./data_for_tests/cws_train").load_lines() trainer.train(model, train_data) - trainer.save_model(model) - test_config = Tester.TestConfig(save_output=False, validate_in_training=False, save_dev_input=False, save_loss=False, batch_size=254) tester = Tester(test_config) @@ -28,4 +25,4 @@ def test_charlm(): if __name__ == "__main__": - test_charlm() + test_wordseg() From 0e11dd5f242887e6cbff4dd4e12c2a9bc17791fe Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Sun, 1 Jul 2018 19:53:55 +0800 Subject: [PATCH 03/13] correct spell error --- fastNLP/loader/dataset_loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fastNLP/loader/dataset_loader.py b/fastNLP/loader/dataset_loader.py index 0cec50e5..3871626b 100644 --- a/fastNLP/loader/dataset_loader.py +++ b/fastNLP/loader/dataset_loader.py @@ -1,6 +1,7 @@ -from fastNLP.loader.base_loader import BaseLoader import os +from fastNLP.loader.base_loader import BaseLoader + class DatasetLoader(BaseLoader): """"loader for data sets""" @@ -16,7 +17,6 @@ class POSDatasetLoader(DatasetLoader): super(POSDatasetLoader, self).__init__(data_name, data_path) #self.data_set = self.load() - def load(self): assert os.path.exists(self.data_path) with open(self.data_path, "r", encoding="utf-8") as f: @@ -42,11 +42,12 @@ class POSDatasetLoader(DatasetLoader): dataset.append(sentence) return dataset -class ClassficationDatasetLoader(DatasetLoader): + +class ClassificationDatasetLoader(DatasetLoader): """loader for classfication data sets""" def __init__(self, data_name, data_path): - super(ClassficationDatasetLoader, data_name) + super(ClassificationDatasetLoader, data_name).__init__() def load(self): assert os.path.exists(self.data_path) @@ -70,6 +71,7 @@ class ClassficationDatasetLoader(DatasetLoader): dataset.append(sentence) return dataset + class ConllLoader(DatasetLoader): """loader for conll format files""" From b93cf0869122058dece64d732ba8128f1deca460 Mon Sep 17 00:00:00 2001 From: HENRY L Date: Mon, 2 Jul 2018 01:40:17 +0800 Subject: [PATCH 04/13] initial commit --- fastNLP/modules/prototype/Word2Idx.py | 62 +++++++++++++ fastNLP/modules/prototype/aggregation.py | 41 +++++++++ fastNLP/modules/prototype/dataloader.py | 82 +++++++++++++++++ fastNLP/modules/prototype/embedding.py | 23 +++++ fastNLP/modules/prototype/encoder.py | 25 ++++++ fastNLP/modules/prototype/example.py | 108 +++++++++++++++++++++++ fastNLP/modules/prototype/predict.py | 25 ++++++ 7 files changed, 366 insertions(+) create mode 100644 fastNLP/modules/prototype/Word2Idx.py create mode 100644 fastNLP/modules/prototype/aggregation.py create mode 100644 fastNLP/modules/prototype/dataloader.py create mode 100644 fastNLP/modules/prototype/embedding.py create mode 100644 fastNLP/modules/prototype/encoder.py create mode 100644 fastNLP/modules/prototype/example.py create mode 100644 fastNLP/modules/prototype/predict.py diff --git a/fastNLP/modules/prototype/Word2Idx.py b/fastNLP/modules/prototype/Word2Idx.py new file mode 100644 index 00000000..544126be --- /dev/null +++ b/fastNLP/modules/prototype/Word2Idx.py @@ -0,0 +1,62 @@ +import collections +import pickle + +class Word2Idx(): + """ + Build a word index according to word frequency. + If "min_freq" is given, then only words with a frequncy not lesser than min_freq will be kept. + If "max_num" is given, then at most the most frequent $max_num words will be kept. + "words" should be a list [ w_1,w_2,...,w_i,...,w_n ] where each w_i is a string representing a word. + + num is the size of the lookup table. + w2i is a lookup table assigning each word an index. + Note that index 0 will be returned for any unregistered words. + i2w is a vector which serves as an invert mapping of w2i. + Token "" will be returned for index 0 + e.g. i2w[w2i["word"]] == "word" + """ + def __init__(self): + self.__w2i = dict() + self.__i2w = [] + self.num = 0 + + def build(self, words, min_freq=0, max_num=None): + """build a model from words""" + counter = collections.Counter(words) + word_set = set(words) + if max_num is not None: + most_common = counter.most_common(min(len(word_set), max_num - 1)) + else: + most_common = counter.most_common() + self.__w2i = dict((w[0],i + 1) for i,w in enumerate(most_common) if w[1] >= min_freq) + self.__w2i[""] = 0 + self.__i2w = [""] + [ w[0] for w in most_common if w[1] >= min_freq ] + self.num = len(self.__i2w) + + def w2i(self,word): + """word to index""" + if word in self.__w2i: + return self.__w2i[word] + return 0 + + def i2w(self,idx): + """index to word""" + if idx >= self.num: + raise Exception("out of range\n") + return self.__i2w[idx] + + def save(self,addr): + """save the model to a file with address "addr" """ + f = open(addr,"wb") + pickle.dump([self.__i2w, self.__w2i, self.num], f) + f.close() + + def load(self,addr): + """load a model from a file with address "addr" """ + f = open(addr,"rb") + paras = pickle.load(f) + self.__i2w, self.__w2i, self.num = paras[0], paras[1], paras[2] + f.close() + + + diff --git a/fastNLP/modules/prototype/aggregation.py b/fastNLP/modules/prototype/aggregation.py new file mode 100644 index 00000000..e87862b8 --- /dev/null +++ b/fastNLP/modules/prototype/aggregation.py @@ -0,0 +1,41 @@ +import torch +import torch.nn as nn + +class Selfattention(nn.Module): + """ + Self Attention Module. + + Args: + input_size : the size for the input vector + d_a : the width of weight matrix + r : the number of encoded vectors + """ + def __init__(self, input_size, d_a, r): + super(Selfattention, self).__init__() + self.W_s1 = nn.Parameter(torch.randn(d_a, input_size), requires_grad=True) + self.W_s2 = nn.Parameter(torch.randn(r, d_a), requires_grad=True) + self.softmax = nn.Softmax(dim=2) + self.tanh = nn.Tanh() + + def penalization(self, A): + """ + compute the penalization term for attention module + """ + if self.W_s1.is_cuda: + I = Variable(torch.eye(A.size(1)).cuda(), requires_grad=False) + else: + I = Variable(torch.eye(A.size(1)), requires_grad=False) + M = torch.matmul(A, torch.transpose(A, 1, 2)) - I + M = M.view(M.size(0), -1) + return torch.sum(M ** 2, dim=1) + + def forward(self, x): + inter = self.tanh(torch.matmul(self.W_s1, torch.transpose(x, 1, 2))) + A = self.softmax(torch.matmul(self.W_s2, inter)) + out = torch.matmul(A, H) + out = out.view(out.size(0), -1) + penalty = self.penalization(A) + return out, penalty + +if __name__ == "__main__": + model = Selfattention(100, 10, 20) diff --git a/fastNLP/modules/prototype/dataloader.py b/fastNLP/modules/prototype/dataloader.py new file mode 100644 index 00000000..a7eafdc2 --- /dev/null +++ b/fastNLP/modules/prototype/dataloader.py @@ -0,0 +1,82 @@ +import random +import pickle +import torch +import numpy as np +from torch.autograd import Variable + +def float_wrapper(x, requires_grad=True, using_cuda=True): + """ + transform float type list to pytorch variable + """ + if using_cuda==True: + return Variable(torch.FloatTensor(x).cuda(), requires_grad=requires_grad) + else: + return Variable(torch.FloatTensor(x), requires_grad=requires_grad) + +def long_wrapper(x, requires_grad=True, using_cuda=True): + """ + transform long type list to pytorch variable + """ + if using_cuda==True: + return Variable(torch.LongTensor(x).cuda(), requires_grad=requires_grad) + else: + return Variable(torch.LongTensor(x), requires_grad=requires_grad) + +def pad(X, using_cuda): + """ + zero-pad sequnces to same length then pack them together + """ + maxlen = max([x.size(0) for x in X]) + Y = [] + for x in X: + padlen = maxlen - x.size(0) + if padlen > 0: + if using_cuda: + paddings = torch.zeros(padlen).cuda() + else: + paddings = torch.zeros(padlen) + x_ = torch.cat(x, paddings) + Y.append(x_) + else: + Y.append(x) + return torch.stack(Y) + +class DataLoader(object): + """ + load data with form {"feature", "class"} + + Args: + fdir : data file address + batch_size : batch_size + shuffle : if True, shuffle dataset every epoch + using_cuda : if True, return tensors on GPU + """ + def __init__(self, fdir, batch_size, shuffle=True, using_cuda=True): + with open(fdir, "rb") as f: + self.data = pickle.load(f) + self.batch_size = batch_size + self.num = len(self.data) + self.count = 0 + self.iters = int(self.num / batch_size) + self.shuffle = shuffle + self.using_cuda = using_cuda + + def __iter__(self): + return self + + def __next__(self): + if self.count == self.iters: + self.count = 0 + if self.shuffle: + random.shuffle(self.data) + raise StopIteration() + else: + X = self.data[self.count * self.batch_size : (self.count + 1) * self.batch_size] + self.count += 1 + X = [long_wrapper(x["sent"], using_cuda=self.using_cuda) for x in X] + X = pad(X, self.using_cuda) + y = [long_wrapper(x["class"], using_cuda=self.using_cuda) for x in X] + y = torch.stack(y) + return {"feature" : X, "class" : y} + + diff --git a/fastNLP/modules/prototype/embedding.py b/fastNLP/modules/prototype/embedding.py new file mode 100644 index 00000000..1ee88a92 --- /dev/null +++ b/fastNLP/modules/prototype/embedding.py @@ -0,0 +1,23 @@ +import torch +import torch.nn as nn + +class Lookuptable(nn.Module): + """ + A simple lookup table + + Args: + nums : the size of the lookup table + dims : the size of each vector + padding_idx : pads the tensor with zeros whenever it encounters this index + sparse : If True, gradient matrix will be a sparse tensor. In this case, + only optim.SGD(cuda and cpu) and optim.Adagrad(cpu) can be used + """ + def __init__(self, nums, dims, padding_idx=0, sparse=False): + super(Lookuptable, self).__init__() + self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) + + def forward(self, x): + return self.embed(x) + +if __name__ == "__main__": + model = Lookuptable(10, 20) diff --git a/fastNLP/modules/prototype/encoder.py b/fastNLP/modules/prototype/encoder.py new file mode 100644 index 00000000..249eaf8c --- /dev/null +++ b/fastNLP/modules/prototype/encoder.py @@ -0,0 +1,25 @@ +import torch +import torch.nn as nn + +class Lstm(nn.Module): + """ + LSTM module + + Args: + input_size : input size + hidden_size : hidden size + num_layers : number of hidden layers + dropout : dropout rate + bidirectional : If True, becomes a bidirectional RNN + """ + def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional): + super(Lstm, self).__init__() + self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=True,\ + dropout=dropout, bidirectional=bidirectional) + + def forward(self, x): + x, _ = self.lstm(x) + return x + +if __name__ == "__main__": + model = Lstm(20, 30, 1, 0.5, False) diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py new file mode 100644 index 00000000..9dffc59a --- /dev/null +++ b/fastNLP/modules/prototype/example.py @@ -0,0 +1,108 @@ +import torch +import torch.nn as nn +import encoder +import aggregation +import embedding +import predict +import torch.optim as optim +import time +import dataloader + +WORD_SIZE = 100 +HIDDEN_SIZE = 300 +D_A = 350 +R = 20 +MLP_HIDDEN = 2000 +CLASSES_NUM = 5 +WORD_NUM = 357361 + +class Net(nn.Module): + """ + A model for sentiment analysis using lstm and self-attention + """ + def __init__(self): + super(Net, self).__init__() + self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE) + self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True) + self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R) + self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM) + + def forward(self, x): + x = self.embedding(x) + x = self.encoder(x) + x, penalty = self.aggregation(x) + x = self.predict(x) + return r, x + +def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ + momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): + """ + training procedure + + Args: + If model_dict is given (a file address), it will continue training on the given model. + Otherwise, it would train a new model from scratch. + If using_cuda is true, the training would be conducted on GPU. + Learning_rate and momentum is for SGD optimizer. + coef is the coefficent between the cross-entropy loss and the penalization term. + interval is the frequncy of reporting. + + the result will be saved with a form "model_dict_+current time", which could be used for further training + """ + + if using_cuda == True: + net = Net().cuda() + else: + net = Net() + + if model_dict != None: + net.load_state_dict(torch.load(model_dict)) + + optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum) + criterion = nn.CrossEntropyLoss() + dataset = dataloader.DataLoader("trainset.pkl", using_cuda=using_cuda) + + #statistics + loss_count = 0 + prepare_time = 0 + run_time = 0 + count = 0 + + for epoch in range(epochs): + for i, batch in enumerate(dataset): + t1 = time.time() + X = batch["feature"] + y = batch["class"] + + t2 = time.time() + y_pred, y_penl = net(X) + loss = criterion(y_pred, y) + torch.sum(y_penl) / batch_size * coef + optimizer.zero_grad() + loss.backward() + nn.utils.clip_grad_norm(net.parameters(), 0.5) + optimizer.step() + t3 = time.time() + + loss_count += torch.sum(y_penl).data[0] + prepare_time += (t2 - t1) + run_time += (t3 - t2) + p, idx = torch.max(y_pred, dim=1) + idx = idx.data + count += torch.sum(torch.eq(idx.cpu(), y)) + + if i % interval == 0: + print(i) + print("loss count:" + str(loss_count / batch_size)) + print("acuracy:" + str(count / batch_size)) + print("penalty:" + str(torch.sum(y_penl).data[0] / batch_size)) + print("prepare time:" + str(prepare_time / batch_size)) + print("run time:" + str(run_time / batch_size)) + prepare_time = 0 + run_time = 0 + loss_count = 0 + count = 0 + torch.save(net.state_dict(), "model_dict_%s.pkl"%(str(time.time()))) + +if __name__ == "__main__": + train(using_cuda=torch.cuda.is_available()) + diff --git a/fastNLP/modules/prototype/predict.py b/fastNLP/modules/prototype/predict.py new file mode 100644 index 00000000..c8e72629 --- /dev/null +++ b/fastNLP/modules/prototype/predict.py @@ -0,0 +1,25 @@ +import torch +import torch.nn as nn + +class MLP(nn.Module): + """ + A two layers perceptron for classification. + + Output : Unnormalized possibility distribution + Args: + input_size : the size of input + hidden_size : the size of hidden layer + output_size : the size of output + """ + def __init__(self, input_size, hidden_size, output_size): + super(MLP,self).__init__() + self.L1 = nn.Linear(input_size, hidden_size) + self.L2 = nn.Linear(hidden_size, output_size) + self.softmax = nn.Softmax(dim=1) + + def forward(self, x): + out = self.L2(F.relu(self.L1(x))) + return out + +if __name__ == "__main__": + MLP(20, 30, 20) \ No newline at end of file From 561305e03d51eb9209300fb21a32f7b5c0560ff8 Mon Sep 17 00:00:00 2001 From: HENRY L Date: Mon, 2 Jul 2018 02:06:33 +0800 Subject: [PATCH 05/13] update and add readme --- fastNLP/modules/prototype/README.md | 41 +++++++++++++++++++ fastNLP/modules/prototype/Word2Idx.py | 19 ++++----- fastNLP/modules/prototype/aggregation.py | 5 +-- fastNLP/modules/prototype/dataloader.py | 13 +++--- fastNLP/modules/prototype/encoder.py | 3 -- fastNLP/modules/prototype/example.py | 51 +++++++++++++++++------- fastNLP/modules/prototype/predict.py | 2 +- fastNLP/modules/prototype/prepare.py | 50 +++++++++++++++++++++++ 8 files changed, 146 insertions(+), 38 deletions(-) create mode 100644 fastNLP/modules/prototype/README.md create mode 100644 fastNLP/modules/prototype/prepare.py diff --git a/fastNLP/modules/prototype/README.md b/fastNLP/modules/prototype/README.md new file mode 100644 index 00000000..2dff7caa --- /dev/null +++ b/fastNLP/modules/prototype/README.md @@ -0,0 +1,41 @@ +# Prototype + +## Word2Idx.py +A mapping model between words and indexes + +## embedding.py +embedding modules + +Contains a simple encapsulation for torch.nn.Embedding + +## encoder.py +encoder modules + +Contains a simple encapsulation for torch.nn.LSTM + +## aggregation.py +aggregation modules + +Contains a self-attention model, according to paper "A Structured Self-attentive Sentence Embedding", https://arxiv.org/abs/1703.03130 + +## predict.py +predict modules + +Contains a two layers perceptron for classification + +## example.py +An example showing how to use above modules to build a model + +Contains a model for sentiment analysis on Yelp dataset, and its training and testing procedures. See https://arxiv.org/abs/1703.03130 for more details. + +## prepare.py +A case of using Word2Idx to build Yelp datasets + +## dataloader.py +A dataloader for Yelp dataset + +It is an iterable object, returning a zero-padded batch every iteration. + + + + diff --git a/fastNLP/modules/prototype/Word2Idx.py b/fastNLP/modules/prototype/Word2Idx.py index 544126be..2499aeae 100644 --- a/fastNLP/modules/prototype/Word2Idx.py +++ b/fastNLP/modules/prototype/Word2Idx.py @@ -4,15 +4,15 @@ import pickle class Word2Idx(): """ Build a word index according to word frequency. + If "min_freq" is given, then only words with a frequncy not lesser than min_freq will be kept. If "max_num" is given, then at most the most frequent $max_num words will be kept. "words" should be a list [ w_1,w_2,...,w_i,...,w_n ] where each w_i is a string representing a word. - num is the size of the lookup table. w2i is a lookup table assigning each word an index. - Note that index 0 will be returned for any unregistered words. i2w is a vector which serves as an invert mapping of w2i. - Token "" will be returned for index 0 + Note that index 0 is token "" for padding + index 1 is token "" for unregistered words e.g. i2w[w2i["word"]] == "word" """ def __init__(self): @@ -29,29 +29,30 @@ class Word2Idx(): else: most_common = counter.most_common() self.__w2i = dict((w[0],i + 1) for i,w in enumerate(most_common) if w[1] >= min_freq) - self.__w2i[""] = 0 - self.__i2w = [""] + [ w[0] for w in most_common if w[1] >= min_freq ] + self.__w2i[""] = 0 + self.__w2i[""] = 1 + self.__i2w = ["", ""] + [ w[0] for w in most_common if w[1] >= min_freq ] self.num = len(self.__i2w) - def w2i(self,word): + def w2i(self, word): """word to index""" if word in self.__w2i: return self.__w2i[word] return 0 - def i2w(self,idx): + def i2w(self, idx): """index to word""" if idx >= self.num: raise Exception("out of range\n") return self.__i2w[idx] - def save(self,addr): + def save(self, addr): """save the model to a file with address "addr" """ f = open(addr,"wb") pickle.dump([self.__i2w, self.__w2i, self.num], f) f.close() - def load(self,addr): + def load(self, addr): """load a model from a file with address "addr" """ f = open(addr,"rb") paras = pickle.load(f) diff --git a/fastNLP/modules/prototype/aggregation.py b/fastNLP/modules/prototype/aggregation.py index e87862b8..59e50e99 100644 --- a/fastNLP/modules/prototype/aggregation.py +++ b/fastNLP/modules/prototype/aggregation.py @@ -1,5 +1,6 @@ import torch import torch.nn as nn +from torch.autograd import Variable class Selfattention(nn.Module): """ @@ -32,10 +33,8 @@ class Selfattention(nn.Module): def forward(self, x): inter = self.tanh(torch.matmul(self.W_s1, torch.transpose(x, 1, 2))) A = self.softmax(torch.matmul(self.W_s2, inter)) - out = torch.matmul(A, H) + out = torch.matmul(A, x) out = out.view(out.size(0), -1) penalty = self.penalization(A) return out, penalty -if __name__ == "__main__": - model = Selfattention(100, 10, 20) diff --git a/fastNLP/modules/prototype/dataloader.py b/fastNLP/modules/prototype/dataloader.py index a7eafdc2..af5cd8b8 100644 --- a/fastNLP/modules/prototype/dataloader.py +++ b/fastNLP/modules/prototype/dataloader.py @@ -32,10 +32,10 @@ def pad(X, using_cuda): padlen = maxlen - x.size(0) if padlen > 0: if using_cuda: - paddings = torch.zeros(padlen).cuda() + paddings = Variable(torch.zeros(padlen).long()).cuda() else: - paddings = torch.zeros(padlen) - x_ = torch.cat(x, paddings) + paddings = Variable(torch.zeros(padlen).long()) + x_ = torch.cat((x, paddings), 0) Y.append(x_) else: Y.append(x) @@ -71,12 +71,11 @@ class DataLoader(object): random.shuffle(self.data) raise StopIteration() else: - X = self.data[self.count * self.batch_size : (self.count + 1) * self.batch_size] + batch = self.data[self.count * self.batch_size : (self.count + 1) * self.batch_size] self.count += 1 - X = [long_wrapper(x["sent"], using_cuda=self.using_cuda) for x in X] + X = [long_wrapper(x["sent"], using_cuda=self.using_cuda, requires_grad=False) for x in batch] X = pad(X, self.using_cuda) - y = [long_wrapper(x["class"], using_cuda=self.using_cuda) for x in X] - y = torch.stack(y) + y = long_wrapper([x["class"] for x in batch], using_cuda=self.using_cuda, requires_grad=False) return {"feature" : X, "class" : y} diff --git a/fastNLP/modules/prototype/encoder.py b/fastNLP/modules/prototype/encoder.py index 249eaf8c..142496e1 100644 --- a/fastNLP/modules/prototype/encoder.py +++ b/fastNLP/modules/prototype/encoder.py @@ -20,6 +20,3 @@ class Lstm(nn.Module): def forward(self, x): x, _ = self.lstm(x) return x - -if __name__ == "__main__": - model = Lstm(20, 30, 1, 0.5, False) diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py index 9dffc59a..782937fe 100644 --- a/fastNLP/modules/prototype/example.py +++ b/fastNLP/modules/prototype/example.py @@ -8,13 +8,13 @@ import torch.optim as optim import time import dataloader +WORD_NUM = 357361 WORD_SIZE = 100 HIDDEN_SIZE = 300 D_A = 350 -R = 20 +R = 10 MLP_HIDDEN = 2000 CLASSES_NUM = 5 -WORD_NUM = 357361 class Net(nn.Module): """ @@ -32,7 +32,7 @@ class Net(nn.Module): x = self.encoder(x) x, penalty = self.aggregation(x) x = self.predict(x) - return r, x + return x, penalty def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): @@ -50,7 +50,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ the result will be saved with a form "model_dict_+current time", which could be used for further training """ - if using_cuda == True: + if using_cuda: net = Net().cuda() else: net = Net() @@ -60,7 +60,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum) criterion = nn.CrossEntropyLoss() - dataset = dataloader.DataLoader("trainset.pkl", using_cuda=using_cuda) + dataset = dataloader.DataLoader("test_set.pkl", batch_size, using_cuda=using_cuda) #statistics loss_count = 0 @@ -69,6 +69,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ count = 0 for epoch in range(epochs): + print("epoch: %d"%(epoch)) for i, batch in enumerate(dataset): t1 = time.time() X = batch["feature"] @@ -86,23 +87,43 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ loss_count += torch.sum(y_penl).data[0] prepare_time += (t2 - t1) run_time += (t3 - t2) - p, idx = torch.max(y_pred, dim=1) - idx = idx.data - count += torch.sum(torch.eq(idx.cpu(), y)) + p, idx = torch.max(y_pred.data, dim=1) + count += torch.sum(torch.eq(idx.cpu(), y.data.cpu())) - if i % interval == 0: - print(i) - print("loss count:" + str(loss_count / batch_size)) - print("acuracy:" + str(count / batch_size)) + if (i + 1) % interval == 0: + print("epoch : %d, iters: %d"%(epoch, i + 1)) + print("loss count:" + str(loss_count / (interval * batch_size))) + print("acuracy:" + str(count / (interval * batch_size))) print("penalty:" + str(torch.sum(y_penl).data[0] / batch_size)) - print("prepare time:" + str(prepare_time / batch_size)) - print("run time:" + str(run_time / batch_size)) + print("prepare time:" + str(prepare_time)) + print("run time:" + str(run_time)) prepare_time = 0 run_time = 0 loss_count = 0 count = 0 - torch.save(net.state_dict(), "model_dict_%s.pkl"%(str(time.time()))) + string = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()) + torch.save(net.state_dict(), "model_dict_%s.dict"%(string)) + +def test(model_dict, using_cuda=True): + if using_cuda: + net = Net().cuda() + else: + net = Net() + net.load_state_dict(torch.load(model_dict)) + dataset = dataloader.DataLoader("test_set.pkl", batch_size=1, using_cuda=using_cuda) + count = 0 + for i, batch in enumerate(dataset): + X = batch["feature"] + y = batch["class"] + y_pred, _ = net(X) + p, idx = torch.max(y_pred.data, dim=1) + count += torch.sum(torch.eq(idx.cpu(), y.data.cpu())) + print("accuracy: %f"%(count / dataset.num)) + if __name__ == "__main__": train(using_cuda=torch.cuda.is_available()) + + + diff --git a/fastNLP/modules/prototype/predict.py b/fastNLP/modules/prototype/predict.py index c8e72629..d5346c0e 100644 --- a/fastNLP/modules/prototype/predict.py +++ b/fastNLP/modules/prototype/predict.py @@ -1,5 +1,6 @@ import torch import torch.nn as nn +import torch.nn.functional as F class MLP(nn.Module): """ @@ -15,7 +16,6 @@ class MLP(nn.Module): super(MLP,self).__init__() self.L1 = nn.Linear(input_size, hidden_size) self.L2 = nn.Linear(hidden_size, output_size) - self.softmax = nn.Softmax(dim=1) def forward(self, x): out = self.L2(F.relu(self.L1(x))) diff --git a/fastNLP/modules/prototype/prepare.py b/fastNLP/modules/prototype/prepare.py new file mode 100644 index 00000000..02fd19c5 --- /dev/null +++ b/fastNLP/modules/prototype/prepare.py @@ -0,0 +1,50 @@ +import pickle +import Word2Idx + +def get_sets(m, n): + """ + get a train set containing m samples and a test set containing n samples + """ + samples = pickle.load(open("tuples.pkl","rb")) + if m+n > len(samples): + print("asking for too many tuples\n") + return + train_samples = samples[ : m] + test_samples = samples[m: m+n] + return train_samples, test_samples + +def build_wordidx(): + """ + build wordidx using word2idx + """ + train, test = get_sets(500000, 2000) + words = [] + for x in train: + words += x[0] + wordidx = Word2Idx.Word2Idx() + wordidx.build(words) + print(wordidx.num) + print(wordidx.i2w(0)) + wordidx.save("wordidx.pkl") + +def build_sets(): + """ + build train set and test set, transform word to index + """ + train, test = get_sets(500000, 2000) + wordidx = Word2Idx.Word2Idx() + wordidx.load("wordidx.pkl") + train_set = [] + for x in train: + sent = [wordidx.w2i(w) for w in x[0]] + train_set.append({"sent" : sent, "class" : x[1]}) + test_set = [] + for x in test: + sent = [wordidx.w2i(w) for w in x[0]] + test_set.append({"sent" : sent, "class" : x[1]}) + pickle.dump(train_set, open("train_set.pkl", "wb")) + pickle.dump(test_set, open("test_set.pkl", "wb")) + +if __name__ == "__main__": + build_wordidx() + build_sets() From f585a9aa7df9b73e757dd51526a45bf3380b2ead Mon Sep 17 00:00:00 2001 From: HENRY L Date: Mon, 2 Jul 2018 02:49:55 +0800 Subject: [PATCH 06/13] update --- fastNLP/modules/prototype/example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py index 782937fe..a19898c6 100644 --- a/fastNLP/modules/prototype/example.py +++ b/fastNLP/modules/prototype/example.py @@ -60,7 +60,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum) criterion = nn.CrossEntropyLoss() - dataset = dataloader.DataLoader("test_set.pkl", batch_size, using_cuda=using_cuda) + dataset = dataloader.DataLoader("train_set.pkl", batch_size, using_cuda=using_cuda) #statistics loss_count = 0 From b297f93537600760ad6026808fe778cf2667fac6 Mon Sep 17 00:00:00 2001 From: Ke Zhen Date: Mon, 2 Jul 2018 12:03:45 +0800 Subject: [PATCH 07/13] add conv and pooling module --- fastNLP/modules/convolution/AvgPool1d.py | 22 +++++++++++++++++++ fastNLP/modules/convolution/Conv1d.py | 28 ++++++++++++++++++++++++ fastNLP/modules/convolution/MaxPool1d.py | 23 +++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 fastNLP/modules/convolution/AvgPool1d.py create mode 100644 fastNLP/modules/convolution/Conv1d.py create mode 100644 fastNLP/modules/convolution/MaxPool1d.py diff --git a/fastNLP/modules/convolution/AvgPool1d.py b/fastNLP/modules/convolution/AvgPool1d.py new file mode 100644 index 00000000..c427fc9a --- /dev/null +++ b/fastNLP/modules/convolution/AvgPool1d.py @@ -0,0 +1,22 @@ +# python: 3.6 +# encoding: utf-8 + +import torch.nn as nn +# import torch.nn.functional as F + + +class AvgPool1d(nn.Module): + """1-d average pooling module.""" + + def __init__(self, kernel_size, stride=None, padding=0, + ceil_mode=False, count_include_pad=True): + super(AvgPool1d, self).__init__() + self.pool = nn.AvgPool1d( + kernel_size=kernel_size, + stride=stride, + padding=padding, + ceil_mode=ceil_mode, + count_include_pad=count_include_pad) + + def forward(self, x): + return self.pool(x) diff --git a/fastNLP/modules/convolution/Conv1d.py b/fastNLP/modules/convolution/Conv1d.py new file mode 100644 index 00000000..60554a24 --- /dev/null +++ b/fastNLP/modules/convolution/Conv1d.py @@ -0,0 +1,28 @@ +# python: 3.6 +# encoding: utf-8 + +import torch.nn as nn +# import torch.nn.functional as F + + +class Conv1d(nn.Module): + """ + Basic 1-d convolution module. + """ + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, + groups=1, bias=True): + super(Conv1d, self).__init__() + self.conv = nn.Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + + def forward(self, x): + return self.conv(x) diff --git a/fastNLP/modules/convolution/MaxPool1d.py b/fastNLP/modules/convolution/MaxPool1d.py new file mode 100644 index 00000000..d1f39395 --- /dev/null +++ b/fastNLP/modules/convolution/MaxPool1d.py @@ -0,0 +1,23 @@ +# python: 3.6 +# encoding: utf-8 + +import torch.nn as nn +# import torch.nn.functional as F + + +class MaxPool1d(nn.Module): + """1-d max-pooling module.""" + + def __init__(self, kernel_size, stride=None, padding=0, + dilation=1, return_indices=False, ceil_mode=False): + super(MaxPool1d, self).__init__() + self.maxpool = nn.MaxPool1d( + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + return_indices=return_indices, + ceil_mode=ceil_mode) + + def forward(self, x): + return self.maxpool(x) From 2569c85c8e87c195b89e8fc58bd0088ae09cc12e Mon Sep 17 00:00:00 2001 From: Ke Zhen Date: Mon, 2 Jul 2018 14:02:45 +0800 Subject: [PATCH 08/13] modify conv and pool module --- fastNLP/modules/convolution/AvgPool.py | 24 +++++++++++++++++ fastNLP/modules/convolution/AvgPool1d.py | 22 ---------------- .../convolution/{Conv1d.py => Conv.py} | 6 ++--- fastNLP/modules/convolution/MaxPool.py | 26 +++++++++++++++++++ fastNLP/modules/convolution/MaxPool1d.py | 23 ---------------- 5 files changed, 53 insertions(+), 48 deletions(-) create mode 100644 fastNLP/modules/convolution/AvgPool.py delete mode 100644 fastNLP/modules/convolution/AvgPool1d.py rename fastNLP/modules/convolution/{Conv1d.py => Conv.py} (85%) create mode 100644 fastNLP/modules/convolution/MaxPool.py delete mode 100644 fastNLP/modules/convolution/MaxPool1d.py diff --git a/fastNLP/modules/convolution/AvgPool.py b/fastNLP/modules/convolution/AvgPool.py new file mode 100644 index 00000000..70e473df --- /dev/null +++ b/fastNLP/modules/convolution/AvgPool.py @@ -0,0 +1,24 @@ +# python: 3.6 +# encoding: utf-8 + +import torch.nn as nn +import torch.nn.functional as F + + +class AvgPool(nn.Module): + """1-d average pooling module.""" + + def __init__(self, stride=None, padding=0): + super(AvgPool, self).__init__() + self.stride = stride + self.padding = padding + + def forward(self, x): + # [N,C,L] -> [N,C] + kernel_size = x.size(2) + x = F.max_pool1d( + input=x, + kernel_size=kernel_size, + stride=self.stride, + padding=self.padding) + return x.squeeze(dim=-1) diff --git a/fastNLP/modules/convolution/AvgPool1d.py b/fastNLP/modules/convolution/AvgPool1d.py deleted file mode 100644 index c427fc9a..00000000 --- a/fastNLP/modules/convolution/AvgPool1d.py +++ /dev/null @@ -1,22 +0,0 @@ -# python: 3.6 -# encoding: utf-8 - -import torch.nn as nn -# import torch.nn.functional as F - - -class AvgPool1d(nn.Module): - """1-d average pooling module.""" - - def __init__(self, kernel_size, stride=None, padding=0, - ceil_mode=False, count_include_pad=True): - super(AvgPool1d, self).__init__() - self.pool = nn.AvgPool1d( - kernel_size=kernel_size, - stride=stride, - padding=padding, - ceil_mode=ceil_mode, - count_include_pad=count_include_pad) - - def forward(self, x): - return self.pool(x) diff --git a/fastNLP/modules/convolution/Conv1d.py b/fastNLP/modules/convolution/Conv.py similarity index 85% rename from fastNLP/modules/convolution/Conv1d.py rename to fastNLP/modules/convolution/Conv.py index 60554a24..a3a572d9 100644 --- a/fastNLP/modules/convolution/Conv1d.py +++ b/fastNLP/modules/convolution/Conv.py @@ -5,7 +5,7 @@ import torch.nn as nn # import torch.nn.functional as F -class Conv1d(nn.Module): +class Conv(nn.Module): """ Basic 1-d convolution module. """ @@ -13,7 +13,7 @@ class Conv1d(nn.Module): def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): - super(Conv1d, self).__init__() + super(Conv, self).__init__() self.conv = nn.Conv1d( in_channels=in_channels, out_channels=out_channels, @@ -25,4 +25,4 @@ class Conv1d(nn.Module): bias=bias) def forward(self, x): - return self.conv(x) + return self.conv(x) # [N,C,L] diff --git a/fastNLP/modules/convolution/MaxPool.py b/fastNLP/modules/convolution/MaxPool.py new file mode 100644 index 00000000..12bdd96f --- /dev/null +++ b/fastNLP/modules/convolution/MaxPool.py @@ -0,0 +1,26 @@ +# python: 3.6 +# encoding: utf-8 + +import torch.nn as nn +import torch.nn.functional as F + + +class MaxPool(nn.Module): + """1-d max-pooling module.""" + + def __init__(self, stride=None, padding=0, dilation=1): + super(MaxPool, self).__init__() + self.stride = stride + self.padding = padding + self.dilation = dilation + + def forward(self, x): + # [N,C,L] -> [N,C] + kernel_size = x.size(2) + x = F.max_pool1d( + input=x, + kernel_size=kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation) + return x.squeeze(dim=-1) diff --git a/fastNLP/modules/convolution/MaxPool1d.py b/fastNLP/modules/convolution/MaxPool1d.py deleted file mode 100644 index d1f39395..00000000 --- a/fastNLP/modules/convolution/MaxPool1d.py +++ /dev/null @@ -1,23 +0,0 @@ -# python: 3.6 -# encoding: utf-8 - -import torch.nn as nn -# import torch.nn.functional as F - - -class MaxPool1d(nn.Module): - """1-d max-pooling module.""" - - def __init__(self, kernel_size, stride=None, padding=0, - dilation=1, return_indices=False, ceil_mode=False): - super(MaxPool1d, self).__init__() - self.maxpool = nn.MaxPool1d( - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - return_indices=return_indices, - ceil_mode=ceil_mode) - - def forward(self, x): - return self.maxpool(x) From 7b7826544e3f59241290fe88a4830ad7ea7b6e43 Mon Sep 17 00:00:00 2001 From: Ke Zhen Date: Mon, 2 Jul 2018 15:12:18 +0800 Subject: [PATCH 09/13] add kmax pooling module --- .../convolution/{AvgPool.py => avg_pool.py} | 0 fastNLP/modules/convolution/kmax_pool.py | 20 +++++++++++++++++++ .../convolution/{MaxPool.py => max_pool.py} | 0 3 files changed, 20 insertions(+) rename fastNLP/modules/convolution/{AvgPool.py => avg_pool.py} (100%) create mode 100644 fastNLP/modules/convolution/kmax_pool.py rename fastNLP/modules/convolution/{MaxPool.py => max_pool.py} (100%) diff --git a/fastNLP/modules/convolution/AvgPool.py b/fastNLP/modules/convolution/avg_pool.py similarity index 100% rename from fastNLP/modules/convolution/AvgPool.py rename to fastNLP/modules/convolution/avg_pool.py diff --git a/fastNLP/modules/convolution/kmax_pool.py b/fastNLP/modules/convolution/kmax_pool.py new file mode 100644 index 00000000..17fa9248 --- /dev/null +++ b/fastNLP/modules/convolution/kmax_pool.py @@ -0,0 +1,20 @@ +# python: 3.6 +# encoding: utf-8 + +import torch +import torch.nn as nn +# import torch.nn.functional as F + + +class KMaxPool(nn.Module): + """K max-pooling module.""" + + def __init__(self, k): + super(KMaxPool, self).__init__() + self.k = k + + def forward(self, x): + # [N,C,L] -> [N,C*k] + x, index = torch.topk(x, self.k, dim=-1, sorted=False) + x = torch.reshape(x, (x.size(0), -1)) + return x diff --git a/fastNLP/modules/convolution/MaxPool.py b/fastNLP/modules/convolution/max_pool.py similarity index 100% rename from fastNLP/modules/convolution/MaxPool.py rename to fastNLP/modules/convolution/max_pool.py From d6187274be8759cc8dd74d7dab264e343ff28204 Mon Sep 17 00:00:00 2001 From: Zhen Ke Date: Mon, 2 Jul 2018 15:16:17 +0800 Subject: [PATCH 10/13] Rename Conv.py to conv.py --- fastNLP/modules/convolution/{Conv.py => conv.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename fastNLP/modules/convolution/{Conv.py => conv.py} (100%) diff --git a/fastNLP/modules/convolution/Conv.py b/fastNLP/modules/convolution/conv.py similarity index 100% rename from fastNLP/modules/convolution/Conv.py rename to fastNLP/modules/convolution/conv.py From ceffed6a1615cfbb7fe1520bdd6fd3f0d9670473 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Tue, 3 Jul 2018 09:00:29 +0800 Subject: [PATCH 11/13] update trainer: add sampling and padding in batchify, add pkl loading in prepare_input, check model loss in get_loss --- fastNLP/action/action.py | 86 +++++++++++++++++++++++++++------------ fastNLP/action/trainer.py | 71 ++++++++++++++++++++++++++------ 2 files changed, 119 insertions(+), 38 deletions(-) diff --git a/fastNLP/action/action.py b/fastNLP/action/action.py index 5512c7b1..ea12a37e 100644 --- a/fastNLP/action/action.py +++ b/fastNLP/action/action.py @@ -1,3 +1,4 @@ +import numpy as np class Action(object): @@ -8,28 +9,63 @@ class Action(object): def __init__(self): super(Action, self).__init__() - def batchify(self, batch_size, X, Y=None): - """ - :param batch_size: int - :param X: feature matrix of size [n_sample, m_feature] - :param Y: label vector of size [n_sample, 1] (optional) - :return iteration:int, the number of step in each epoch - generator:generator, to generate batch inputs - """ - n_samples = X.shape[0] - num_iter = n_samples // batch_size - if Y is None: - generator = self._batch_generate(batch_size, num_iter, X) - else: - generator = self._batch_generate(batch_size, num_iter, X, Y) - return num_iter, generator - - @staticmethod - def _batch_generate(batch_size, num_iter, *data): - for step in range(num_iter): - start = batch_size * step - end = batch_size * (step + 1) - yield tuple([x[start:end] for x in data]) - - def make_log(self, *args): - return "log" + +class BaseSampler(object): + """ + Base class for all samplers. + """ + + def __init__(self, data_set): + self.data_set_length = len(data_set) + + def __len__(self): + return self.data_set_length + + def __iter__(self): + raise NotImplementedError + + +class SequentialSampler(BaseSampler): + """ + Sample data in the original order. + """ + + def __init__(self, data_set): + super(SequentialSampler, self).__init__(data_set) + + def __iter__(self): + return iter(range(self.data_set_length)) + + +class RandomSampler(BaseSampler): + """ + Sample data in random permutation order. + """ + + def __init__(self, data_set): + super(RandomSampler, self).__init__(data_set) + + def __iter__(self): + return iter(np.random.permutation(self.data_set_length)) + + +class Batchifier(object): + """ + Wrap random or sequential sampler to generate a mini-batch. + """ + + def __init__(self, sampler, batch_size, drop_last=True): + super(Batchifier, self).__init__() + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) < self.batch_size and self.drop_last is False: + yield batch diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 0bbcccd7..8b9eb717 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -1,9 +1,11 @@ +import pickle from collections import namedtuple import numpy as np import torch from fastNLP.action.action import Action +from fastNLP.action.action import RandomSampler, Batchifier from fastNLP.action.tester import Tester @@ -31,8 +33,10 @@ class BaseTrainer(Action): self.validate = train_args.validate self.batch_size = train_args.batch_size self.model = None + self.iterator = None + self.loss_func = None - def train(self, network, train_data, dev_data=None): + def train(self, network): """General training loop. :param network: a model :param train_data: raw data for training @@ -50,22 +54,21 @@ class BaseTrainer(Action): Subclasses must implement these methods with a specific framework. """ self.model = network - train_x, train_y = self.prepare_input(train_data) - - iterations, train_batch_generator = self.batchify(self.batch_size, train_x, train_y) + data_train, data_dev, data_test, embedding = self.prepare_input("./save/") test_args = Tester.TestConfig(save_output=True, validate_in_training=True, save_dev_input=True, save_loss=True, batch_size=self.batch_size) evaluator = Tester(test_args) best_loss = 1e10 + iterations = len(data_train) // self.batch_size for epoch in range(self.n_epochs): - self.mode(test=False) # turn on the train mode + self.mode(test=False) self.define_optimizer() for step in range(iterations): - batch_x, batch_y = train_batch_generator.__next__() + batch_x, batch_y = self.batchify(self.batch_size, data_train) prediction = self.data_forward(network, batch_x) @@ -74,21 +77,23 @@ class BaseTrainer(Action): self.update() if self.validate: - if dev_data is None: + if data_dev is None: raise RuntimeError("No validation data provided.") - evaluator.test(network, dev_data) + evaluator.test(network, data_dev) if evaluator.loss < best_loss: best_loss = evaluator.loss # finish training - def prepare_input(self, data): + def prepare_input(self, data_path): """ - Perform data transformation from raw input to vector/matrix inputs. - :param data: raw inputs - :return (X, Y): tuple, input features and labels + To do: Load pkl files of train/dev/test and embedding """ - raise NotImplementedError + data_train = pickle.load(open(data_path + "data_train.pkl", "rb")) + data_dev = pickle.load(open(data_path + "data_dev.pkl", "rb")) + data_test = pickle.load(open(data_path + "data_test.pkl", "rb")) + embedding = pickle.load(open(data_path + "embedding.pkl", "rb")) + return data_train, data_dev, data_test, embedding def mode(self, test=False): """ @@ -138,8 +143,48 @@ class BaseTrainer(Action): :param truth: ground truth label vector :return: a scalar """ + if self.loss_func is None: + if hasattr(self.model, "loss"): + self.loss_func = self.model.loss + else: + self.loss_func = self.define_loss() + return self.loss_func(predict, truth) + + def define_loss(self): raise NotImplementedError + def batchify(self, batch_size, data): + """ + Perform batching from data and produce a batch of training data. + Add padding. + :param batch_size: + :param data: + :param pad: + :return: batch_x, batch_y + """ + if self.iterator is None: + self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True)) + indices = next(self.iterator) + batch = [data[idx] for idx in indices] + batch_x = [sample[0] for sample in batch] + batch_y = [sample[1] for sample in batch] + batch_x = self.pad(batch_x) + return batch_x, batch_y + + @staticmethod + def pad(batch, fill=0): + """ + Pad a batch of samples to maximum length. + :param batch: list of list + :param fill: word index to pad, default 0. + :return: a padded batch + """ + max_length = max([len(x) for x in batch]) + for idx, sample in enumerate(batch): + if len(sample) < max_length: + batch[idx] = sample + [fill * (max_length - len(sample))] + return batch + class ToyTrainer(BaseTrainer): """A simple trainer for a PyTorch model.""" From 982503d03329b9942ef2fb143cb6f7e8e176e65a Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Wed, 4 Jul 2018 22:56:24 +0800 Subject: [PATCH 12/13] optimize code style --- fastNLP/loader/base_preprocess.py | 35 --------------- fastNLP/loader/config_loader.py | 11 +++-- fastNLP/loader/dataset_loader.py | 1 - fastNLP/loader/preprocess.py | 73 +++++++++++++++++++++---------- fastNLP/saver/base_saver.py | 14 ++++++ fastNLP/saver/logger.py | 12 +++++ fastNLP/saver/model_saver.py | 8 ++++ 7 files changed, 88 insertions(+), 66 deletions(-) delete mode 100644 fastNLP/loader/base_preprocess.py create mode 100644 fastNLP/saver/base_saver.py create mode 100644 fastNLP/saver/logger.py create mode 100644 fastNLP/saver/model_saver.py diff --git a/fastNLP/loader/base_preprocess.py b/fastNLP/loader/base_preprocess.py deleted file mode 100644 index 806fbd18..00000000 --- a/fastNLP/loader/base_preprocess.py +++ /dev/null @@ -1,35 +0,0 @@ - - -class BasePreprocess(object): - - - def __init__(self, data, pickle_path): - super(BasePreprocess, self).__init__() - self.data = data - self.pickle_path = pickle_path - if not self.pickle_path.endswith('/'): - self.pickle_path = self.pickle_path + '/' - - def word2id(self): - raise NotImplementedError - - def id2word(self): - raise NotImplementedError - - def class2id(self): - raise NotImplementedError - - def id2class(self): - raise NotImplementedError - - def embedding(self): - raise NotImplementedError - - def data_train(self): - raise NotImplementedError - - def data_dev(self): - raise NotImplementedError - - def data_test(self): - raise NotImplementedError diff --git a/fastNLP/loader/config_loader.py b/fastNLP/loader/config_loader.py index 371de4f1..e57d9891 100644 --- a/fastNLP/loader/config_loader.py +++ b/fastNLP/loader/config_loader.py @@ -1,9 +1,8 @@ -from fastNLP.loader.base_loader import BaseLoader - import configparser -import traceback import json +from fastNLP.loader.base_loader import BaseLoader + class ConfigLoader(BaseLoader): """loader for configuration files""" @@ -17,14 +16,14 @@ class ConfigLoader(BaseLoader): raise NotImplementedError @staticmethod - def loadConfig(filePath, sections): + def load_config(file_path, sections): """ - :param filePath: the path of config file + :param file_path: the path of config file :param sections: the dict of sections :return: """ cfg = configparser.ConfigParser() - cfg.read(filePath) + cfg.read(file_path) for s in sections: attr_list = [i for i in type(sections[s]).__dict__.keys() if not callable(getattr(sections[s], i)) and not i.startswith("__")] diff --git a/fastNLP/loader/dataset_loader.py b/fastNLP/loader/dataset_loader.py index f8bcb276..7132eb3b 100644 --- a/fastNLP/loader/dataset_loader.py +++ b/fastNLP/loader/dataset_loader.py @@ -30,7 +30,6 @@ class POSDatasetLoader(DatasetLoader): return lines - class ClassificationDatasetLoader(DatasetLoader): """loader for classfication data sets""" diff --git a/fastNLP/loader/preprocess.py b/fastNLP/loader/preprocess.py index 8e880107..b8d88c35 100644 --- a/fastNLP/loader/preprocess.py +++ b/fastNLP/loader/preprocess.py @@ -1,25 +1,57 @@ -import pickle import _pickle import os -from fastNLP.loader.base_preprocess import BasePreprocess - -DEFAULT_PADDING_LABEL = '' #dict index = 0 -DEFAULT_UNKNOWN_LABEL = '' #dict index = 1 +DEFAULT_PADDING_LABEL = '' # dict index = 0 +DEFAULT_UNKNOWN_LABEL = '' # dict index = 1 DEFAULT_RESERVED_LABEL = ['', '', - ''] #dict index = 2~4 -#the first vocab in dict with the index = 5 + ''] # dict index = 2~4 + + +# the first vocab in dict with the index = 5 + + +class BasePreprocess(object): + + def __init__(self, data, pickle_path): + super(BasePreprocess, self).__init__() + self.data = data + self.pickle_path = pickle_path + if not self.pickle_path.endswith('/'): + self.pickle_path = self.pickle_path + '/' + + def word2id(self): + raise NotImplementedError + + def id2word(self): + raise NotImplementedError + + def class2id(self): + raise NotImplementedError + + def id2class(self): + raise NotImplementedError + def embedding(self): + raise NotImplementedError + + def data_train(self): + raise NotImplementedError + + def data_dev(self): + raise NotImplementedError + + def data_test(self): + raise NotImplementedError class POSPreprocess(BasePreprocess): """ This class are used to preprocess the pos datasets. - In these datasets, each line are divided by '\t' - while the first Col is the vocabulary and the second - Col is the label. + In these datasets, each line is divided by '\t' + The first Col is the vocabulary. + The second Col is the labels. Different sentence are divided by an empty line. e.g: Tom label1 @@ -36,7 +68,9 @@ class POSPreprocess(BasePreprocess): """ def __init__(self, data, pickle_path): - super(POSPreprocess, self).__init(data, pickle_path) + super(POSPreprocess, self).__init__(data, pickle_path) + self.word_dict = None + self.label_dict = None self.build_dict() self.word2id() self.id2word() @@ -46,8 +80,6 @@ class POSPreprocess(BasePreprocess): self.data_train() self.data_dev() self.data_test() - #... - def build_dict(self): self.word_dict = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1, @@ -68,7 +100,6 @@ class POSPreprocess(BasePreprocess): index = len(self.label_dict) self.label_dict[label] = index - def pickle_exist(self, pickle_name): """ :param pickle_name: the filename of target pickle file @@ -82,7 +113,6 @@ class POSPreprocess(BasePreprocess): else: return False - def word2id(self): if self.pickle_exist("word2id.pkl"): return @@ -92,11 +122,10 @@ class POSPreprocess(BasePreprocess): with open(file_name, "wb", encoding='utf-8') as f: _pickle.dump(self.word_dict, f) - def id2word(self): if self.pickle_exist("id2word.pkl"): return - #nothing will be done if id2word.pkl exists + # nothing will be done if id2word.pkl exists id2word_dict = {} for word in self.word_dict: @@ -105,7 +134,6 @@ class POSPreprocess(BasePreprocess): with open(file_name, "wb", encoding='utf-8') as f: _pickle.dump(id2word_dict, f) - def class2id(self): if self.pickle_exist("class2id.pkl"): return @@ -115,11 +143,10 @@ class POSPreprocess(BasePreprocess): with open(file_name, "wb", encoding='utf-8') as f: _pickle.dump(self.label_dict, f) - def id2class(self): if self.pickle_exist("id2class.pkl"): return - #nothing will be done if id2class.pkl exists + # nothing will be done if id2class.pkl exists id2class_dict = {} for label in self.label_dict: @@ -128,17 +155,15 @@ class POSPreprocess(BasePreprocess): with open(file_name, "wb", encoding='utf-8') as f: _pickle.dump(id2class_dict, f) - def embedding(self): if self.pickle_exist("embedding.pkl"): return - #nothing will be done if embedding.pkl exists - + # nothing will be done if embedding.pkl exists def data_train(self): if self.pickle_exist("data_train.pkl"): return - #nothing will be done if data_train.pkl exists + # nothing will be done if data_train.pkl exists data_train = [] sentence = [] diff --git a/fastNLP/saver/base_saver.py b/fastNLP/saver/base_saver.py new file mode 100644 index 00000000..d721da2c --- /dev/null +++ b/fastNLP/saver/base_saver.py @@ -0,0 +1,14 @@ +class BaseSaver(object): + """base class for all savers""" + + def __init__(self, save_path): + self.save_path = save_path + + def save_bytes(self): + raise NotImplementedError + + def save_str(self): + raise NotImplementedError + + def compress(self): + raise NotImplementedError diff --git a/fastNLP/saver/logger.py b/fastNLP/saver/logger.py new file mode 100644 index 00000000..be38de40 --- /dev/null +++ b/fastNLP/saver/logger.py @@ -0,0 +1,12 @@ +from saver.base_saver import BaseSaver + + +class Logger(BaseSaver): + """Logging""" + + def __init__(self, save_path): + super(Logger, self).__init__(save_path) + + def log(self, string): + with open(self.save_path, "a") as f: + f.write(string) diff --git a/fastNLP/saver/model_saver.py b/fastNLP/saver/model_saver.py new file mode 100644 index 00000000..3b3cbeca --- /dev/null +++ b/fastNLP/saver/model_saver.py @@ -0,0 +1,8 @@ +from saver.base_saver import BaseSaver + + +class ModelSaver(BaseSaver): + """Save a models""" + + def __init__(self, save_path): + super(ModelSaver, self).__init__(save_path) From 7ea015c0f96b27bcb6091154adfac4ffae563766 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Wed, 4 Jul 2018 23:28:48 +0800 Subject: [PATCH 13/13] update trainer: loading data with _pickle; add arguments comments. --- fastNLP/action/trainer.py | 51 ++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/fastNLP/action/trainer.py b/fastNLP/action/trainer.py index 8b9eb717..437ab7d2 100644 --- a/fastNLP/action/trainer.py +++ b/fastNLP/action/trainer.py @@ -1,4 +1,4 @@ -import pickle +import _pickle from collections import namedtuple import numpy as np @@ -21,8 +21,7 @@ class BaseTrainer(Action): - grad_backward - get_loss """ - TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", - "log_per_step", "log_validation", "batch_size"]) + TrainConfig = namedtuple("config", ["epochs", "validate", "batch_size", "pickle_path"]) def __init__(self, train_args): """ @@ -32,6 +31,7 @@ class BaseTrainer(Action): self.n_epochs = train_args.epochs self.validate = train_args.validate self.batch_size = train_args.batch_size + self.pickle_path = train_args.pickle_path self.model = None self.iterator = None self.loss_func = None @@ -39,8 +39,6 @@ class BaseTrainer(Action): def train(self, network): """General training loop. :param network: a model - :param train_data: raw data for training - :param dev_data: raw data for validation The method is framework independent. Work by calling the following methods: @@ -54,7 +52,7 @@ class BaseTrainer(Action): Subclasses must implement these methods with a specific framework. """ self.model = network - data_train, data_dev, data_test, embedding = self.prepare_input("./save/") + data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path) test_args = Tester.TestConfig(save_output=True, validate_in_training=True, save_dev_input=True, save_loss=True, batch_size=self.batch_size) @@ -89,10 +87,10 @@ class BaseTrainer(Action): """ To do: Load pkl files of train/dev/test and embedding """ - data_train = pickle.load(open(data_path + "data_train.pkl", "rb")) - data_dev = pickle.load(open(data_path + "data_dev.pkl", "rb")) - data_test = pickle.load(open(data_path + "data_test.pkl", "rb")) - embedding = pickle.load(open(data_path + "embedding.pkl", "rb")) + data_train = _pickle.load(open(data_path + "data_train.pkl", "rb")) + data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb")) + data_test = _pickle.load(open(data_path + "data_test.pkl", "rb")) + embedding = _pickle.load(open(data_path + "embedding.pkl", "rb")) return data_train, data_dev, data_test, embedding def mode(self, test=False): @@ -147,20 +145,30 @@ class BaseTrainer(Action): if hasattr(self.model, "loss"): self.loss_func = self.model.loss else: - self.loss_func = self.define_loss() + self.define_loss() return self.loss_func(predict, truth) def define_loss(self): + """ + Assign an instance of loss function to self.loss_func + E.g. self.loss_func = nn.CrossEntropyLoss() + """ raise NotImplementedError def batchify(self, batch_size, data): """ - Perform batching from data and produce a batch of training data. - Add padding. - :param batch_size: - :param data: - :param pad: - :return: batch_x, batch_y + 1. Perform batching from data and produce a batch of training data. + 2. Add padding. + :param batch_size: int, the size of a batch + :param data: list. Each entry is a sample, which is also a list of features and label(s). + E.g. + [ + [[feature_1, feature_2, feature_3], [label_1. label_2]], # sample 1 + [[feature_1, feature_2, feature_3], [label_1. label_2]], # sample 2 + ... + ] + :return batch_x: list. Each entry is a list of features of a sample. + batch_y: list. Each entry is a list of labels of a sample. """ if self.iterator is None: self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True)) @@ -306,8 +314,7 @@ class WordSegTrainer(BaseTrainer): if __name__ == "__name__": - Config = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", - "log_validation", "batch_size"]) - train_config = Config(epochs=5, validate=True, save_when_better=True, log_per_step=10, log_validation=True, - batch_size=32) - trainer = ToyTrainer(train_config) + train_args = BaseTrainer.TrainConfig(epochs=1, validate=False, batch_size=3, pickle_path="./") + trainer = BaseTrainer(train_args) + data_train = [[[1, 2, 3, 4], [0]] * 10] + [[[1, 3, 5, 2], [1]] * 10] + trainer.batchify(batch_size=3, data=data_train)