@@ -1,14 +1,18 @@
 import torch

+from fastNLP.core.dataset import DataSet
+from fastNLP.core.instance import Instance
+from fastNLP.core.predictor import Predictor


 class API:
     def __init__(self):
         self.pipeline = None
         self.model = None

-    def predict(self):
-        pass
+    def predict(self, *args, **kwargs):
+        raise NotImplementedError

     def load(self, name):
         _dict = torch.load(name)
@@ -19,3 +23,47 @@ class API:
         _dict = {'pipeline': self.pipeline,
                  'model': self.model}
         torch.save(_dict, path)
+
+
+class POS_tagger(API):
+    """FastNLP API for Part-Of-Speech tagging.
+    """
+
+    def __init__(self):
+        super(POS_tagger, self).__init__()
+
+    def predict(self, query):
+        """
+        :param query: list of list of str. Each string is a token(word).
+        :return answer: list of list of str. Each string is a tag.
+        """
+        self.load("/home/zyfeng/fastnlp_0.2.0/reproduction/pos_tag_model/model_pp.pkl")
+
+        data = DataSet()
+        for example in query:
+            data.append(Instance(words=example))
+
+        data = self.pipeline(data)
+
+        predictor = Predictor()
+        outputs = predictor.predict(self.model, data)
+
+        answers = []
+        for out in outputs:
+            out = out.numpy()
+            for sent in out:
+                answers.append([self.tag_vocab.to_word(tag) for tag in sent])
+        return answers
+
+    def load(self, name):
+        _dict = torch.load(name)
+        self.pipeline = _dict['pipeline']
+        self.model = _dict['model']
+        self.tag_vocab = _dict["tag_vocab"]
+
+
+if __name__ == "__main__":
+    tagger = POS_tagger()
+    print(tagger.predict([["我", "是", "学生", "。"], ["我", "是", "学生", "。"]]))
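A note on the usage pattern shown in `__main__`: `predict()` reloads the checkpoint from the hard-coded path on every call and returns one tag string per input token. A minimal sketch of the expected input/output shape (the tags shown are illustrative, not guaranteed by the trained model):

```python
query = [["我", "是", "学生", "。"]]      # a batch of tokenized sentences
answers = POS_tagger().predict(query)     # requires the checkpoint path above to exist
# answers is aligned token-for-token with the input,
# e.g. [["r", "v", "n", "w"]]             (illustrative tags)
```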
@@ -11,7 +11,7 @@ class Pipeline:
         self.pipeline = []
         if isinstance(processors, list):
             for proc in processors:
-                assert isinstance(proc, Processor), "Must be a Processor, not {}.".format(type(processor))
+                assert isinstance(proc, Processor), "Must be a Processor, not {}.".format(type(proc))
             self.pipeline = processors

     def add_processor(self, processor):
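The old assert referenced `processor`, a name that does not exist in `__init__`, so a bad element would have raised a confusing `NameError` instead of the intended message. A small sketch of the construction path the corrected check guards (the processor instance is the one built in the training script later in this diff):

```python
from fastNLP.api.pipeline import Pipeline
from fastNLP.api.processor import SeqLenProcessor

seq_len_proc = SeqLenProcessor("word_seq", "word_seq_origin_len")
pp = Pipeline([seq_len_proc])     # every element must be a Processor instance
# Pipeline([seq_len_proc, 42])    # now fails cleanly: "Must be a Processor, not <class 'int'>."
```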
@@ -9,7 +9,7 @@ class Batch(object):

     """

-    def __init__(self, dataset, batch_size, sampler, use_cuda, sort_in_batch=False, sort_key=None):
+    def __init__(self, dataset, batch_size, sampler, use_cuda):
         """

         :param dataset: a DataSet object
@@ -22,8 +22,6 @@ class Batch(object):
         self.batch_size = batch_size
         self.sampler = sampler
         self.use_cuda = use_cuda
-        self.sort_in_batch = sort_in_batch
-        self.sort_key = sort_key if sort_key is not None else 'word_seq'
         self.idx_list = None
         self.curidx = 0
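With `sort_in_batch`/`sort_key` gone, callers construct a `Batch` with only four arguments, which is what the `Tester` and `Trainer` call sites below are updated to. A minimal sketch of the new construction (the `Batch` module path is an assumption; `RandomSampler` and the iteration protocol are taken from the Trainer code in this diff):

```python
from fastNLP.core.batch import Batch           # module path assumed
from fastNLP.core.sampler import RandomSampler

# `dataset` is a DataSet built elsewhere (e.g. by the processors in this diff)
data_iterator = Batch(dataset, batch_size=16, sampler=RandomSampler(), use_cuda=False)
for batch_x, batch_y in data_iterator:          # input fields / target fields per mini-batch
    pass
```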
@@ -119,7 +119,7 @@ class DataSet(object):
                 assert isinstance(val, bool)
                 self.field_arrays[name].is_target = val
             else:
-                raise KeyError
+                raise KeyError("{} is not a valid field name.".format(name))
         return self

     def set_need_tensor(self, **kwargs):
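`set_is_target` takes field names as keyword arguments (see `dev_set.set_is_target(truth=True)` in the training script below), so the new message makes the failure self-explanatory when a name is misspelled:

```python
dataset.set_is_target(truth=True)   # marks the existing "truth" field as a prediction target
dataset.set_is_target(turth=True)   # now raises KeyError("turth is not a valid field name.")
```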
@@ -43,12 +43,11 @@ class SeqLabelEvaluator(Evaluator):
        :return accuracy:
        """
        truth = [item["truth"] for item in truth]
-        total_correct, total_count= 0., 0.
+        total_correct, total_count = 0., 0.
        for x, y in zip(predict, truth):
-            x = torch.Tensor(x)
+            x = torch.tensor(x)
            y = y.to(x)  # make sure they are in the same device
-            mask = x.ge(1).float()
-            # correct = torch.sum(x * mask.float() == (y * mask.long()).float())
+            mask = x.ge(1).long()
            correct = torch.sum(x * mask == y * mask)
            correct -= torch.sum(x.le(0))
            total_correct += float(correct)
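Switching from a float mask to a long mask keeps the comparison in integer space. A worked example of the arithmetic (tag id 0 is assumed to be the padding index, as the `ge(1)` mask implies):

```python
import torch

x = torch.tensor([3, 1, 0, 0])              # predicted tag ids, padded with 0
y = torch.tensor([3, 2, 0, 0])              # gold tag ids
mask = x.ge(1).long()                       # [1, 1, 0, 0] keeps real tokens only
correct = torch.sum(x * mask == y * mask)   # positions 0, 2, 3 match -> 3
correct -= torch.sum(x.le(0))               # remove the 2 padding matches -> 1
# 1 correct prediction out of the 2 real tokens in this sentence
```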
@@ -74,7 +74,7 @@ class Tester(object):
        output_list = []
        truth_list = []

-        data_iterator = Batch(dev_data, self.batch_size, sampler=RandomSampler(), use_cuda=self.use_cuda, sort_in_batch=True, sort_key='word_seq')
+        data_iterator = Batch(dev_data, self.batch_size, sampler=RandomSampler(), use_cuda=self.use_cuda)

        with torch.no_grad():
            for batch_x, batch_y in data_iterator:
@@ -11,12 +11,14 @@ from fastNLP.core.metrics import Evaluator
 from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.sampler import RandomSampler
 from fastNLP.core.tester import SeqLabelTester, ClassificationTester, SNLITester
+from fastNLP.core.tester import Tester
 from fastNLP.saver.logger import create_logger
 from fastNLP.saver.model_saver import ModelSaver

 logger = create_logger(__name__, "./train_test.log")
 logger.disabled = True


 class Trainer(object):
     """Operations of training a model, including data loading, gradient descent, and validation.
@@ -138,23 +140,22 @@ class Trainer(object):
         print("training epochs started " + self.start_time)
         logger.info("training epochs started " + self.start_time)
         epoch, iters = 1, 0
-        while(1):
-            if self.n_epochs != -1 and epoch > self.n_epochs:
-                break
+        while epoch <= self.n_epochs:
             logger.info("training epoch {}".format(epoch))

             # prepare mini-batch iterator
             data_iterator = Batch(train_data, batch_size=self.batch_size, sampler=RandomSampler(),
-                                  use_cuda=self.use_cuda, sort_in_batch=True, sort_key='word_seq')
+                                  use_cuda=self.use_cuda)
             logger.info("prepared data iterator")

             # one forward and backward pass
-            iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch, step=iters, dev_data=dev_data)
+            iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch,
+                                     step=iters, dev_data=dev_data)

             # validation
             if self.validate:
                 self.valid_model()
-            self.save_model(self._model, 'training_model_'+self.start_time)
+            self.save_model(self._model, 'training_model_' + self.start_time)
             epoch += 1

     def _train_step(self, data_iterator, network, **kwargs):
@@ -171,13 +172,13 @@ class Trainer(object):
             loss = self.get_loss(prediction, batch_y)
             self.grad_backward(loss)
-            # if torch.rand(1).item() < 0.001:
-            #     print('[grads at epoch: {:>3} step: {:>4}]'.format(kwargs['epoch'], step))
-            #     for name, p in self._model.named_parameters():
-            #         if p.requires_grad:
-            #             print('\t{} {} {}'.format(name, tuple(p.size()), torch.sum(p.grad).item()))
             self.update()
             self._summary_writer.add_scalar("loss", loss.item(), global_step=step)
+            for name, param in self._model.named_parameters():
+                if param.requires_grad:
+                    self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=step)
+                    self._summary_writer.add_scalar(name + "_std", param.std(), global_step=step)
+                    self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=step)

             if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0:
                 end = time.time()
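The new per-parameter scalars go to the trainer's `_summary_writer`, which is not shown in this hunk; a minimal sketch of the writer they assume (tensorboardX and the log directory are assumptions — only the `add_scalar` calls appear in the diff). Note that, as written, the `*_grad_sum` tag records `param.sum()` rather than a gradient statistic:

```python
from tensorboardX import SummaryWriter        # assumed backend for _summary_writer

writer = SummaryWriter("./tensorboard_logs")  # illustrative log directory
writer.add_scalar("Linear1.weight_mean", 0.01, global_step=100)
writer.close()
# Inspect with: tensorboard --logdir ./tensorboard_logs
```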
@@ -193,14 +194,14 @@ class Trainer(object):

     def valid_model(self):
         if self.dev_data is None:
-            raise RuntimeError(
-                "self.validate is True in trainer, but dev_data is None. Please provide the validation data.")
+            raise RuntimeError(
+                "self.validate is True in trainer, but dev_data is None. Please provide the validation data.")
         logger.info("validation started")
         res = self.validator.test(self._model, self.dev_data)
         if self.save_best_dev and self.best_eval_result(res):
             logger.info('save best result! {}'.format(res))
             print('save best result! {}'.format(res))
-            self.save_model(self._model, 'best_model_'+self.start_time)
+            self.save_model(self._model, 'best_model_' + self.start_time)
         return res

     def mode(self, model, is_test=False):
@@ -230,7 +231,6 @@ class Trainer(object):

     def update(self):
         """Perform weight update on a model.

-        For PyTorch, just call optimizer to update.
        """
        self._optimizer.step()
@@ -319,15 +319,17 @@ class Trainer(object):
         ModelSaver(os.path.join(self.pickle_path, model_name)).save_pytorch(network)

     def _create_validator(self, valid_args):
-        raise NotImplementedError
+        return Tester(**valid_args)

     def set_validator(self, validor):
         self.validator = validor


 class SeqLabelTrainer(Trainer):
     """Trainer for Sequence Labeling
     """

     def __init__(self, **kwargs):
         print(
             "[FastNLP Warning] SeqLabelTrainer will be deprecated. Please use Trainer directly.")
@@ -116,11 +116,11 @@ class AdvSeqLabel(SeqLabeling):
         num_classes = args["num_classes"]

         self.Embedding = encoder.embedding.Embedding(vocab_size, word_emb_dim, init_emb=emb)
-        self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=3, dropout=0.3, bidirectional=True)
+        self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=3, dropout=0.5, bidirectional=True)
         self.Linear1 = encoder.Linear(hidden_dim * 2, hidden_dim * 2 // 3)
         self.batch_norm = torch.nn.BatchNorm1d(hidden_dim * 2 // 3)
         self.relu = torch.nn.ReLU()
-        self.drop = torch.nn.Dropout(0.3)
+        self.drop = torch.nn.Dropout(0.5)
         self.Linear2 = encoder.Linear(hidden_dim * 2 // 3, num_classes)

         self.Crf = decoder.CRF.ConditionalRandomField(num_classes)
@@ -135,7 +135,7 @@ class AdvSeqLabel(SeqLabeling):
         """
         word_seq = word_seq.long()
         word_seq_origin_len = word_seq_origin_len.long()
-        truth = truth.long()
+        truth = truth.long() if truth is not None else None
         self.mask = self.make_mask(word_seq, word_seq_origin_len)

         batch_size = word_seq.size(0)
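The guard matters because `forward` is also called at inference time, when no gold tags are available; calling `.long()` on `None` would raise. A hedged sketch of the two call patterns (keyword names follow the fields used elsewhere in this diff; the default value of `truth` is assumed):

```python
# training: gold tags provided, so the CRF loss can be computed
out = model(word_seq=word_seq, word_seq_origin_len=seq_len, truth=tag_ids)

# inference: no gold tags, forward must tolerate truth=None
out = model(word_seq=word_seq, word_seq_origin_len=seq_len, truth=None)
```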
@@ -3,6 +3,7 @@ from torch import nn

 from fastNLP.modules.utils import initial_parameter


 def log_sum_exp(x, dim=-1):
     max_value, _ = x.max(dim=dim, keepdim=True)
     res = torch.log(torch.sum(torch.exp(x - max_value), dim=dim, keepdim=True)) + max_value
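For reference, `log_sum_exp` applies the standard max-shift identity log Σᵢ exp(xᵢ) = max(x) + log Σᵢ exp(xᵢ − max(x)), which avoids overflow in the CRF's partition computation. A quick numerical check:

```python
import torch

x = torch.tensor([1000.0, 1000.0])
naive = torch.log(torch.exp(x).sum())              # inf: exp(1000) overflows
m, _ = x.max(dim=-1, keepdim=True)
stable = torch.log(torch.exp(x - m).sum()) + m     # tensor([1000.6931]) = 1000 + ln 2
```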
@@ -91,7 +92,6 @@ class ConditionalRandomField(nn.Module):
             st_scores = self.start_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[0]]
             last_idx = mask.long().sum(0) - 1
             ed_scores = self.end_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[last_idx, batch_idx]]
-            print(score.size(), st_scores.size(), ed_scores.size())
             score += st_scores + ed_scores
         # return [B,]
         return score
@@ -128,7 +128,7 @@ class ConditionalRandomField(nn.Module):
         vpath = data.new_zeros((seq_len, batch_size, n_tags), dtype=torch.long)
         vscore = data[0]
         if self.include_start_end_trans:
-            vscore += self.start_scores.view(1. -1)
+            vscore += self.start_scores.view(1, -1)

         for i in range(1, seq_len):
             prev_score = vscore.view(batch_size, n_tags, 1)
             cur_score = data[i].view(batch_size, 1, n_tags)
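The old line was a typo, not a shape choice: `view(1. -1)` parses as `view(1.0 - 1)`, i.e. `view(0.0)`, which raises a `TypeError` the first time Viterbi decoding runs with start/end transitions enabled. The fix reshapes the start scores into a row vector so they broadcast over the batch:

```python
import torch

start_scores = torch.arange(4.)
start_scores.view(1, -1)     # shape (1, 4): broadcasts against vscore of shape (batch, n_tags)
# start_scores.view(1. -1)   # == view(0.0) -> TypeError: size must be a tuple of ints
```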
@@ -1,6 +1,6 @@
 [train]
-epochs = 30
-batch_size = 64
+epochs = 40
+batch_size = 8
 pickle_path = "./save/"
 validate = true
 save_best_dev = true
@@ -1,6 +1,6 @@
 [train]
 epochs = 5
-batch_size = 2
+batch_size = 64
 pickle_path = "./save/"
 validate = false
 save_best_dev = true
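These `[train]` sections are consumed through `ConfigLoader`/`ConfigSection` (imported in the training script below). A hedged sketch of the read path, only to show where the new `epochs`/`batch_size` values land — the exact `load_config` signature is an assumption, not shown in this diff:

```python
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection

train_param = ConfigSection()
ConfigLoader().load_config("./pos_tag.cfg", {"train": train_param})  # signature assumed
print(train_param["epochs"], train_param["batch_size"])
```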
@@ -1,3 +1,4 @@
+import copy
 import os

 import torch
@@ -6,15 +7,20 @@ from fastNLP.api.pipeline import Pipeline
 from fastNLP.api.processor import VocabProcessor, IndexerProcessor, SeqLenProcessor
 from fastNLP.core.dataset import DataSet
 from fastNLP.core.instance import Instance
+from fastNLP.core.metrics import SeqLabelEvaluator
+from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.trainer import Trainer
 from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
 from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader
 from fastNLP.models.sequence_modeling import AdvSeqLabel

 cfgfile = './pos_tag.cfg'
+# datadir = "/home/zyfeng/data/"
+# data_name = "POS_PD_1998.txt"
 datadir = "/home/zyfeng/fastnlp_0.2.0/test/data_for_tests/"
 data_name = "people_daily_raw.txt"

 pos_tag_data_path = os.path.join(datadir, data_name)
 pickle_path = "save"
 data_infer_path = os.path.join(datadir, "infer.utf8")
@@ -53,6 +59,9 @@ def train():
     seq_len_proc = SeqLenProcessor("word_seq", "word_seq_origin_len")
     seq_len_proc(dataset)

+    dev_set = copy.deepcopy(dataset)
+    dev_set.set_is_target(truth=True)
+
     print("processors defined")
     # dataset.set_is_target(tag_ids=True)
     model_param["vocab_size"] = len(word_vocab_proc.get_vocab())
@@ -63,12 +72,17 @@ def train():
     model = AdvSeqLabel(model_param)

     # call trainer to train
-    trainer = Trainer(**train_param.data)
-    trainer.train(model, dataset)
+    trainer = Trainer(epochs=train_param["epochs"],
+                      batch_size=train_param["batch_size"],
+                      validate=True,
+                      optimizer=Optimizer("SGD", lr=0.01, momentum=0.9),
+                      evaluator=SeqLabelEvaluator()
+                      )
+    trainer.train(model, dataset, dev_set)

     # save model & pipeline
-    pp = Pipeline([word_vocab_proc, word_indexer, seq_len_proc])
-    save_dict = {"pipeline": pp, "model": model}
+    pp = Pipeline([word_indexer, seq_len_proc])
+    save_dict = {"pipeline": pp, "model": model, "tag_vocab": tag_vocab_proc.get_vocab()}
     torch.save(save_dict, "model_pp.pkl")
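The saved dict now matches exactly what `POS_tagger.load()` at the top of this diff reads back: the inference-time pipeline (without the vocabulary-building processor, which is only needed during training), the trained model, and the tag vocabulary used to map predicted ids back to tag strings. A minimal sketch of the round trip:

```python
import torch

# "model_pp.pkl" is the file written above
checkpoint = torch.load("model_pp.pkl")
pipeline = checkpoint["pipeline"]     # Pipeline([word_indexer, seq_len_proc])
model = checkpoint["model"]           # trained AdvSeqLabel
tag_vocab = checkpoint["tag_vocab"]   # used by POS_tagger via tag_vocab.to_word(tag_id)
```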