From ec9fd32d6070330c8b8a6499113ee8d5abf91b21 Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Sat, 10 Nov 2018 18:49:22 +0800 Subject: [PATCH 1/2] improve trainer: log mean and std of model params, and sum of gradients --- fastNLP/core/trainer.py | 28 +++++++++++---------- fastNLP/modules/decoder/CRF.py | 2 +- reproduction/chinese_word_segment/cws.cfg | 4 +-- reproduction/pos_tag_model/pos_tag.cfg | 4 +-- reproduction/pos_tag_model/train_pos_tag.py | 7 +++++- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py index d1881297..a8f0e3c2 100644 --- a/fastNLP/core/trainer.py +++ b/fastNLP/core/trainer.py @@ -17,6 +17,7 @@ from fastNLP.saver.model_saver import ModelSaver logger = create_logger(__name__, "./train_test.log") logger.disabled = True + class Trainer(object): """Operations of training a model, including data loading, gradient descent, and validation. @@ -138,9 +139,7 @@ class Trainer(object): print("training epochs started " + self.start_time) logger.info("training epochs started " + self.start_time) epoch, iters = 1, 0 - while(1): - if self.n_epochs != -1 and epoch > self.n_epochs: - break + while epoch <= self.n_epochs: logger.info("training epoch {}".format(epoch)) # prepare mini-batch iterator @@ -149,12 +148,13 @@ class Trainer(object): logger.info("prepared data iterator") # one forward and backward pass - iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch, step=iters, dev_data=dev_data) + iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch, + step=iters, dev_data=dev_data) # validation if self.validate: self.valid_model() - self.save_model(self._model, 'training_model_'+self.start_time) + self.save_model(self._model, 'training_model_' + self.start_time) epoch += 1 def _train_step(self, data_iterator, network, **kwargs): @@ -171,13 +171,13 @@ class Trainer(object): loss = self.get_loss(prediction, batch_y) self.grad_backward(loss) - # if torch.rand(1).item() < 0.001: - # print('[grads at epoch: {:>3} step: {:>4}]'.format(kwargs['epoch'], step)) - # for name, p in self._model.named_parameters(): - # if p.requires_grad: - # print('\t{} {} {}'.format(name, tuple(p.size()), torch.sum(p.grad).item())) self.update() self._summary_writer.add_scalar("loss", loss.item(), global_step=step) + for name, param in self._model.named_parameters(): + if param.requires_grad: + self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=step) + self._summary_writer.add_scalar(name + "_std", param.std(), global_step=step) + self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=step) if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0: end = time.time() @@ -193,14 +193,14 @@ class Trainer(object): def valid_model(self): if self.dev_data is None: - raise RuntimeError( - "self.validate is True in trainer, but dev_data is None. Please provide the validation data.") + raise RuntimeError( + "self.validate is True in trainer, but dev_data is None. Please provide the validation data.") logger.info("validation started") res = self.validator.test(self._model, self.dev_data) if self.save_best_dev and self.best_eval_result(res): logger.info('save best result! {}'.format(res)) print('save best result! 
{}'.format(res)) - self.save_model(self._model, 'best_model_'+self.start_time) + self.save_model(self._model, 'best_model_' + self.start_time) return res def mode(self, model, is_test=False): @@ -324,10 +324,12 @@ class Trainer(object): def set_validator(self, validor): self.validator = validor + class SeqLabelTrainer(Trainer): """Trainer for Sequence Labeling """ + def __init__(self, **kwargs): print( "[FastNLP Warning] SeqLabelTrainer will be deprecated. Please use Trainer directly.") diff --git a/fastNLP/modules/decoder/CRF.py b/fastNLP/modules/decoder/CRF.py index e24f4d27..30279a61 100644 --- a/fastNLP/modules/decoder/CRF.py +++ b/fastNLP/modules/decoder/CRF.py @@ -3,6 +3,7 @@ from torch import nn from fastNLP.modules.utils import initial_parameter + def log_sum_exp(x, dim=-1): max_value, _ = x.max(dim=dim, keepdim=True) res = torch.log(torch.sum(torch.exp(x - max_value), dim=dim, keepdim=True)) + max_value @@ -91,7 +92,6 @@ class ConditionalRandomField(nn.Module): st_scores = self.start_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[0]] last_idx = mask.long().sum(0) - 1 ed_scores = self.end_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[last_idx, batch_idx]] - print(score.size(), st_scores.size(), ed_scores.size()) score += st_scores + ed_scores # return [B,] return score diff --git a/reproduction/chinese_word_segment/cws.cfg b/reproduction/chinese_word_segment/cws.cfg index 033d3967..d2263353 100644 --- a/reproduction/chinese_word_segment/cws.cfg +++ b/reproduction/chinese_word_segment/cws.cfg @@ -1,6 +1,6 @@ [train] -epochs = 30 -batch_size = 64 +epochs = 40 +batch_size = 8 pickle_path = "./save/" validate = true save_best_dev = true diff --git a/reproduction/pos_tag_model/pos_tag.cfg b/reproduction/pos_tag_model/pos_tag.cfg index 2e1f37b6..2a08f6da 100644 --- a/reproduction/pos_tag_model/pos_tag.cfg +++ b/reproduction/pos_tag_model/pos_tag.cfg @@ -1,6 +1,6 @@ [train] -epochs = 5 -batch_size = 2 +epochs = 20 +batch_size = 32 pickle_path = "./save/" validate = false save_best_dev = true diff --git a/reproduction/pos_tag_model/train_pos_tag.py b/reproduction/pos_tag_model/train_pos_tag.py index 027358ef..8936bac8 100644 --- a/reproduction/pos_tag_model/train_pos_tag.py +++ b/reproduction/pos_tag_model/train_pos_tag.py @@ -6,6 +6,7 @@ from fastNLP.api.pipeline import Pipeline from fastNLP.api.processor import VocabProcessor, IndexerProcessor, SeqLenProcessor from fastNLP.core.dataset import DataSet from fastNLP.core.instance import Instance +from fastNLP.core.optimizer import Optimizer from fastNLP.core.trainer import Trainer from fastNLP.loader.config_loader import ConfigLoader, ConfigSection from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader @@ -63,7 +64,11 @@ def train(): model = AdvSeqLabel(model_param) # call trainer to train - trainer = Trainer(**train_param.data) + trainer = Trainer(epochs=train_param["epochs"], + batch_size=train_param["batch_size"], + validate=False, + optimizer=Optimizer("SGD", lr=0.01, momentum=0.9), + ) trainer.train(model, dataset) # save model & pipeline From 5dd0f74d6d67397d9907ecae94abb4109268e35e Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Sat, 10 Nov 2018 21:20:16 +0800 Subject: [PATCH 2/2] =?UTF-8?q?-=20=E6=B7=BB=E5=8A=A0pos=5Ftagger=20API?= =?UTF-8?q?=EF=BC=8C=20pipeline=E8=B7=91=E9=80=9A=20-=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?processor=E7=9A=84bug=20-=20=E6=9B=B4=E6=96=B0core/=E7=9A=84?= =?UTF-8?q?=E8=8B=A5=E5=B9=B2=E7=BB=84=E4=BB=B6,=20=E5=8E=BB=E9=99=A4batch?= 
=?UTF-8?q?=E7=9A=84=E5=86=97=E4=BD=99=E5=8F=82=E6=95=B0=20-=20CRF?= =?UTF-8?q?=E6=9C=89=E4=B8=AA=E6=89=93=E5=AD=97=E9=94=99=E8=AF=AF=EF=BC=9F?= =?UTF-8?q?=E5=B7=B2=E4=BF=AE=E5=A4=8D=20-=20=E6=9B=B4=E6=96=B0pos=20tag?= =?UTF-8?q?=20=E8=AE=AD=E7=BB=83=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fastNLP/api/api.py | 52 ++++++++++++++++++++- fastNLP/api/pipeline.py | 4 +- fastNLP/core/batch.py | 4 +- fastNLP/core/dataset.py | 2 +- fastNLP/core/metrics.py | 7 ++- fastNLP/core/tester.py | 2 +- fastNLP/core/trainer.py | 6 +-- fastNLP/models/sequence_modeling.py | 6 +-- fastNLP/modules/decoder/CRF.py | 2 +- reproduction/pos_tag_model/pos_tag.cfg | 4 +- reproduction/pos_tag_model/train_pos_tag.py | 17 +++++-- 11 files changed, 80 insertions(+), 26 deletions(-) diff --git a/fastNLP/api/api.py b/fastNLP/api/api.py index 996d0b17..c7d48326 100644 --- a/fastNLP/api/api.py +++ b/fastNLP/api/api.py @@ -1,14 +1,18 @@ import torch +from fastNLP.core.dataset import DataSet +from fastNLP.core.instance import Instance +from fastNLP.core.predictor import Predictor + class API: def __init__(self): self.pipeline = None self.model = None - def predict(self): - pass + def predict(self, *args, **kwargs): + raise NotImplementedError def load(self, name): _dict = torch.load(name) @@ -19,3 +23,47 @@ class API: _dict = {'pipeline': self.pipeline, 'model': self.model} torch.save(_dict, path) + + +class POS_tagger(API): + """FastNLP API for Part-Of-Speech tagging. + + """ + + def __init__(self): + super(POS_tagger, self).__init__() + + def predict(self, query): + """ + + :param query: list of list of str. Each string is a token(word). + :return answer: list of list of str. Each string is a tag. + """ + self.load("/home/zyfeng/fastnlp_0.2.0/reproduction/pos_tag_model/model_pp.pkl") + + data = DataSet() + for example in query: + data.append(Instance(words=example)) + + data = self.pipeline(data) + + predictor = Predictor() + outputs = predictor.predict(self.model, data) + + answers = [] + for out in outputs: + out = out.numpy() + for sent in out: + answers.append([self.tag_vocab.to_word(tag) for tag in sent]) + return answers + + def load(self, name): + _dict = torch.load(name) + self.pipeline = _dict['pipeline'] + self.model = _dict['model'] + self.tag_vocab = _dict["tag_vocab"] + + +if __name__ == "__main__": + tagger = POS_tagger() + print(tagger.predict([["我", "是", "学生", "。"], ["我", "是", "学生", "。"]])) diff --git a/fastNLP/api/pipeline.py b/fastNLP/api/pipeline.py index 1315412a..0c567678 100644 --- a/fastNLP/api/pipeline.py +++ b/fastNLP/api/pipeline.py @@ -11,7 +11,7 @@ class Pipeline: self.pipeline = [] if isinstance(processors, list): for proc in processors: - assert isinstance(proc, Processor), "Must be a Processor, not {}.".format(type(processor)) + assert isinstance(proc, Processor), "Must be a Processor, not {}.".format(type(proc)) self.pipeline = processors def add_processor(self, processor): @@ -21,7 +21,7 @@ class Pipeline: def process(self, dataset): assert len(self.pipeline) != 0, "You need to add some processor first." 
- for proc_name, proc in self.pipeline: + for proc in self.pipeline: dataset = proc(dataset) return dataset diff --git a/fastNLP/core/batch.py b/fastNLP/core/batch.py index bc19ffb2..29ed4c8a 100644 --- a/fastNLP/core/batch.py +++ b/fastNLP/core/batch.py @@ -9,7 +9,7 @@ class Batch(object): """ - def __init__(self, dataset, batch_size, sampler, use_cuda, sort_in_batch=False, sort_key=None): + def __init__(self, dataset, batch_size, sampler, use_cuda): """ :param dataset: a DataSet object @@ -22,8 +22,6 @@ class Batch(object): self.batch_size = batch_size self.sampler = sampler self.use_cuda = use_cuda - self.sort_in_batch = sort_in_batch - self.sort_key = sort_key if sort_key is not None else 'word_seq' self.idx_list = None self.curidx = 0 diff --git a/fastNLP/core/dataset.py b/fastNLP/core/dataset.py index 4935da96..0b4dfc18 100644 --- a/fastNLP/core/dataset.py +++ b/fastNLP/core/dataset.py @@ -119,7 +119,7 @@ class DataSet(object): assert isinstance(val, bool) self.field_arrays[name].is_target = val else: - raise KeyError + raise KeyError("{} is not a valid field name.".format(name)) return self def set_need_tensor(self, **kwargs): diff --git a/fastNLP/core/metrics.py b/fastNLP/core/metrics.py index d4bf475a..6fe47d72 100644 --- a/fastNLP/core/metrics.py +++ b/fastNLP/core/metrics.py @@ -43,12 +43,11 @@ class SeqLabelEvaluator(Evaluator): :return accuracy: """ truth = [item["truth"] for item in truth] - total_correct, total_count= 0., 0. + total_correct, total_count = 0., 0. for x, y in zip(predict, truth): - x = torch.Tensor(x) + x = torch.tensor(x) y = y.to(x) # make sure they are in the same device - mask = x.ge(1).float() - # correct = torch.sum(x * mask.float() == (y * mask.long()).float()) + mask = x.ge(1).long() correct = torch.sum(x * mask == y * mask) correct -= torch.sum(x.le(0)) total_correct += float(correct) diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py index 4c0cfb41..51f84691 100644 --- a/fastNLP/core/tester.py +++ b/fastNLP/core/tester.py @@ -74,7 +74,7 @@ class Tester(object): output_list = [] truth_list = [] - data_iterator = Batch(dev_data, self.batch_size, sampler=RandomSampler(), use_cuda=self.use_cuda, sort_in_batch=True, sort_key='word_seq') + data_iterator = Batch(dev_data, self.batch_size, sampler=RandomSampler(), use_cuda=self.use_cuda) with torch.no_grad(): for batch_x, batch_y in data_iterator: diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py index a8f0e3c2..e124ad11 100644 --- a/fastNLP/core/trainer.py +++ b/fastNLP/core/trainer.py @@ -11,6 +11,7 @@ from fastNLP.core.metrics import Evaluator from fastNLP.core.optimizer import Optimizer from fastNLP.core.sampler import RandomSampler from fastNLP.core.tester import SeqLabelTester, ClassificationTester, SNLITester +from fastNLP.core.tester import Tester from fastNLP.saver.logger import create_logger from fastNLP.saver.model_saver import ModelSaver @@ -144,7 +145,7 @@ class Trainer(object): # prepare mini-batch iterator data_iterator = Batch(train_data, batch_size=self.batch_size, sampler=RandomSampler(), - use_cuda=self.use_cuda, sort_in_batch=True, sort_key='word_seq') + use_cuda=self.use_cuda) logger.info("prepared data iterator") # one forward and backward pass @@ -230,7 +231,6 @@ class Trainer(object): def update(self): """Perform weight update on a model. - For PyTorch, just call optimizer to update. 
""" self._optimizer.step() @@ -319,7 +319,7 @@ class Trainer(object): ModelSaver(os.path.join(self.pickle_path, model_name)).save_pytorch(network) def _create_validator(self, valid_args): - raise NotImplementedError + return Tester(**valid_args) def set_validator(self, validor): self.validator = validor diff --git a/fastNLP/models/sequence_modeling.py b/fastNLP/models/sequence_modeling.py index 822c9286..8b2375ae 100644 --- a/fastNLP/models/sequence_modeling.py +++ b/fastNLP/models/sequence_modeling.py @@ -116,11 +116,11 @@ class AdvSeqLabel(SeqLabeling): num_classes = args["num_classes"] self.Embedding = encoder.embedding.Embedding(vocab_size, word_emb_dim, init_emb=emb) - self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=3, dropout=0.3, bidirectional=True) + self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=3, dropout=0.5, bidirectional=True) self.Linear1 = encoder.Linear(hidden_dim * 2, hidden_dim * 2 // 3) self.batch_norm = torch.nn.BatchNorm1d(hidden_dim * 2 // 3) self.relu = torch.nn.ReLU() - self.drop = torch.nn.Dropout(0.3) + self.drop = torch.nn.Dropout(0.5) self.Linear2 = encoder.Linear(hidden_dim * 2 // 3, num_classes) self.Crf = decoder.CRF.ConditionalRandomField(num_classes) @@ -135,7 +135,7 @@ class AdvSeqLabel(SeqLabeling): """ word_seq = word_seq.long() word_seq_origin_len = word_seq_origin_len.long() - truth = truth.long() + truth = truth.long() if truth is not None else None self.mask = self.make_mask(word_seq, word_seq_origin_len) batch_size = word_seq.size(0) diff --git a/fastNLP/modules/decoder/CRF.py b/fastNLP/modules/decoder/CRF.py index 30279a61..8532fa46 100644 --- a/fastNLP/modules/decoder/CRF.py +++ b/fastNLP/modules/decoder/CRF.py @@ -128,7 +128,7 @@ class ConditionalRandomField(nn.Module): vpath = data.new_zeros((seq_len, batch_size, n_tags), dtype=torch.long) vscore = data[0] if self.include_start_end_trans: - vscore += self.start_scores.view(1. 
-1) + vscore += self.start_scores.view(1, -1) for i in range(1, seq_len): prev_score = vscore.view(batch_size, n_tags, 1) cur_score = data[i].view(batch_size, 1, n_tags) diff --git a/reproduction/pos_tag_model/pos_tag.cfg b/reproduction/pos_tag_model/pos_tag.cfg index 2a08f6da..40639d7b 100644 --- a/reproduction/pos_tag_model/pos_tag.cfg +++ b/reproduction/pos_tag_model/pos_tag.cfg @@ -1,6 +1,6 @@ [train] -epochs = 20 -batch_size = 32 +epochs = 5 +batch_size = 64 pickle_path = "./save/" validate = false save_best_dev = true diff --git a/reproduction/pos_tag_model/train_pos_tag.py b/reproduction/pos_tag_model/train_pos_tag.py index 8936bac8..6b8b1d7f 100644 --- a/reproduction/pos_tag_model/train_pos_tag.py +++ b/reproduction/pos_tag_model/train_pos_tag.py @@ -1,3 +1,4 @@ +import copy import os import torch @@ -6,6 +7,7 @@ from fastNLP.api.pipeline import Pipeline from fastNLP.api.processor import VocabProcessor, IndexerProcessor, SeqLenProcessor from fastNLP.core.dataset import DataSet from fastNLP.core.instance import Instance +from fastNLP.core.metrics import SeqLabelEvaluator from fastNLP.core.optimizer import Optimizer from fastNLP.core.trainer import Trainer from fastNLP.loader.config_loader import ConfigLoader, ConfigSection @@ -13,9 +15,12 @@ from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader from fastNLP.models.sequence_modeling import AdvSeqLabel cfgfile = './pos_tag.cfg' +# datadir = "/home/zyfeng/data/" +# data_name = "POS_PD_1998.txt" datadir = "/home/zyfeng/fastnlp_0.2.0/test/data_for_tests/" data_name = "people_daily_raw.txt" + pos_tag_data_path = os.path.join(datadir, data_name) pickle_path = "save" data_infer_path = os.path.join(datadir, "infer.utf8") @@ -54,6 +59,9 @@ def train(): seq_len_proc = SeqLenProcessor("word_seq", "word_seq_origin_len") seq_len_proc(dataset) + dev_set = copy.deepcopy(dataset) + dev_set.set_is_target(truth=True) + print("processors defined") # dataset.set_is_target(tag_ids=True) model_param["vocab_size"] = len(word_vocab_proc.get_vocab()) @@ -66,14 +74,15 @@ def train(): # call trainer to train trainer = Trainer(epochs=train_param["epochs"], batch_size=train_param["batch_size"], - validate=False, + validate=True, optimizer=Optimizer("SGD", lr=0.01, momentum=0.9), + evaluator=SeqLabelEvaluator() ) - trainer.train(model, dataset) + trainer.train(model, dataset, dev_set) # save model & pipeline - pp = Pipeline([word_vocab_proc, word_indexer, seq_len_proc]) - save_dict = {"pipeline": pp, "model": model} + pp = Pipeline([word_indexer, seq_len_proc]) + save_dict = {"pipeline": pp, "model": model, "tag_vocab": tag_vocab_proc.get_vocab()} torch.save(save_dict, "model_pp.pkl")
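
For reference, a minimal standalone sketch of the per-parameter summary logging that the first commit adds to Trainer._train_step. The type of self._summary_writer is not shown in these hunks; a tensorboardX SummaryWriter is assumed here, and the model, log directory, and step below are placeholders rather than fastNLP internals. As written in the patch, the "_grad_sum" tag records param.sum() (the sum of the parameter values), not param.grad.sum().

    import torch
    from tensorboardX import SummaryWriter  # assumed writer type; not shown in the patch

    model = torch.nn.Linear(4, 2)           # placeholder for self._model
    writer = SummaryWriter("./runs/demo")   # placeholder for self._summary_writer
    step = 0                                # placeholder for the global training step

    # Mirrors the loop added to _train_step: one scalar per parameter statistic.
    for name, param in model.named_parameters():
        if param.requires_grad:
            writer.add_scalar(name + "_mean", param.mean(), global_step=step)
            writer.add_scalar(name + "_std", param.std(), global_step=step)
            # The patch logs param.sum() under "_grad_sum"; param.grad.sum() would
            # be the sum of the gradient itself.
            writer.add_scalar(name + "_grad_sum", param.sum(), global_step=step)

    writer.close()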