@@ -1,14 +1,18 @@ | |||
import torch | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.core.instance import Instance | |||
from fastNLP.core.predictor import Predictor | |||
class API: | |||
def __init__(self): | |||
self.pipeline = None | |||
self.model = None | |||
def predict(self): | |||
pass | |||
def predict(self, *args, **kwargs): | |||
raise NotImplementedError | |||
def load(self, name): | |||
_dict = torch.load(name) | |||
@@ -19,3 +23,47 @@ class API: | |||
_dict = {'pipeline': self.pipeline, | |||
'model': self.model} | |||
torch.save(_dict, path) | |||
class POS_tagger(API):
    """FastNLP API for Part-Of-Speech tagging.

    The tagger needs three objects, all restored by :meth:`load` from one file:
    ``pipeline`` (called on a DataSet before prediction), ``model`` and
    ``tag_vocab`` (its ``to_word`` maps a predicted tag id back to a tag string).
    """

    # Checkpoint used by ``predict`` when nothing has been loaded explicitly.
    # NOTE(review): machine-specific absolute path; callers on other machines
    # should call ``load(path)`` themselves before ``predict``.
    DEFAULT_MODEL_PATH = "/home/zyfeng/fastnlp_0.2.0/reproduction/pos_tag_model/model_pp.pkl"

    def __init__(self):
        super(POS_tagger, self).__init__()
        # Filled in by ``load``; None means "nothing loaded yet".
        self.tag_vocab = None

    def predict(self, query):
        """Tag every sentence in ``query``.

        :param query: list of list of str. Each string is a token(word).
        :return answer: list of list of str. Each string is a tag.
        """
        # Load lazily and only once: an unconditional load here would throw away
        # a model the caller loaded via ``load`` and re-read the file on every call.
        if self.pipeline is None or self.model is None or self.tag_vocab is None:
            self.load(self.DEFAULT_MODEL_PATH)

        data = DataSet()
        for example in query:
            data.append(Instance(words=example))
        data = self.pipeline(data)

        predictor = Predictor()
        outputs = predictor.predict(self.model, data)

        to_word = self.tag_vocab.to_word
        answers = []
        for out in outputs:
            # presumably each ``out`` is one batch of tag ids, one row per
            # sentence -- TODO confirm against Predictor.predict
            for sent in out.numpy():
                answers.append([to_word(tag) for tag in sent])
        return answers

    def load(self, name):
        """Restore pipeline, model and tag vocabulary from a saved file.

        :param name: path handed to ``torch.load``; the stored object must be a
            dict with the keys 'pipeline', 'model' and 'tag_vocab'.
        :raises KeyError: if one of those keys is missing.
        """
        # SECURITY: torch.load unpickles the file and can execute arbitrary code;
        # only load checkpoints from a trusted source.
        _dict = torch.load(name)
        self.pipeline = _dict['pipeline']
        self.model = _dict['model']
        self.tag_vocab = _dict["tag_vocab"]
if __name__ == "__main__":
    # Smoke test: tag the same tokenized sentence twice and print the result.
    sample_sentence = ["我", "是", "学生", "。"]
    demo_tagger = POS_tagger()
    demo_tags = demo_tagger.predict([sample_sentence, list(sample_sentence)])
    print(demo_tags)
@@ -11,7 +11,7 @@ class Pipeline: | |||
self.pipeline = [] | |||
if isinstance(processors, list): | |||
for proc in processors: | |||
assert isinstance(proc, Processor), "Must be a Processor, not {}.".format(type(processor)) | |||
assert isinstance(proc, Processor), "Must be a Processor, not {}.".format(type(proc)) | |||
self.pipeline = processors | |||
def add_processor(self, processor): | |||
@@ -9,7 +9,7 @@ class Batch(object): | |||
""" | |||
def __init__(self, dataset, batch_size, sampler, use_cuda, sort_in_batch=False, sort_key=None): | |||
def __init__(self, dataset, batch_size, sampler, use_cuda): | |||
""" | |||
:param dataset: a DataSet object | |||
@@ -22,8 +22,6 @@ class Batch(object): | |||
self.batch_size = batch_size | |||
self.sampler = sampler | |||
self.use_cuda = use_cuda | |||
self.sort_in_batch = sort_in_batch | |||
self.sort_key = sort_key if sort_key is not None else 'word_seq' | |||
self.idx_list = None | |||
self.curidx = 0 | |||
@@ -119,7 +119,7 @@ class DataSet(object): | |||
assert isinstance(val, bool) | |||
self.field_arrays[name].is_target = val | |||
else: | |||
raise KeyError | |||
raise KeyError("{} is not a valid field name.".format(name)) | |||
return self | |||
def set_need_tensor(self, **kwargs): | |||
@@ -43,12 +43,11 @@ class SeqLabelEvaluator(Evaluator): | |||
:return accuracy: | |||
""" | |||
truth = [item["truth"] for item in truth] | |||
total_correct, total_count= 0., 0. | |||
total_correct, total_count = 0., 0. | |||
for x, y in zip(predict, truth): | |||
x = torch.Tensor(x) | |||
x = torch.tensor(x) | |||
y = y.to(x) # make sure they are in the same device | |||
mask = x.ge(1).float() | |||
# correct = torch.sum(x * mask.float() == (y * mask.long()).float()) | |||
mask = x.ge(1).long() | |||
correct = torch.sum(x * mask == y * mask) | |||
correct -= torch.sum(x.le(0)) | |||
total_correct += float(correct) | |||
@@ -74,7 +74,7 @@ class Tester(object): | |||
output_list = [] | |||
truth_list = [] | |||
data_iterator = Batch(dev_data, self.batch_size, sampler=RandomSampler(), use_cuda=self.use_cuda, sort_in_batch=True, sort_key='word_seq') | |||
data_iterator = Batch(dev_data, self.batch_size, sampler=RandomSampler(), use_cuda=self.use_cuda) | |||
with torch.no_grad(): | |||
for batch_x, batch_y in data_iterator: | |||
@@ -11,12 +11,14 @@ from fastNLP.core.metrics import Evaluator | |||
from fastNLP.core.optimizer import Optimizer | |||
from fastNLP.core.sampler import RandomSampler | |||
from fastNLP.core.tester import SeqLabelTester, ClassificationTester, SNLITester | |||
from fastNLP.core.tester import Tester | |||
from fastNLP.saver.logger import create_logger | |||
from fastNLP.saver.model_saver import ModelSaver | |||
# Module-level logger. The second argument is presumably the log file path -- confirm in create_logger.
logger = create_logger(__name__, "./train_test.log")
# Switched off by default.
# NOTE(review): assumes create_logger returns a standard logging.Logger, whose `disabled` flag drops every record -- confirm.
logger.disabled = True
class Trainer(object): | |||
"""Operations of training a model, including data loading, gradient descent, and validation. | |||
@@ -138,23 +140,22 @@ class Trainer(object): | |||
print("training epochs started " + self.start_time) | |||
logger.info("training epochs started " + self.start_time) | |||
epoch, iters = 1, 0 | |||
while(1): | |||
if self.n_epochs != -1 and epoch > self.n_epochs: | |||
break | |||
while epoch <= self.n_epochs: | |||
logger.info("training epoch {}".format(epoch)) | |||
# prepare mini-batch iterator | |||
data_iterator = Batch(train_data, batch_size=self.batch_size, sampler=RandomSampler(), | |||
use_cuda=self.use_cuda, sort_in_batch=True, sort_key='word_seq') | |||
use_cuda=self.use_cuda) | |||
logger.info("prepared data iterator") | |||
# one forward and backward pass | |||
iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch, step=iters, dev_data=dev_data) | |||
iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch, | |||
step=iters, dev_data=dev_data) | |||
# validation | |||
if self.validate: | |||
self.valid_model() | |||
self.save_model(self._model, 'training_model_'+self.start_time) | |||
self.save_model(self._model, 'training_model_' + self.start_time) | |||
epoch += 1 | |||
def _train_step(self, data_iterator, network, **kwargs): | |||
@@ -171,13 +172,13 @@ class Trainer(object): | |||
loss = self.get_loss(prediction, batch_y) | |||
self.grad_backward(loss) | |||
# if torch.rand(1).item() < 0.001: | |||
# print('[grads at epoch: {:>3} step: {:>4}]'.format(kwargs['epoch'], step)) | |||
# for name, p in self._model.named_parameters(): | |||
# if p.requires_grad: | |||
# print('\t{} {} {}'.format(name, tuple(p.size()), torch.sum(p.grad).item())) | |||
self.update() | |||
self._summary_writer.add_scalar("loss", loss.item(), global_step=step) | |||
for name, param in self._model.named_parameters(): | |||
if param.requires_grad: | |||
self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=step) | |||
self._summary_writer.add_scalar(name + "_std", param.std(), global_step=step) | |||
self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=step) | |||
if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0: | |||
end = time.time() | |||
@@ -193,14 +194,14 @@ class Trainer(object): | |||
def valid_model(self):
    """Evaluate the current model on the dev data and keep the best checkpoint.

    Runs ``self.validator.test`` on ``self._model`` and ``self.dev_data``. When
    ``self.save_best_dev`` is set and ``self.best_eval_result`` accepts the
    result, the model is saved once under ``'best_model_' + self.start_time``.

    :return res: the value returned by ``self.validator.test``.
    :raises RuntimeError: if ``self.dev_data`` is None.
    """
    if self.dev_data is None:
        raise RuntimeError(
            "self.validate is True in trainer, but dev_data is None. Please provide the validation data.")

    logger.info("validation started")
    res = self.validator.test(self._model, self.dev_data)

    if self.save_best_dev and self.best_eval_result(res):
        logger.info('save best result! {}'.format(res))
        print('save best result! {}'.format(res))
        # Saved exactly once per improvement.
        self.save_model(self._model, 'best_model_' + self.start_time)
    return res
def mode(self, model, is_test=False): | |||
@@ -230,7 +231,6 @@ class Trainer(object): | |||
def update(self):
    """Apply one weight update to the model.

    Delegates entirely to the ``step`` method of ``self._optimizer``.
    """
    optimizer = self._optimizer
    optimizer.step()
@@ -319,15 +319,17 @@ class Trainer(object): | |||
ModelSaver(os.path.join(self.pickle_path, model_name)).save_pytorch(network) | |||
def _create_validator(self, valid_args):
    """Build the validator used during training.

    :param valid_args: mapping of keyword arguments, forwarded unchanged to ``Tester``.
    :return: a new ``Tester`` instance.
    """
    # No unconditional raise ahead of this line: it would make the return unreachable.
    return Tester(**valid_args)
def set_validator(self, validor):
    """Replace the validator held by this trainer.

    :param validor: object stored as ``self.validator``.
    """
    self.validator = validor
class SeqLabelTrainer(Trainer): | |||
"""Trainer for Sequence Labeling | |||
""" | |||
def __init__(self, **kwargs): | |||
print( | |||
"[FastNLP Warning] SeqLabelTrainer will be deprecated. Please use Trainer directly.") | |||
@@ -116,11 +116,11 @@ class AdvSeqLabel(SeqLabeling): | |||
num_classes = args["num_classes"] | |||
self.Embedding = encoder.embedding.Embedding(vocab_size, word_emb_dim, init_emb=emb) | |||
self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=3, dropout=0.3, bidirectional=True) | |||
self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=3, dropout=0.5, bidirectional=True) | |||
self.Linear1 = encoder.Linear(hidden_dim * 2, hidden_dim * 2 // 3) | |||
self.batch_norm = torch.nn.BatchNorm1d(hidden_dim * 2 // 3) | |||
self.relu = torch.nn.ReLU() | |||
self.drop = torch.nn.Dropout(0.3) | |||
self.drop = torch.nn.Dropout(0.5) | |||
self.Linear2 = encoder.Linear(hidden_dim * 2 // 3, num_classes) | |||
self.Crf = decoder.CRF.ConditionalRandomField(num_classes) | |||
@@ -135,7 +135,7 @@ class AdvSeqLabel(SeqLabeling): | |||
""" | |||
word_seq = word_seq.long() | |||
word_seq_origin_len = word_seq_origin_len.long() | |||
truth = truth.long() | |||
truth = truth.long() if truth is not None else None | |||
self.mask = self.make_mask(word_seq, word_seq_origin_len) | |||
batch_size = word_seq.size(0) | |||
@@ -3,6 +3,7 @@ from torch import nn | |||
from fastNLP.modules.utils import initial_parameter | |||
def log_sum_exp(x, dim=-1): | |||
max_value, _ = x.max(dim=dim, keepdim=True) | |||
res = torch.log(torch.sum(torch.exp(x - max_value), dim=dim, keepdim=True)) + max_value | |||
@@ -91,7 +92,6 @@ class ConditionalRandomField(nn.Module): | |||
st_scores = self.start_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[0]] | |||
last_idx = mask.long().sum(0) - 1 | |||
ed_scores = self.end_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[last_idx, batch_idx]] | |||
print(score.size(), st_scores.size(), ed_scores.size()) | |||
score += st_scores + ed_scores | |||
# return [B,] | |||
return score | |||
@@ -128,7 +128,7 @@ class ConditionalRandomField(nn.Module): | |||
vpath = data.new_zeros((seq_len, batch_size, n_tags), dtype=torch.long) | |||
vscore = data[0] | |||
if self.include_start_end_trans: | |||
vscore += self.start_scores.view(1. -1) | |||
vscore += self.start_scores.view(1, -1) | |||
for i in range(1, seq_len): | |||
prev_score = vscore.view(batch_size, n_tags, 1) | |||
cur_score = data[i].view(batch_size, 1, n_tags) | |||
@@ -1,6 +1,6 @@ | |||
[train] | |||
epochs = 30 | |||
batch_size = 64 | |||
epochs = 40 | |||
batch_size = 8 | |||
pickle_path = "./save/" | |||
validate = true | |||
save_best_dev = true | |||
@@ -1,6 +1,6 @@ | |||
[train] | |||
epochs = 5 | |||
batch_size = 2 | |||
batch_size = 64 | |||
pickle_path = "./save/" | |||
validate = false | |||
save_best_dev = true | |||
@@ -1,3 +1,4 @@ | |||
import copy | |||
import os | |||
import torch | |||
@@ -6,15 +7,20 @@ from fastNLP.api.pipeline import Pipeline | |||
from fastNLP.api.processor import VocabProcessor, IndexerProcessor, SeqLenProcessor | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.core.instance import Instance | |||
from fastNLP.core.metrics import SeqLabelEvaluator | |||
from fastNLP.core.optimizer import Optimizer | |||
from fastNLP.core.trainer import Trainer | |||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | |||
from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader | |||
from fastNLP.models.sequence_modeling import AdvSeqLabel | |||
# Config file, given relative to the current working directory.
cfgfile = './pos_tag.cfg'
# datadir = "/home/zyfeng/data/"
# data_name = "POS_PD_1998.txt"
# NOTE(review): machine-specific absolute directory; adjust before running elsewhere.
datadir = "/home/zyfeng/fastnlp_0.2.0/test/data_for_tests/"
data_name = "people_daily_raw.txt"
# Full path of the corpus file: datadir joined with data_name.
pos_tag_data_path = os.path.join(datadir, data_name)
pickle_path = "save"
# "infer.utf8" inside the same data directory.
data_infer_path = os.path.join(datadir, "infer.utf8")
@@ -53,6 +59,9 @@ def train(): | |||
seq_len_proc = SeqLenProcessor("word_seq", "word_seq_origin_len") | |||
seq_len_proc(dataset) | |||
dev_set = copy.deepcopy(dataset) | |||
dev_set.set_is_target(truth=True) | |||
print("processors defined") | |||
# dataset.set_is_target(tag_ids=True) | |||
model_param["vocab_size"] = len(word_vocab_proc.get_vocab()) | |||
@@ -63,12 +72,17 @@ def train(): | |||
model = AdvSeqLabel(model_param) | |||
# call trainer to train | |||
trainer = Trainer(**train_param.data) | |||
trainer.train(model, dataset) | |||
trainer = Trainer(epochs=train_param["epochs"], | |||
batch_size=train_param["batch_size"], | |||
validate=True, | |||
optimizer=Optimizer("SGD", lr=0.01, momentum=0.9), | |||
evaluator=SeqLabelEvaluator() | |||
) | |||
trainer.train(model, dataset, dev_set) | |||
# save model & pipeline | |||
pp = Pipeline([word_vocab_proc, word_indexer, seq_len_proc]) | |||
save_dict = {"pipeline": pp, "model": model} | |||
pp = Pipeline([word_indexer, seq_len_proc]) | |||
save_dict = {"pipeline": pp, "model": model, "tag_vocab": tag_vocab_proc.get_vocab()} | |||
torch.save(save_dict, "model_pp.pkl") | |||