@@ -1,11 +1,7 @@
 import torch

 from fastNLP.core.dataset import DataSet
 from fastNLP.core.instance import Instance
-from fastNLP.core.predictor import Predictor
-from fastNLP.api.model_zoo import load_url

 model_urls = {
     'cws': "",

@@ -48,23 +44,13 @@ class POS_tagger(API):
         for example in query:
             data.append(Instance(words=example))

-        data = self.pipeline(data)
-        predictor = Predictor()
-        outputs = predictor.predict(self.model, data)
+        out = self.pipeline(data)

-        answers = []
-        for out in outputs:
-            out = out.numpy()
-            for sent in out:
-                answers.append([self.tag_vocab.to_word(tag) for tag in sent])
-        return answers
+        return [x["outputs"] for x in out]

     def load(self, name):
         _dict = torch.load(name)
         self.pipeline = _dict['pipeline']
-        self.model = _dict['model']
-        self.tag_vocab = _dict["tag_vocab"]
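
Note: with the model and the tag vocabulary folded into the pipeline, the saved
checkpoint becomes self-contained. A minimal sketch of the resulting round trip,
assuming a model_pp.pkl written by the training script further down in this diff
(the query sentence is illustrative):

    import torch
    from fastNLP.core.dataset import DataSet
    from fastNLP.core.instance import Instance

    # The dict now carries only the pipeline; the model and the
    # index-to-tag decoding step live inside it as processors.
    pipeline = torch.load("model_pp.pkl")["pipeline"]

    data = DataSet()
    for words in [["我", "爱", "北京"]]:  # pre-tokenized query
        data.append(Instance(words=words))

    out = pipeline(data)
    tags = [x["outputs"] for x in out]  # one tag sequence per sentence
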
@@ -38,18 +38,18 @@ class SeqLabelEvaluator(Evaluator):
     def __call__(self, predict, truth):
         """
-        :param predict: list of List, the network outputs from all batches.
+        :param predict: list of dict, the network outputs from all batches.
         :param truth: list of dict, the ground truths from all batch_y.
         :return accuracy:
         """
         truth = [item["truth"] for item in truth]
+        predict = [item["predict"] for item in predict]
         total_correct, total_count = 0., 0.
         for x, y in zip(predict, truth):
-            x = torch.tensor(x)
+            # x = torch.tensor(x)
             y = y.to(x)  # make sure they are in the same device
             mask = x.ge(1).long()
-            correct = torch.sum(x * mask == y * mask)
-            correct -= torch.sum(x.le(0))
+            correct = torch.sum(x * mask == y * mask) - torch.sum(x.le(0))
             total_correct += float(correct)
             total_count += float(torch.sum(mask))
         accuracy = total_correct / total_count
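
Note: the single-line rewrite keeps the padding correction explicit. `x * mask ==
y * mask` is trivially true wherever both tensors were zeroed out, so the raw sum
over-counts by exactly the number of padded positions. A toy check, assuming
index 0 is the padding label (which is what `mask = x.ge(1)` implies):

    import torch

    x = torch.tensor([[2, 3, 0, 0]])  # predicted tags, padded to length 4
    y = torch.tensor([[2, 4, 0, 0]])  # gold tags, same padding
    mask = x.ge(1).long()             # [[1, 1, 0, 0]]: real tokens only

    # Both pad positions compare equal (0 == 0), so subtract their count.
    correct = torch.sum(x * mask == y * mask) - torch.sum(x.le(0))
    total = torch.sum(mask)
    print(float(correct) / float(total))  # 0.5: one of two real tokens matches
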
@@ -9,7 +9,7 @@ from fastNLP.core.batch import Batch
 from fastNLP.core.loss import Loss
 from fastNLP.core.metrics import Evaluator
 from fastNLP.core.optimizer import Optimizer
-from fastNLP.core.sampler import RandomSampler
+from fastNLP.core.sampler import BucketSampler
 from fastNLP.core.tester import SeqLabelTester, ClassificationTester, SNLITester
 from fastNLP.core.tester import Tester
 from fastNLP.saver.logger import create_logger

@@ -144,7 +144,8 @@ class Trainer(object):
             logger.info("training epoch {}".format(epoch))

             # prepare mini-batch iterator
-            data_iterator = Batch(train_data, batch_size=self.batch_size, sampler=RandomSampler(),
+            data_iterator = Batch(train_data, batch_size=self.batch_size,
+                                  sampler=BucketSampler(10, self.batch_size, "word_seq_origin_len"),
                                   use_cuda=self.use_cuda)
             logger.info("prepared data iterator")
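
Note: BucketSampler's implementation is not part of this diff. A minimal sketch
of the idea behind the `BucketSampler(10, self.batch_size, "word_seq_origin_len")`
call above, batching sequences of similar length to reduce padding, assuming only
that the sampler ultimately yields an index order:

    import random

    def bucketed_order(seq_lens, num_buckets=10, batch_size=32):
        """Sketch: sort indices by length, cut into buckets, batch within buckets."""
        order = sorted(range(len(seq_lens)), key=lambda i: seq_lens[i])
        per_bucket = max(1, (len(order) + num_buckets - 1) // num_buckets)
        batches = []
        for start in range(0, len(order), per_bucket):
            bucket = order[start:start + per_bucket]
            random.shuffle(bucket)  # keep stochasticity inside each bucket
            batches += [bucket[i:i + batch_size]
                        for i in range(0, len(bucket), batch_size)]
        random.shuffle(batches)     # and across batches
        return [i for batch in batches for i in batch]
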
@@ -170,15 +171,19 @@ class Trainer(object):
             for batch_x, batch_y in data_iterator:

                 prediction = self.data_forward(network, batch_x)

-                loss = self.get_loss(prediction, batch_y)
+                # TODO: refactor self.get_loss
+                loss = prediction["loss"] if "loss" in prediction else self.get_loss(prediction, batch_y)
+                # acc = self._evaluator([{"predict": prediction["predict"]}], [{"truth": batch_x["truth"]}])
+
                 self.grad_backward(loss)
                 self.update()
                 self._summary_writer.add_scalar("loss", loss.item(), global_step=step)
                 for name, param in self._model.named_parameters():
                     if param.requires_grad:
-                        self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=step)
-                        self._summary_writer.add_scalar(name + "_std", param.std(), global_step=step)
-                        self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=step)
+                        # self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=step)
+                        # self._summary_writer.add_scalar(name + "_std", param.std(), global_step=step)
+                        # self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=step)
+                        pass

                 if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0:
                     end = time.time()
@@ -361,10 +361,11 @@ class PeopleDailyCorpusLoader(DataSetLoader):
         pos_tag_examples = []
         ner_examples = []
         for sent in sents:
+            if len(sent) <= 2:
+                continue
             inside_ne = False
             sent_pos_tag = []
             sent_words = []
-            sent_word = []
             sent_ner = []
             words = sent.strip().split()[1:]
             for word in words:

@@ -389,23 +390,10 @@ class PeopleDailyCorpusLoader(DataSetLoader):
                     ner_tag = "O"
                 tmp = word.split("/")
                 token, pos = tmp[0], tmp[1]
-                pos_tag = []
-                for single_token in token:
-                    if len(token) == 1:
-                        single_pos = "S-" + pos
-                    else:
-                        single_pos = "M-" + pos
-                    pos_tag.append(single_pos)
-                    sent_word.append(single_token)
-                if len(token) > 1:
-                    pos_tag[0] = "B-" + pos
-                    pos_tag[-1] = "E-" + pos
-                sent_pos_tag += pos_tag
                 sent_ner.append(ner_tag)
+                sent_pos_tag.append(pos)
                 sent_words.append(token)
-            pos_tag_examples.append([sent_word, sent_pos_tag])
+            pos_tag_examples.append([sent_words, sent_pos_tag])
             ner_examples.append([sent_words, sent_ner])
         # List[List[List[str], List[str]]]
         return pos_tag_examples, ner_examples
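
Note: the loader now keeps one POS tag per word instead of expanding every word
into per-character B/M/E/S tags (that expansion belongs to word segmentation,
not POS tagging). A toy walk-through of the new extraction, on an illustrative
line in the People's Daily `token/pos` format:

    sent = "19980101-01-001-002/m 迈向/v 充满/v 希望/n 的/u"
    sent_words, sent_pos_tag = [], []
    for word in sent.strip().split()[1:]:  # drop the leading sentence id
        token, pos = word.split("/")[0], word.split("/")[1]
        sent_pos_tag.append(pos)           # one tag per word
        sent_words.append(token)
    print(sent_words)    # ['迈向', '充满', '希望', '的']
    print(sent_pos_tag)  # ['v', 'v', 'n', 'u']
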
@@ -14,5 +14,5 @@ class BaseModel(torch.nn.Module):
         trainer = Trainer(**train_args)
         trainer.train(self, train_data, dev_data)

-    def predict(self):
-        pass
+    def predict(self, *args, **kwargs):
+        raise NotImplementedError
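
Note: raising NotImplementedError turns a missing override into a loud failure
instead of a silent None return. The expected shape of an override, mirroring
the AdvSeqLabel.predict added later in this diff:

    from fastNLP.models.base_model import BaseModel

    class MyTagger(BaseModel):  # hypothetical subclass, for illustration
        def predict(self, **x):
            # inference-only entry point: reuse forward(), keep just the tags
            out = self.forward(**x)
            return {"predict": out["predict"]}
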
@@ -1,3 +1,4 @@
+import numpy as np
 import torch

 from fastNLP.models.base_model import BaseModel

@@ -55,10 +56,8 @@ class SeqLabeling(BaseModel):
         # [batch_size, max_len, hidden_size * direction]
         x = self.Linear(x)
         # [batch_size, max_len, num_classes]
-        if truth is not None:
-            return self._internal_loss(x, truth)
-        else:
-            return self.decode(x)
+        return {"loss": self._internal_loss(x, truth) if truth is not None else None,
+                "predict": self.decode(x)}

     def loss(self, x, y):
         """ Since the loss has been computed in forward(), this function simply returns x."""
@@ -116,7 +115,7 @@ class AdvSeqLabel(SeqLabeling):
         num_classes = args["num_classes"]

         self.Embedding = encoder.embedding.Embedding(vocab_size, word_emb_dim, init_emb=emb)
-        self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=3, dropout=0.5, bidirectional=True)
+        self.Rnn = encoder.lstm.LSTM(word_emb_dim, hidden_dim, num_layers=1, dropout=0.5, bidirectional=True)
         self.Linear1 = encoder.Linear(hidden_dim * 2, hidden_dim * 2 // 3)
         self.batch_norm = torch.nn.BatchNorm1d(hidden_dim * 2 // 3)
         self.relu = torch.nn.ReLU()
@@ -128,32 +127,56 @@ class AdvSeqLabel(SeqLabeling):
     def forward(self, word_seq, word_seq_origin_len, truth=None):
         """
         :param word_seq: LongTensor, [batch_size, max_len]
-        :param word_seq_origin_len: list of int.
+        :param word_seq_origin_len: LongTensor, [batch_size, ]
         :param truth: LongTensor, [batch_size, max_len]
         :return y: If truth is None, return list of [decode path(list)]. Used in testing and predicting.
                    If truth is not None, return loss, a scalar. Used in training.
         """
         word_seq = word_seq.long()
-        word_seq_origin_len = word_seq_origin_len.long()
-        truth = truth.long() if truth is not None else None
         self.mask = self.make_mask(word_seq, word_seq_origin_len)
+        word_seq_origin_len = word_seq_origin_len.cpu().numpy()
+        sent_len, idx_sort = np.sort(word_seq_origin_len)[::-1], np.argsort(-word_seq_origin_len)
+        idx_unsort = np.argsort(idx_sort)
+        idx_sort = torch.from_numpy(idx_sort)
+        idx_unsort = torch.from_numpy(idx_unsort)
+        # word_seq_origin_len = word_seq_origin_len.long()
+        truth = truth.long() if truth is not None else None

         batch_size = word_seq.size(0)
         max_len = word_seq.size(1)
+        if next(self.parameters()).is_cuda:
+            word_seq = word_seq.cuda()
+            idx_sort = idx_sort.cuda()
+            idx_unsort = idx_unsort.cuda()
+            self.mask = self.mask.cuda()
+            truth = truth.cuda() if truth is not None else None

         x = self.Embedding(word_seq)
         # [batch_size, max_len, word_emb_dim]
-        x = self.Rnn(x)
+        sent_variable = x.index_select(0, idx_sort)
+        sent_packed = torch.nn.utils.rnn.pack_padded_sequence(sent_variable, sent_len, batch_first=True)
+        x = self.Rnn(sent_packed)
         # [batch_size, max_len, hidden_size * direction]
+        sent_output = torch.nn.utils.rnn.pad_packed_sequence(x, batch_first=True)[0]
+        x = sent_output.index_select(0, idx_unsort)

         x = x.contiguous()
         x = x.view(batch_size * max_len, -1)
         x = self.Linear1(x)
-        x = self.batch_norm(x)
+        # x = self.batch_norm(x)
         x = self.relu(x)
         x = self.drop(x)
         x = self.Linear2(x)
         x = x.view(batch_size, max_len, -1)
         # [batch_size, max_len, num_classes]
-        if truth is not None:
-            return self._internal_loss(x, truth)
-        else:
-            return self.decode(x)
+        return {"loss": self._internal_loss(x, truth) if truth is not None else None,
+                "predict": self.decode(x)}
+
+    def predict(self, **x):
+        out = self.forward(**x)
+        return {"predict": out["predict"]}
@@ -1,6 +1,6 @@
 [train]
-epochs = 5
-batch_size = 64
+epochs = 300
+batch_size = 32
 pickle_path = "./save/"
 validate = false
 save_best_dev = true
@@ -1,11 +1,14 @@
 import copy
 import os
+import sys

+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
+print(sys.path)

 import torch

-from fastNLP.api.pipeline import Pipeline
-from fastNLP.api.processor import VocabProcessor, IndexerProcessor, SeqLenProcessor
 from fastNLP.core.dataset import DataSet
+from fastNLP.api.pipeline import Pipeline
+from fastNLP.api.processor import VocabProcessor, IndexerProcessor, SeqLenProcessor, ModelProcessor, Index2WordProcessor
 from fastNLP.core.instance import Instance
 from fastNLP.core.metrics import SeqLabelEvaluator
 from fastNLP.core.optimizer import Optimizer

@@ -14,11 +17,12 @@ from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
 from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader
 from fastNLP.models.sequence_modeling import AdvSeqLabel

 cfgfile = './pos_tag.cfg'
-# datadir = "/home/zyfeng/data/"
-# data_name = "POS_PD_1998.txt"
-datadir = "/home/zyfeng/fastnlp_0.2.0/test/data_for_tests/"
-data_name = "people_daily_raw.txt"
+datadir = "/home/zyfeng/data/"
+data_name = "CWS_POS_TAG_NER_people_daily.txt"
+# datadir = "/home/zyfeng/env/fastnlp_v_2/test/data_for_tests"
+# data_name = "people_daily_raw.txt"

 pos_tag_data_path = os.path.join(datadir, data_name)

@@ -58,6 +62,7 @@ def train():
     tag_indexer(dataset)
     seq_len_proc = SeqLenProcessor("word_seq", "word_seq_origin_len")
     seq_len_proc(dataset)
+    #torch.save(dataset, "data_set.pkl")

     dev_set = copy.deepcopy(dataset)
     dev_set.set_is_target(truth=True)

@@ -75,14 +80,21 @@ def train():
     trainer = Trainer(epochs=train_param["epochs"],
                       batch_size=train_param["batch_size"],
                       validate=True,
-                      optimizer=Optimizer("SGD", lr=0.01, momentum=0.9),
-                      evaluator=SeqLabelEvaluator()
+                      optimizer=Optimizer("Adam", lr=0.01, weight_decay=0.9),
+                      evaluator=SeqLabelEvaluator(),
+                      use_cuda=True
                       )

     trainer.train(model, dataset, dev_set)

+    model_proc = ModelProcessor(model, "word_seq_origin_len")
+    dataset.set_is_target(truth=True)
+    res = model_proc.process(dataset)
+
+    decoder = Index2WordProcessor(tag_vocab_proc.get_vocab(), "predict", "outputs")
+
     # save model & pipeline
-    pp = Pipeline([word_indexer, seq_len_proc])
-    save_dict = {"pipeline": pp, "model": model, "tag_vocab": tag_vocab_proc.get_vocab()}
+    pp = Pipeline([word_indexer, seq_len_proc, model_proc, decoder])
+    save_dict = {"pipeline": pp}
     torch.save(save_dict, "model_pp.pkl")
@@ -1,22 +1,22 @@
 import os

-from fastNLP.core.vocabulary import Vocabulary
-from fastNLP.loader.dataset_loader import TokenizeDataSetLoader
 from fastNLP.core.metrics import SeqLabelEvaluator
 from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.preprocess import save_pickle
 from fastNLP.core.tester import SeqLabelTester
 from fastNLP.core.trainer import SeqLabelTrainer
+from fastNLP.core.vocabulary import Vocabulary
 from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
+from fastNLP.loader.dataset_loader import TokenizeDataSetLoader
 from fastNLP.loader.model_loader import ModelLoader
 from fastNLP.models.sequence_modeling import SeqLabeling
 from fastNLP.saver.model_saver import ModelSaver

 pickle_path = "./seq_label/"
 model_name = "seq_label_model.pkl"
-config_dir = "test/data_for_tests/config"
-data_path = "test/data_for_tests/people.txt"
-data_infer_path = "test/data_for_tests/people_infer.txt"
+config_dir = "../data_for_tests/config"
+data_path = "../data_for_tests/people.txt"
+data_infer_path = "../data_for_tests/people_infer.txt"


 def test_training():

@@ -84,3 +84,7 @@ def test_training():
     # Start testing with validation data
     data_dev.set_target(truth=True)
     tester.test(model, data_dev)
+
+
+if __name__ == "__main__":
+    test_training()