@@ -17,9 +17,9 @@ class Tester(object): | |||
""" | |||
super(Tester, self).__init__() | |||
""" | |||
"default_args" provides default value for important settings. | |||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||
"default_args" provides default value for important settings. | |||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||
Otherwise, error will raise. | |||
""" | |||
default_args = {"batch_size": 8, | |||
@@ -29,8 +29,8 @@ class Tester(object): | |||
"evaluator": Evaluator() | |||
} | |||
""" | |||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||
This is used to warn users of essential settings in the training. | |||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||
This is used to warn users of essential settings in the training. | |||
Specially, "required_args" does not have default value, so they have nothing to do with "default_args". | |||
""" | |||
required_args = {} | |||
@@ -76,14 +76,17 @@ class Tester(object): | |||
data_iterator = Batch(dev_data, self.batch_size, sampler=RandomSampler(), use_cuda=self.use_cuda) | |||
for batch_x, batch_y in data_iterator: | |||
with torch.no_grad(): | |||
with torch.no_grad(): | |||
for batch_x, batch_y in data_iterator: | |||
prediction = self.data_forward(network, batch_x) | |||
output_list.append(prediction) | |||
truth_list.append(batch_y) | |||
eval_results = self.evaluate(output_list, truth_list) | |||
output_list.append(prediction) | |||
truth_list.append(batch_y) | |||
eval_results = self.evaluate(output_list, truth_list) | |||
print("[tester] {}".format(self.print_eval_results(eval_results))) | |||
logger.info("[tester] {}".format(self.print_eval_results(eval_results))) | |||
self.mode(network, is_test=False) | |||
self.metrics = eval_results | |||
return eval_results | |||
def mode(self, model, is_test=False): | |||
"""Train mode or Test mode. This is for PyTorch currently. | |||
@@ -35,20 +35,21 @@ class Trainer(object): | |||
super(Trainer, self).__init__() | |||
""" | |||
"default_args" provides default value for important settings. | |||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||
"default_args" provides default value for important settings. | |||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||
Otherwise, error will raise. | |||
""" | |||
default_args = {"epochs": 1, "batch_size": 2, "validate": False, "use_cuda": False, "pickle_path": "./save/", | |||
"save_best_dev": False, "model_name": "default_model_name.pkl", "print_every_step": 1, | |||
"valid_step": 500, "eval_sort_key": None, | |||
"loss": Loss(None), # used to pass type check | |||
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0), | |||
"evaluator": Evaluator() | |||
} | |||
""" | |||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||
This is used to warn users of essential settings in the training. | |||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||
This is used to warn users of essential settings in the training. | |||
Specially, "required_args" does not have default value, so they have nothing to do with "default_args". | |||
""" | |||
required_args = {} | |||
@@ -70,16 +71,20 @@ class Trainer(object): | |||
else: | |||
# Trainer doesn't care about extra arguments | |||
pass | |||
print(default_args) | |||
print("Training Args {}".format(default_args)) | |||
logger.info("Training Args {}".format(default_args)) | |||
self.n_epochs = default_args["epochs"] | |||
self.batch_size = default_args["batch_size"] | |||
self.n_epochs = int(default_args["epochs"]) | |||
self.batch_size = int(default_args["batch_size"]) | |||
self.pickle_path = default_args["pickle_path"] | |||
self.validate = default_args["validate"] | |||
self.save_best_dev = default_args["save_best_dev"] | |||
self.use_cuda = default_args["use_cuda"] | |||
self.model_name = default_args["model_name"] | |||
self.print_every_step = default_args["print_every_step"] | |||
self.print_every_step = int(default_args["print_every_step"]) | |||
self.valid_step = int(default_args["valid_step"]) | |||
if self.validate is not None: | |||
assert self.valid_step > 0 | |||
self._model = None | |||
self._loss_func = default_args["loss"].get() # return a pytorch loss function or None | |||
@@ -89,6 +94,8 @@ class Trainer(object): | |||
self._summary_writer = SummaryWriter(self.pickle_path + 'tensorboard_logs') | |||
self._graph_summaried = False | |||
self._best_accuracy = 0.0 | |||
self.eval_sort_key = default_args['eval_sort_key'] | |||
self.validator = None | |||
def train(self, network, train_data, dev_data=None): | |||
"""General Training Procedure | |||
@@ -108,8 +115,9 @@ class Trainer(object): | |||
if self.validate: | |||
default_valid_args = {"batch_size": self.batch_size, "pickle_path": self.pickle_path, | |||
"use_cuda": self.use_cuda, "evaluator": self._evaluator} | |||
validator = self._create_validator(default_valid_args) | |||
logger.info("validator defined as {}".format(str(validator))) | |||
if self.validator is None: | |||
self.validator = self._create_validator(default_valid_args) | |||
logger.info("validator defined as {}".format(str(self.validator))) | |||
# optimizer and loss | |||
self.define_optimizer() | |||
@@ -117,29 +125,31 @@ class Trainer(object): | |||
self.define_loss() | |||
logger.info("loss function defined as {}".format(str(self._loss_func))) | |||
# turn on network training mode | |||
self.mode(network, is_test=False) | |||
# main training procedure | |||
start = time.time() | |||
logger.info("training epochs started") | |||
for epoch in range(1, self.n_epochs + 1): | |||
self.start_time = str(start) | |||
logger.info("training epochs started " + self.start_time) | |||
epoch, iters = 1, 0 | |||
while(1): | |||
if self.n_epochs != -1 and epoch > self.n_epochs: | |||
break | |||
logger.info("training epoch {}".format(epoch)) | |||
# turn on network training mode | |||
self.mode(network, is_test=False) | |||
# prepare mini-batch iterator | |||
data_iterator = Batch(train_data, batch_size=self.batch_size, sampler=RandomSampler(), | |||
use_cuda=self.use_cuda) | |||
logger.info("prepared data iterator") | |||
# one forward and backward pass | |||
self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch) | |||
iters += self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch, step=iters, dev_data=dev_data) | |||
# validation | |||
if self.validate: | |||
if dev_data is None: | |||
raise RuntimeError( | |||
"self.validate is True in trainer, but dev_data is None. Please provide the validation data.") | |||
logger.info("validation started") | |||
validator.test(network, dev_data) | |||
self.valid_model() | |||
def _train_step(self, data_iterator, network, **kwargs): | |||
"""Training process in one epoch. | |||
@@ -149,7 +159,8 @@ class Trainer(object): | |||
- start: time.time(), the starting time of this step. | |||
- epoch: int, | |||
""" | |||
step = 0 | |||
step = kwargs['step'] | |||
dev_data = kwargs['dev_data'] | |||
for batch_x, batch_y in data_iterator: | |||
prediction = self.data_forward(network, batch_x) | |||
@@ -166,7 +177,21 @@ class Trainer(object): | |||
kwargs["epoch"], step, loss.data, diff) | |||
print(print_output) | |||
logger.info(print_output) | |||
if self.validate and self.valid_step > 0 and step > 0 and step % self.valid_step == 0: | |||
self.valid_model() | |||
step += 1 | |||
return step | |||
def valid_model(self): | |||
if dev_data is None: | |||
raise RuntimeError( | |||
"self.validate is True in trainer, but dev_data is None. Please provide the validation data.") | |||
logger.info("validation started") | |||
res = self.validator.test(network, dev_data) | |||
if self.save_best_dev and self.best_eval_result(res): | |||
logger.info('save best result! {}'.format(res)) | |||
self.save_model(self._model, 'best_model_'+self.start_time) | |||
return res | |||
def mode(self, model, is_test=False): | |||
"""Train mode or Test mode. This is for PyTorch currently. | |||
@@ -180,11 +205,17 @@ class Trainer(object): | |||
else: | |||
model.train() | |||
def define_optimizer(self): | |||
def define_optimizer(self, optim=None): | |||
"""Define framework-specific optimizer specified by the models. | |||
""" | |||
self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters()) | |||
if optim is not None: | |||
# optimizer constructed by user | |||
self._optimizer = optim | |||
elif self._optimizer is None: | |||
# optimizer constructed by proto | |||
self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters()) | |||
return self._optimizer | |||
def update(self): | |||
"""Perform weight update on a model. | |||
@@ -217,6 +248,8 @@ class Trainer(object): | |||
:param truth: ground truth label vector | |||
:return: a scalar | |||
""" | |||
if isinstance(predict, dict) and isinstance(truth, dict): | |||
return self._loss_func(**predict, **truth) | |||
if len(truth) > 1: | |||
raise NotImplementedError("Not ready to handle multi-labels.") | |||
truth = list(truth.values())[0] if len(truth) > 0 else None | |||
@@ -241,13 +274,27 @@ class Trainer(object): | |||
raise ValueError("Please specify a loss function.") | |||
logger.info("The model didn't define loss, use Trainer's loss.") | |||
def best_eval_result(self, validator): | |||
def best_eval_result(self, metrics): | |||
"""Check if the current epoch yields better validation results. | |||
:param validator: a Tester instance | |||
:return: bool, True means current results on dev set is the best. | |||
""" | |||
loss, accuracy = validator.metrics | |||
if isinstance(metrics, tuple): | |||
loss, metrics = metrics | |||
else: | |||
metrics = validator.metrics | |||
if isinstance(metrics, dict): | |||
if len(metrics) == 1: | |||
accuracy = list(metrics.values())[0] | |||
elif self.eval_sort_key is None: | |||
raise ValueError('dict format metrics should provide sort key for eval best result') | |||
else: | |||
accuracy = metrics[self.eval_sort_key] | |||
else: | |||
accuracy = metrics | |||
if accuracy > self._best_accuracy: | |||
self._best_accuracy = accuracy | |||
return True | |||
@@ -268,6 +315,8 @@ class Trainer(object): | |||
def _create_validator(self, valid_args): | |||
raise NotImplementedError | |||
def set_validator(self, validor): | |||
self.validator = validor | |||
class SeqLabelTrainer(Trainer): | |||
"""Trainer for Sequence Labeling | |||
@@ -243,6 +243,9 @@ class BiaffineParser(GraphParser): | |||
self.normal_dropout = nn.Dropout(p=dropout) | |||
self.use_greedy_infer = use_greedy_infer | |||
initial_parameter(self) | |||
self.word_norm = nn.LayerNorm(word_emb_dim) | |||
self.pos_norm = nn.LayerNorm(pos_emb_dim) | |||
self.lstm_norm = nn.LayerNorm(rnn_out_size) | |||
def forward(self, word_seq, pos_seq, word_seq_origin_len, gold_heads=None, **_): | |||
""" | |||
@@ -266,10 +269,12 @@ class BiaffineParser(GraphParser): | |||
word = self.normal_dropout(self.word_embedding(word_seq)) # [N,L] -> [N,L,C_0] | |||
pos = self.normal_dropout(self.pos_embedding(pos_seq)) # [N,L] -> [N,L,C_1] | |||
word, pos = self.word_norm(word), self.pos_norm(pos) | |||
x = torch.cat([word, pos], dim=2) # -> [N,L,C] | |||
# lstm, extract features | |||
feat, _ = self.lstm(x) # -> [N,L,C] | |||
feat = self.lstm_norm(feat) | |||
# for arc biaffine | |||
# mlp, reduce dim | |||
@@ -292,6 +297,7 @@ class BiaffineParser(GraphParser): | |||
heads = self._mst_decoder(arc_pred, seq_mask) | |||
head_pred = heads | |||
else: | |||
assert self.training # must be training mode | |||
head_pred = None | |||
heads = gold_heads | |||
@@ -331,40 +337,4 @@ class BiaffineParser(GraphParser): | |||
label_nll = -(label_loss*float_mask).sum() / length | |||
return arc_nll + label_nll | |||
def evaluate(self, arc_pred, label_pred, head_indices, head_labels, seq_mask, **kwargs): | |||
""" | |||
Evaluate the performance of prediction. | |||
:return dict: performance results. | |||
head_pred_corrct: number of correct predicted heads. | |||
label_pred_correct: number of correct predicted labels. | |||
total_tokens: number of predicted tokens | |||
""" | |||
if 'head_pred' in kwargs: | |||
head_pred = kwargs['head_pred'] | |||
elif self.use_greedy_infer: | |||
head_pred = self._greedy_decoder(arc_pred, seq_mask) | |||
else: | |||
head_pred = self._mst_decoder(arc_pred, seq_mask) | |||
head_pred_correct = (head_pred == head_indices).long() * seq_mask | |||
_, label_preds = torch.max(label_pred, dim=2) | |||
label_pred_correct = (label_preds == head_labels).long() * head_pred_correct | |||
return {"head_pred_correct": head_pred_correct.sum(dim=1), | |||
"label_pred_correct": label_pred_correct.sum(dim=1), | |||
"total_tokens": seq_mask.sum(dim=1)} | |||
def metrics(self, head_pred_correct, label_pred_correct, total_tokens, **_): | |||
""" | |||
Compute the metrics of model | |||
:param head_pred_corrct: number of correct predicted heads. | |||
:param label_pred_correct: number of correct predicted labels. | |||
:param total_tokens: number of predicted tokens | |||
:return dict: the metrics results | |||
UAS: the head predicted accuracy | |||
LAS: the label predicted accuracy | |||
""" | |||
return {"UAS": head_pred_correct.sum().float() / total_tokens.sum().float() * 100, | |||
"LAS": label_pred_correct.sum().float() / total_tokens.sum().float() * 100} | |||
@@ -1,23 +1,25 @@ | |||
[train] | |||
epochs = -1 | |||
<<<<<<< HEAD | |||
batch_size = 16 | |||
======= | |||
batch_size = 32 | |||
>>>>>>> update biaffine | |||
pickle_path = "./save/" | |||
validate = true | |||
save_best_dev = false | |||
save_best_dev = true | |||
eval_sort_key = "UAS" | |||
use_cuda = true | |||
model_saved_path = "./save/" | |||
task = "parse" | |||
[test] | |||
save_output = true | |||
validate_in_training = true | |||
save_dev_input = false | |||
save_loss = true | |||
batch_size = 16 | |||
batch_size = 64 | |||
pickle_path = "./save/" | |||
use_cuda = true | |||
task = "parse" | |||
[model] | |||
word_vocab_size = -1 | |||
@@ -8,12 +8,14 @@ import math | |||
import torch | |||
from fastNLP.core.trainer import Trainer | |||
from fastNLP.core.metrics import Evaluator | |||
from fastNLP.core.instance import Instance | |||
from fastNLP.core.vocabulary import Vocabulary | |||
from fastNLP.core.dataset import DataSet | |||
from fastNLP.core.batch import Batch | |||
from fastNLP.core.sampler import SequentialSampler | |||
from fastNLP.core.field import TextField, SeqLabelField | |||
from fastNLP.core.preprocess import load_pickle | |||
from fastNLP.core.tester import Tester | |||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | |||
from fastNLP.loader.model_loader import ModelLoader | |||
@@ -111,9 +113,10 @@ class CTBDataLoader(object): | |||
# emb_file_name = '/home/yfshao/glove.6B.100d.txt' | |||
# loader = ConlluDataLoader() | |||
datadir = "/home/yfshao/parser-data" | |||
datadir = '/home/yfshao/workdir/parser-data/' | |||
train_data_name = "train_ctb5.txt" | |||
dev_data_name = "dev_ctb5.txt" | |||
test_data_name = "test_ctb5.txt" | |||
emb_file_name = "/home/yfshao/parser-data/word_OOVthr_30_100v.txt" | |||
loader = CTBDataLoader() | |||
@@ -148,37 +151,33 @@ def load_data(dirpath): | |||
datas[name] = _pickle.load(f) | |||
return datas | |||
class MyTester(object): | |||
def __init__(self, batch_size, use_cuda=False, **kwagrs): | |||
self.batch_size = batch_size | |||
self.use_cuda = use_cuda | |||
def test(self, model, dataset): | |||
self.model = model.cuda() if self.use_cuda else model | |||
self.model.eval() | |||
batchiter = Batch(dataset, self.batch_size, SequentialSampler(), self.use_cuda) | |||
eval_res = defaultdict(list) | |||
i = 0 | |||
for batch_x, batch_y in batchiter: | |||
with torch.no_grad(): | |||
pred_y = self.model(**batch_x) | |||
eval_one = self.model.evaluate(**pred_y, **batch_y) | |||
i += self.batch_size | |||
for eval_name, tensor in eval_one.items(): | |||
eval_res[eval_name].append(tensor) | |||
tmp = {} | |||
for eval_name, tensorlist in eval_res.items(): | |||
tmp[eval_name] = torch.cat(tensorlist, dim=0) | |||
self.res = self.model.metrics(**tmp) | |||
print(self.show_metrics()) | |||
def show_metrics(self): | |||
s = "" | |||
for name, val in self.res.items(): | |||
s += '{}: {:.2f}\t'.format(name, val) | |||
return s | |||
class ParserEvaluator(Evaluator): | |||
def __init__(self): | |||
super(ParserEvaluator, self).__init__() | |||
def __call__(self, predict_list, truth_list): | |||
head_all, label_all, total_all = 0, 0, 0 | |||
for pred, truth in zip(predict_list, truth_list): | |||
head, label, total = self.evaluate(**pred, **truth) | |||
head_all += head | |||
label_all += label | |||
total_all += total | |||
return {'UAS': head_all*1.0 / total_all, 'LAS': label_all*1.0 / total_all} | |||
def evaluate(self, head_pred, label_pred, head_indices, head_labels, seq_mask, **_): | |||
""" | |||
Evaluate the performance of prediction. | |||
:return : performance results. | |||
head_pred_corrct: number of correct predicted heads. | |||
label_pred_correct: number of correct predicted labels. | |||
total_tokens: number of predicted tokens | |||
""" | |||
head_pred_correct = (head_pred == head_indices).long() * seq_mask | |||
_, label_preds = torch.max(label_pred, dim=2) | |||
label_pred_correct = (label_preds == head_labels).long() * head_pred_correct | |||
return head_pred_correct.sum().item(), label_pred_correct.sum().item(), seq_mask.sum().item() | |||
try: | |||
data_dict = load_data(processed_datadir) | |||
@@ -196,6 +195,7 @@ except Exception as _: | |||
tag_v = Vocabulary(need_default=False) | |||
train_data = loader.load(os.path.join(datadir, train_data_name)) | |||
dev_data = loader.load(os.path.join(datadir, dev_data_name)) | |||
test_data = loader.load(os.path.join(datadir, test_data_name)) | |||
train_data.update_vocab(word_seq=word_v, pos_seq=pos_v, head_labels=tag_v) | |||
save_data(processed_datadir, word_v=word_v, pos_v=pos_v, tag_v=tag_v, train_data=train_data, dev_data=dev_data) | |||
@@ -207,8 +207,6 @@ dev_data.set_origin_len("word_seq") | |||
print(train_data[:3]) | |||
print(len(train_data)) | |||
print(len(dev_data)) | |||
ep = train_args['epochs'] | |||
train_args['epochs'] = math.ceil(50000.0 / len(train_data) * train_args['batch_size']) if ep <= 0 else ep | |||
model_args['word_vocab_size'] = len(word_v) | |||
model_args['pos_vocab_size'] = len(pos_v) | |||
model_args['num_label'] = len(tag_v) | |||
@@ -220,7 +218,7 @@ def train(): | |||
def _define_optim(obj): | |||
obj._optimizer = torch.optim.Adam(obj._model.parameters(), **optim_args.data) | |||
obj._scheduler = torch.optim.lr_scheduler.LambdaLR(obj._optimizer, lambda ep: .75 ** (ep / 5e4)) | |||
obj._scheduler = torch.optim.lr_scheduler.LambdaLR(obj._optimizer, lambda ep: max(.75 ** (ep / 5e4), 0.05)) | |||
def _update(obj): | |||
obj._scheduler.step() | |||
@@ -228,8 +226,7 @@ def train(): | |||
trainer.define_optimizer = lambda: _define_optim(trainer) | |||
trainer.update = lambda: _update(trainer) | |||
trainer.get_loss = lambda predict, truth: trainer._loss_func(**predict, **truth) | |||
trainer._create_validator = lambda x: MyTester(**test_args.data) | |||
trainer.set_validator(Tester(**test_args.data, evaluator=ParserEvaluator())) | |||
# Model | |||
model = BiaffineParser(**model_args.data) | |||
@@ -238,6 +235,7 @@ def train(): | |||
word_v.unknown_label = "<OOV>" | |||
embed, _ = EmbedLoader.load_embedding(model_args['word_emb_dim'], emb_file_name, 'glove', word_v, os.path.join(processed_datadir, 'word_emb.pkl')) | |||
model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=False) | |||
model.word_embedding.padding_idx = word_v.padding_idx | |||
model.word_embedding.weight.data[word_v.padding_idx].fill_(0) | |||
model.pos_embedding.padding_idx = pos_v.padding_idx | |||
@@ -262,7 +260,7 @@ def train(): | |||
def test(): | |||
# Tester | |||
tester = MyTester(**test_args.data) | |||
tester = Tester(**test_args.data, evaluator=ParserEvaluator()) | |||
# Model | |||
model = BiaffineParser(**model_args.data) | |||
@@ -275,9 +273,10 @@ def test(): | |||
raise | |||
# Start training | |||
print("Testing Dev data") | |||
tester.test(model, dev_data) | |||
print(tester.show_metrics()) | |||
print("Testing finished!") | |||
print("Testing Test data") | |||
tester.test(model, test_data) | |||