improve trainer: log mean and std of model params, and sum of gradients

tags/v0.2.0
FengZiYjun, 5 years ago
commit ec9fd32d60
5 changed files with 26 additions and 19 deletions:

  1. fastNLP/core/trainer.py (+15 -13)
  2. fastNLP/modules/decoder/CRF.py (+1 -1)
  3. reproduction/chinese_word_segment/cws.cfg (+2 -2)
  4. reproduction/pos_tag_model/pos_tag.cfg (+2 -2)
  5. reproduction/pos_tag_model/train_pos_tag.py (+6 -1)

fastNLP/core/trainer.py (+15 -13)

@@ -17,6 +17,7 @@ from fastNLP.saver.model_saver import ModelSaver
 logger = create_logger(__name__, "./train_test.log")
 logger.disabled = True
 
+
 class Trainer(object):
     """Operations of training a model, including data loading, gradient descent, and validation.

@@ -138,9 +139,7 @@ class Trainer(object):
         print("training epochs started " + self.start_time)
         logger.info("training epochs started " + self.start_time)
         epoch, iters = 1, 0
-        while(1):
-            if self.n_epochs != -1 and epoch > self.n_epochs:
-                break
+        while epoch <= self.n_epochs:
             logger.info("training epoch {}".format(epoch))
 
             # prepare mini-batch iterator
@@ -149,12 +148,13 @@ class Trainer(object):
             logger.info("prepared data iterator")
 
             # one forward and backward pass
-            iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch, step=iters, dev_data=dev_data)
+            iters = self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch,
+                                     step=iters, dev_data=dev_data)
 
             # validation
             if self.validate:
                 self.valid_model()
-            self.save_model(self._model, 'training_model_'+self.start_time)
+            self.save_model(self._model, 'training_model_' + self.start_time)
             epoch += 1
 
     def _train_step(self, data_iterator, network, **kwargs):
@@ -171,13 +171,13 @@ class Trainer(object):
 
             loss = self.get_loss(prediction, batch_y)
             self.grad_backward(loss)
-            # if torch.rand(1).item() < 0.001:
-            #     print('[grads at epoch: {:>3} step: {:>4}]'.format(kwargs['epoch'], step))
-            #     for name, p in self._model.named_parameters():
-            #         if p.requires_grad:
-            #             print('\t{} {} {}'.format(name, tuple(p.size()), torch.sum(p.grad).item()))
             self.update()
             self._summary_writer.add_scalar("loss", loss.item(), global_step=step)
+            for name, param in self._model.named_parameters():
+                if param.requires_grad:
+                    self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=step)
+                    self._summary_writer.add_scalar(name + "_std", param.std(), global_step=step)
+                    self._summary_writer.add_scalar(name + "_grad_sum", param.sum(), global_step=step)
 
             if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0:
                 end = time.time()
@@ -193,14 +193,14 @@ class Trainer(object):
 
     def valid_model(self):
         if self.dev_data is None:
-            raise RuntimeError(
-                "self.validate is True in trainer, but dev_data is None. Please provide the validation data.")
+            raise RuntimeError(
+                "self.validate is True in trainer, but dev_data is None. Please provide the validation data.")
         logger.info("validation started")
         res = self.validator.test(self._model, self.dev_data)
         if self.save_best_dev and self.best_eval_result(res):
             logger.info('save best result! {}'.format(res))
             print('save best result! {}'.format(res))
-            self.save_model(self._model, 'best_model_'+self.start_time)
+            self.save_model(self._model, 'best_model_' + self.start_time)
         return res
 
     def mode(self, model, is_test=False):
@@ -324,10 +324,12 @@ class Trainer(object):
     def set_validator(self, validor):
         self.validator = validor
 
+
 class SeqLabelTrainer(Trainer):
     """Trainer for Sequence Labeling
 
     """
+
     def __init__(self, **kwargs):
         print(
             "[FastNLP Warning] SeqLabelTrainer will be deprecated. Please use Trainer directly.")


fastNLP/modules/decoder/CRF.py (+1 -1)

@@ -3,6 +3,7 @@ from torch import nn
 from fastNLP.modules.utils import initial_parameter
 
+
 def log_sum_exp(x, dim=-1):
     max_value, _ = x.max(dim=dim, keepdim=True)
     res = torch.log(torch.sum(torch.exp(x - max_value), dim=dim, keepdim=True)) + max_value
@@ -91,7 +92,6 @@ class ConditionalRandomField(nn.Module):
         st_scores = self.start_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[0]]
         last_idx = mask.long().sum(0) - 1
         ed_scores = self.end_scores.view(1, -1).repeat(batch_size, 1)[batch_idx, tags[last_idx, batch_idx]]
-        print(score.size(), st_scores.size(), ed_scores.size())
         score += st_scores + ed_scores
         # return [B,]
         return score
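The only change here removes a stray debug print from ConditionalRandomField's scoring path. The `log_sum_exp` helper shown as context in the first hunk is the standard max-shift trick for computing log sum exp(x) without overflow; a quick self-contained sanity check against PyTorch's built-in (an illustrative sketch, not part of this commit):

    import torch

    # Subtracting the row max before exp() keeps the largest exponent at 0,
    # so exp() cannot overflow even for large scores.
    x = torch.randn(4, 7) * 50
    max_value, _ = x.max(dim=-1, keepdim=True)
    res = torch.log(torch.sum(torch.exp(x - max_value), dim=-1, keepdim=True)) + max_value
    assert torch.allclose(res.squeeze(-1), torch.logsumexp(x, dim=-1))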


reproduction/chinese_word_segment/cws.cfg (+2 -2)

@@ -1,6 +1,6 @@
 [train]
-epochs = 30
-batch_size = 64
+epochs = 40
+batch_size = 8
 pickle_path = "./save/"
 validate = true
 save_best_dev = true
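Cutting batch_size from 64 to 8 multiplies the number of optimizer steps per epoch by 8, on top of the extra 10 epochs. For scale, with a hypothetical corpus of 100,000 sentences:

    # Hypothetical corpus size; the point is the ratio, not the exact numbers.
    n_examples = 100_000
    steps_before = -(-n_examples // 64)  # ceil(100000 / 64) = 1563 steps/epoch
    steps_after = -(-n_examples // 8)    # ceil(100000 / 8) = 12500 steps/epoch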


reproduction/pos_tag_model/pos_tag.cfg (+2 -2)

@@ -1,6 +1,6 @@
 [train]
-epochs = 5
-batch_size = 2
+epochs = 20
+batch_size = 32
 pickle_path = "./save/"
 validate = false
 save_best_dev = true


reproduction/pos_tag_model/train_pos_tag.py (+6 -1)

@@ -6,6 +6,7 @@ from fastNLP.api.pipeline import Pipeline
 from fastNLP.api.processor import VocabProcessor, IndexerProcessor, SeqLenProcessor
 from fastNLP.core.dataset import DataSet
 from fastNLP.core.instance import Instance
+from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.trainer import Trainer
 from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
 from fastNLP.loader.dataset_loader import PeopleDailyCorpusLoader
@@ -63,7 +64,11 @@ def train():
     model = AdvSeqLabel(model_param)
 
     # call trainer to train
-    trainer = Trainer(**train_param.data)
+    trainer = Trainer(epochs=train_param["epochs"],
+                      batch_size=train_param["batch_size"],
+                      validate=False,
+                      optimizer=Optimizer("SGD", lr=0.01, momentum=0.9),
+                      )
     trainer.train(model, dataset)
 
     # save model & pipeline
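The new construction replaces the old catch-all `Trainer(**train_param.data)` with explicit keyword arguments, so only `epochs` and `batch_size` now come from pos_tag.cfg, while validation is switched off and the optimizer is pinned to SGD in code. For reference, the [train] values the script consumes can be inspected with the standard library (a sketch using configparser; the script itself goes through fastNLP's ConfigLoader):

    import configparser

    # Peek at the [train] section of the config changed above.
    cfg = configparser.ConfigParser()
    cfg.read("reproduction/pos_tag_model/pos_tag.cfg")
    print(cfg.getint("train", "epochs"))      # 20 after this commit
    print(cfg.getint("train", "batch_size"))  # 32 after this commit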

