
add logging in Trainer & Tester

- see fastNLP/saver/logger.py for how to create and use a logger (a minimal usage sketch follows below)
- a log file named "train_test.log" will be created in the same directory as the main file where the program starts
- this file records all important events that happen in Trainer's and Tester's methods
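
For reference, a minimal usage sketch of the new create_logger API; the logger name and paths below are illustrative, not taken from the commit:

    from fastNLP.saver.logger import create_logger

    # create a logger that appends to ./train_test.log;
    # pass log_path=None to log to the console instead
    logger = create_logger("my_module", "./train_test.log")
    logger.info("this is an info message")
    logger.error("this is an error message")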
Tag: v0.1.0
Author: FengZiYjun
Commit: 80baf35765
7 changed files with 67 additions and 28 deletions
  1. .github/CODE_OF_CONDUCT.md   (+0 / -0)
  2. fastNLP/core/tester.py       (+10 / -3)
  3. fastNLP/core/trainer.py      (+22 / -6)
  4. fastNLP/saver/base_saver.py  (+0 / -5)
  5. fastNLP/saver/logger.py      (+30 / -8)
  6. fastNLP/saver/model_saver.py (+3 / -4)
  7. test/seq_labeling.py         (+2 / -2)

CODE_OF_CONDUCT.md → .github/CODE_OF_CONDUCT.md (renamed, no content changes)


fastNLP/core/tester.py (+10 / -3)

@@ -6,6 +6,9 @@ import torch
 from fastNLP.core.action import Action
 from fastNLP.core.action import RandomSampler, Batchifier
 from fastNLP.modules import utils
+from fastNLP.saver.logger import create_logger
+
+logger = create_logger(__name__, "./train_test.log")
 
 
 class BaseTester(object):
@@ -43,10 +46,11 @@ class BaseTester(object):
         self.batch_output.clear()
 
         dev_data = self.prepare_input(self.pickle_path)
+        logger.info("validation data loaded")
 
         iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
         n_batches = len(dev_data) // self.batch_size
-        n_print = 1
+        print_every_step = 1
         step = 0
 
         for batch_x, batch_y in self.make_batch(iterator, dev_data):
@@ -58,8 +62,11 @@ class BaseTester(object):
                 self.batch_output.append(prediction)
             if self.save_loss:
                 self.eval_history.append(eval_results)
-            if step % n_print == 0:
-                print('[test step: {:>4}]'.format(step))
+
+            print_output = "[test step {}] {}".format(step, eval_results)
+            logger.info(print_output)
+            if step % print_every_step == 0:
+                print(print_output)
             step += 1
 
     def prepare_input(self, data_path):


fastNLP/core/trainer.py (+22 / -6)

@@ -2,7 +2,6 @@ import _pickle
 import os
 import time
 from datetime import timedelta
-from time import time
 
 import numpy as np
 import torch
@@ -12,9 +11,11 @@ from fastNLP.core.action import Action
 from fastNLP.core.action import RandomSampler, Batchifier
 from fastNLP.core.tester import SeqLabelTester, ClassificationTester
 from fastNLP.modules import utils
+from fastNLP.saver.logger import create_logger
 from fastNLP.saver.model_saver import ModelSaver
 
 DEFAULT_QUEUE_SIZE = 300
+logger = create_logger(__name__, "./train_test.log")
 
 
 class BaseTrainer(object):
@@ -73,6 +74,7 @@ class BaseTrainer(object):
         self.model = network
 
         data_train = self.load_train_data(self.pickle_path)
+        logger.info("training data loaded")
 
         # define tester over dev data
         if self.validate:
@@ -80,33 +82,42 @@ class BaseTrainer(object):
                 "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path,
                 "use_cuda": self.use_cuda}
             validator = self._create_validator(default_valid_args)
+            logger.info("validator defined as {}".format(str(validator)))
 
         self.define_optimizer()
+        logger.info("optimizer defined as {}".format(str(self.optimizer)))
 
         # main training epochs
-        start = time.time()
         n_samples = len(data_train)
         n_batches = n_samples // self.batch_size
         n_print = 1
+        start = time.time()
+        logger.info("training epochs started")
 
         for epoch in range(1, self.n_epochs + 1):
+            logger.info("training epoch {}".format(epoch))
 
             # turn on network training mode
             self.mode(network, test=False)
             # prepare mini-batch iterator
             data_iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=False))
+            logger.info("prepared data iterator")
 
             self._train_step(data_iterator, network, start=start, n_print=n_print, epoch=epoch)
 
             if self.validate:
+                logger.info("validation started")
                 validator.test(network)
 
                 if self.save_best_dev and self.best_eval_result(validator):
                     self.save_model(network)
                     print("saved better model selected by dev")
+                    logger.info("saved better model selected by dev")
 
-                print("[epoch {}]".format(epoch), end=" ")
-                print(validator.show_matrices())
+                valid_results = validator.show_matrices()
+                print("[epoch {}] {}".format(epoch, valid_results))
+                logger.info("[epoch {}] {}".format(epoch, valid_results))
 
     def _train_step(self, data_iterator, network, **kwargs):
         """Training process in one epoch."""
@@ -122,8 +133,10 @@ class BaseTrainer(object):
             if step % kwargs["n_print"] == 0:
                 end = time.time()
                 diff = timedelta(seconds=round(end - kwargs["start"]))
-                print("[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format(
-                    kwargs["epoch"], step, loss.data, diff))
+                print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format(
+                    kwargs["epoch"], step, loss.data, diff)
+                print(print_output)
+                logger.info(print_output)
             step += 1
 
     def load_train_data(self, pickle_path):
@@ -137,6 +150,7 @@ class BaseTrainer(object):
             with open(file_path, 'rb') as f:
                 data = _pickle.load(f)
         else:
+            logger.error("cannot find training data {}. invalid input path for training data.".format(file_path))
             raise RuntimeError("cannot find training data {}".format(file_path))
         return data
 
@@ -182,7 +196,9 @@ class BaseTrainer(object):
         if self.loss_func is None:
             if hasattr(self.model, "loss"):
                 self.loss_func = self.model.loss
+                logger.info("The model has a loss function, use it.")
             else:
+                logger.info("The model didn't define loss, use Trainer's loss.")
                 self.define_loss()
         return self.loss_func(predict, truth)




fastNLP/saver/base_saver.py (+0 / -5, file deleted)

@@ -1,5 +0,0 @@
-class BaseSaver(object):
-    """base class for all savers"""
-
-    def __init__(self, save_path):
-        self.save_path = save_path

fastNLP/saver/logger.py (+30 / -8)

@@ -1,12 +1,34 @@
-from saver.base_saver import BaseSaver
+import logging
+import os
 
 
-class Logger(BaseSaver):
-    """Logging"""
+def create_logger(logger_name, log_path, log_format=None, log_level=logging.INFO):
+    """Return a logger.
 
-    def __init__(self, save_path):
-        super(Logger, self).__init__(save_path)
+    :param logger_name: str
+    :param log_path: str
+    :param log_format:
+    :param log_level:
+    :return: logger
 
-    def log(self, string):
-        with open(self.save_path, "a") as f:
-            f.write(string)
+    To use a logger:
+        logger.debug("this is a debug message")
+        logger.info("this is an info message")
+        logger.warning("this is a warning message")
+        logger.error("this is an error message")
+    """
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(log_level)
+    if log_path is None:
+        handler = logging.StreamHandler()
+    else:
+        os.stat(os.path.dirname(os.path.abspath(log_path)))
+        handler = logging.FileHandler(log_path)
+    handler.setLevel(log_level)
+    if log_format is None:
+        log_format = "[%(asctime)s %(name)-13s %(levelname)s %(process)d %(thread)d " \
+                     "%(filename)s:%(lineno)-5d] %(message)s"
+    formatter = logging.Formatter(log_format)
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger
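
Note that create_logger uses os.stat on the log file's parent directory purely as an existence check: if that directory is missing it raises an OSError rather than creating it. A caller could guard against this along the following lines; the helper name ensure_log_dir is hypothetical and not part of this commit:

    import os

    def ensure_log_dir(log_path):
        # create the parent directory of log_path if it does not exist,
        # so the os.stat() check inside create_logger passes
        log_dir = os.path.dirname(os.path.abspath(log_path))
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)

    ensure_log_dir("./logs/train_test.log")
    logger = create_logger("my_module", "./logs/train_test.log")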

fastNLP/saver/model_saver.py (+3 / -4)

@@ -1,13 +1,12 @@
 import torch
 
-from fastNLP.saver.base_saver import BaseSaver
 
 
-class ModelSaver(BaseSaver):
+class ModelSaver(object):
     """Save a models"""
 
     def __init__(self, save_path):
-        super(ModelSaver, self).__init__(save_path)
+        self.save_path = save_path
+        # TODO: check whether the path exist, if not exist, create it.
 
     def save_pytorch(self, model):
         """


test/seq_labeling.py (+2 / -2)

@@ -112,5 +112,5 @@ def train_and_test():
 
 
 if __name__ == "__main__":
-    # train_and_test()
-    infer()
+    train_and_test()
+    # infer()
