diff --git a/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md
similarity index 100%
rename from CODE_OF_CONDUCT.md
rename to .github/CODE_OF_CONDUCT.md
diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py
index 211397d6..313b7dcb 100644
--- a/fastNLP/core/tester.py
+++ b/fastNLP/core/tester.py
@@ -6,6 +6,9 @@ import torch
 from fastNLP.core.action import Action
 from fastNLP.core.action import RandomSampler, Batchifier
 from fastNLP.modules import utils
+from fastNLP.saver.logger import create_logger
+
+logger = create_logger(__name__, "./train_test.log")
 
 
 class BaseTester(object):
@@ -43,10 +46,11 @@ class BaseTester(object):
         self.batch_output.clear()
 
         dev_data = self.prepare_input(self.pickle_path)
+        logger.info("validation data loaded")
 
         iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True))
         n_batches = len(dev_data) // self.batch_size
-        n_print = 1
+        print_every_step = 1
         step = 0
 
         for batch_x, batch_y in self.make_batch(iterator, dev_data):
@@ -58,8 +62,11 @@ class BaseTester(object):
                 self.batch_output.append(prediction)
             if self.save_loss:
                 self.eval_history.append(eval_results)
-            if step % n_print == 0:
-                print('[test step: {:>4}]'.format(step))
+
+            print_output = "[test step {}] {}".format(step, eval_results)
+            logger.info(print_output)
+            if step % print_every_step == 0:
+                print(print_output)
             step += 1
 
     def prepare_input(self, data_path):
diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py
index 77bb0757..a9e74e22 100644
--- a/fastNLP/core/trainer.py
+++ b/fastNLP/core/trainer.py
@@ -2,7 +2,6 @@ import _pickle
 import os
 import time
 from datetime import timedelta
-from time import time
 
 import numpy as np
 import torch
@@ -12,9 +11,11 @@ from fastNLP.core.action import Action
 from fastNLP.core.action import RandomSampler, Batchifier
 from fastNLP.core.tester import SeqLabelTester, ClassificationTester
 from fastNLP.modules import utils
+from fastNLP.saver.logger import create_logger
 from fastNLP.saver.model_saver import ModelSaver
 
 DEFAULT_QUEUE_SIZE = 300
+logger = create_logger(__name__, "./train_test.log")
 
 
 class BaseTrainer(object):
@@ -73,6 +74,7 @@ class BaseTrainer(object):
         self.model = network
 
         data_train = self.load_train_data(self.pickle_path)
+        logger.info("training data loaded")
 
         # define tester over dev data
         if self.validate:
@@ -80,33 +82,42 @@ class BaseTrainer(object):
                                   "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path,
                                   "use_cuda": self.use_cuda}
             validator = self._create_validator(default_valid_args)
+            logger.info("validator defined as {}".format(str(validator)))
 
         self.define_optimizer()
+        logger.info("optimizer defined as {}".format(str(self.optimizer)))
 
         # main training epochs
-        start = time.time()
         n_samples = len(data_train)
         n_batches = n_samples // self.batch_size
         n_print = 1
+        start = time.time()
+        logger.info("training epochs started")
 
         for epoch in range(1, self.n_epochs + 1):
+            logger.info("training epoch {}".format(epoch))
+
             # turn on network training mode
             self.mode(network, test=False)
 
             # prepare mini-batch iterator
             data_iterator = iter(Batchifier(RandomSampler(data_train), self.batch_size, drop_last=False))
+            logger.info("prepared data iterator")
 
             self._train_step(data_iterator, network, start=start, n_print=n_print, epoch=epoch)
 
             if self.validate:
+                logger.info("validation started")
                 validator.test(network)
 
                 if self.save_best_dev and self.best_eval_result(validator):
                     self.save_model(network)
                     print("saved better model selected by dev")
+                    logger.info("saved better model selected by dev")
 
-                print("[epoch {}]".format(epoch), end=" ")
-                print(validator.show_matrices())
+                valid_results = validator.show_matrices()
+                print("[epoch {}] {}".format(epoch, valid_results))
+                logger.info("[epoch {}] {}".format(epoch, valid_results))
 
     def _train_step(self, data_iterator, network, **kwargs):
         """Training process in one epoch."""
@@ -122,8 +133,10 @@ class BaseTrainer(object):
             if step % kwargs["n_print"] == 0:
                 end = time.time()
                 diff = timedelta(seconds=round(end - kwargs["start"]))
-                print("[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format(
-                    kwargs["epoch"], step, loss.data, diff))
+                print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format(
+                    kwargs["epoch"], step, loss.data, diff)
+                print(print_output)
+                logger.info(print_output)
             step += 1
 
     def load_train_data(self, pickle_path):
@@ -137,6 +150,7 @@ class BaseTrainer(object):
             with open(file_path, 'rb') as f:
                 data = _pickle.load(f)
         else:
+            logger.error("cannot find training data {}: invalid input path for training data.".format(file_path))
             raise RuntimeError("cannot find training data {}".format(file_path))
         return data
 
@@ -182,7 +196,9 @@ class BaseTrainer(object):
         if self.loss_func is None:
             if hasattr(self.model, "loss"):
                 self.loss_func = self.model.loss
+                logger.info("The model defines its own loss; using it.")
             else:
+                logger.info("The model does not define a loss; using the Trainer's loss function.")
                 self.define_loss()
         return self.loss_func(predict, truth)
 
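Note (an illustration, not part of the patch): the module-level logger setup that tester.py and trainer.py gain above boils down to the following plain `logging` calls, based on the `create_logger` implementation introduced in fastNLP/saver/logger.py later in this diff. The logger name shown is what `__name__` resolves to inside trainer.py.

import logging

# Roughly what logger = create_logger(__name__, "./train_test.log") sets up:
logger = logging.getLogger("fastNLP.core.trainer")  # __name__ in trainer.py
logger.setLevel(logging.INFO)                       # default log_level
handler = logging.FileHandler("./train_test.log")   # log_path given, so a file handler
handler.setLevel(logging.INFO)
handler.setFormatter(logging.Formatter(
    "[%(asctime)s %(name)-13s %(levelname)s %(process)d %(thread)d "
    "%(filename)s:%(lineno)-5d] %(message)s"))
logger.addHandler(handler)

logger.info("training epochs started")  # mirrors one of the calls added in train()

Since tester.py and trainer.py pass the same path, records from both modules are appended to the same ./train_test.log file.
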
diff --git a/fastNLP/saver/base_saver.py b/fastNLP/saver/base_saver.py
deleted file mode 100644
index 3a350c0b..00000000
--- a/fastNLP/saver/base_saver.py
+++ /dev/null
@@ -1,5 +0,0 @@
-class BaseSaver(object):
-    """base class for all savers"""
-
-    def __init__(self, save_path):
-        self.save_path = save_path
diff --git a/fastNLP/saver/logger.py b/fastNLP/saver/logger.py
index be38de40..19b2b2c3 100644
--- a/fastNLP/saver/logger.py
+++ b/fastNLP/saver/logger.py
@@ -1,12 +1,34 @@
-from saver.base_saver import BaseSaver
+import logging
+import os
 
 
-class Logger(BaseSaver):
-    """Logging"""
+def create_logger(logger_name, log_path, log_format=None, log_level=logging.INFO):
+    """Return a logger.
 
-    def __init__(self, save_path):
-        super(Logger, self).__init__(save_path)
+    :param logger_name: str, name of the logger
+    :param log_path: str, path to the log file; if None, log to the console instead
+    :param log_format: str, format of the log records; a default format is used if None
+    :param log_level: int, logging level, defaults to logging.INFO
+    :return: logger
 
-    def log(self, string):
-        with open(self.save_path, "a") as f:
-            f.write(string)
+    To use a logger:
+        logger.debug("this is a debug message")
+        logger.info("this is an info message")
+        logger.warning("this is a warning message")
+        logger.error("this is an error message")
+    """
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(log_level)
+    if log_path is None:
+        handler = logging.StreamHandler()
+    else:
+        os.stat(os.path.dirname(os.path.abspath(log_path)))
+        handler = logging.FileHandler(log_path)
+    handler.setLevel(log_level)
+    if log_format is None:
+        log_format = "[%(asctime)s %(name)-13s %(levelname)s %(process)d %(thread)d " \
+                     "%(filename)s:%(lineno)-5d] %(message)s"
+    formatter = logging.Formatter(log_format)
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger
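A usage sketch for the new create_logger (illustrative; assumes the patch is applied). Note that when a file path is given, its parent directory must already exist, since the os.stat call above raises FileNotFoundError otherwise; passing log_path=None selects console output instead:

import logging

from fastNLP.saver.logger import create_logger

# File logger, as tester.py and trainer.py create it at import time:
logger = create_logger(__name__, "./train_test.log")
logger.info("this is an info message")    # appended to ./train_test.log

# Console logger with a custom format and level (log_path=None gives a StreamHandler):
console = create_logger("console", None,
                        log_format="%(levelname)s: %(message)s",
                        log_level=logging.DEBUG)
console.debug("this is a debug message")  # printed to stderr

Each call attaches a fresh handler to the named logger, so calling create_logger twice with the same logger_name would double every record; the pattern in this patch, one call per module, avoids that.
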
diff --git a/fastNLP/saver/model_saver.py b/fastNLP/saver/model_saver.py
index 97675142..81690740 100644
--- a/fastNLP/saver/model_saver.py
+++ b/fastNLP/saver/model_saver.py
@@ -1,13 +1,12 @@
 import torch
 
-from fastNLP.saver.base_saver import BaseSaver
-
 
-class ModelSaver(BaseSaver):
+class ModelSaver(object):
     """Save a models"""
 
     def __init__(self, save_path):
-        super(ModelSaver, self).__init__(save_path)
+        self.save_path = save_path
+        # TODO: check whether the path exists; if not, create it.
 
     def save_pytorch(self, model):
         """
diff --git a/test/seq_labeling.py b/test/seq_labeling.py
index a90dc75e..79f542fb 100644
--- a/test/seq_labeling.py
+++ b/test/seq_labeling.py
@@ -112,5 +112,5 @@ def train_and_test():
 
 
 if __name__ == "__main__":
-    # train_and_test()
-    infer()
+    train_and_test()
+    # infer()
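A possible follow-up for the TODO left in ModelSaver.__init__ above (a sketch, not part of this patch, under the assumption that silently creating missing parent directories is the desired behavior):

import os


class ModelSaver(object):
    """Save a model."""

    def __init__(self, save_path):
        # Create the parent directory of save_path if it does not exist yet.
        save_dir = os.path.dirname(os.path.abspath(save_path))
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        self.save_path = save_path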