diff --git a/fastNLP/core/loss.py b/fastNLP/core/loss.py
new file mode 100644
index 00000000..f83b4959
--- /dev/null
+++ b/fastNLP/core/loss.py
@@ -0,0 +1,27 @@
+import torch
+
+
+class Loss(object):
+    """Loss function of the algorithm,
+    either a wrapper of a loss function from the framework, or a user-defined loss (which needs PyTorch autograd support)
+
+    """
+
+    def __init__(self, args):
+        if args is None:
+            # None means the loss is expected to be defined by the model itself (see BaseTrainer.define_loss)
+            self._loss = None
+        elif isinstance(args, str):
+            self._loss = self._borrow_from_pytorch(args)
+        else:
+            raise NotImplementedError
+
+    def get(self):
+        return self._loss
+
+    @staticmethod
+    def _borrow_from_pytorch(loss_name):
+        if loss_name == "cross_entropy":
+            return torch.nn.CrossEntropyLoss()
+        else:
+            raise NotImplementedError
diff --git a/fastNLP/core/optimizer.py b/fastNLP/core/optimizer.py
index fbef289a..e106fde0 100644
--- a/fastNLP/core/optimizer.py
+++ b/fastNLP/core/optimizer.py
@@ -1,3 +1,54 @@
-"""
-use optimizer from Pytorch
-"""
+import torch
+
+
+class Optimizer(object):
+    """Wrapper of an optimizer from the framework.
+
+    Supported optimizer names and their arguments (types):
+    1. Adam: lr (float), weight_decay (float)
+    2. AdaGrad
+    3. RMSProp
+    4. SGD: lr (float), momentum (float)
+
+    """
+
+    def __init__(self, optimizer_name, **kwargs):
+        """
+        :param optimizer_name: str, the name of the optimizer
+        :param kwargs: the arguments
+        """
+        self.optim_name = optimizer_name
+        self.kwargs = kwargs
+
+    @property
+    def name(self):
+        return self.optim_name
+
+    @property
+    def params(self):
+        return self.kwargs
+
+    def construct_from_pytorch(self, model_params):
+        """Construct an optimizer from the framework over the given model parameters."""
+
+        if self.optim_name in ["SGD", "sgd"]:
+            if "lr" in self.kwargs:
+                if "momentum" not in self.kwargs:
+                    self.kwargs["momentum"] = 0
+                optimizer = torch.optim.SGD(model_params, lr=self.kwargs["lr"], momentum=self.kwargs["momentum"])
+            else:
+                raise ValueError("requires learning rate for SGD optimizer")
+
+        elif self.optim_name in ["adam", "Adam"]:
+            if "lr" in self.kwargs:
+                if "weight_decay" not in self.kwargs:
+                    self.kwargs["weight_decay"] = 0
+                optimizer = torch.optim.Adam(model_params, lr=self.kwargs["lr"],
+                                             weight_decay=self.kwargs["weight_decay"])
+            else:
+                raise ValueError("requires learning rate for Adam optimizer")
+
+        else:
+            raise NotImplementedError
+
+        return optimizer
diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py
index bafc0b82..a3844991 100644
--- a/fastNLP/core/tester.py
+++ b/fastNLP/core/tester.py
@@ -1,5 +1,3 @@
-import _pickle
-
 import numpy as np
 import torch
 
@@ -14,43 +12,78 @@ logger = create_logger(__name__, "./train_test.log")
 
 
 class BaseTester(object):
     """An collection of model inference and evaluation of performance, used over validation/dev set and test set.
 
""" - def __init__(self, test_args): + def __init__(self, **kwargs): """ - :param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" + :param kwargs: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" """ super(BaseTester, self).__init__() - self.validate_in_training = test_args["validate_in_training"] - self.save_dev_data = None - self.save_output = test_args["save_output"] - self.output = None - self.save_loss = test_args["save_loss"] - self.mean_loss = None - self.batch_size = test_args["batch_size"] - self.pickle_path = test_args["pickle_path"] - self.iterator = None - self.use_cuda = test_args["use_cuda"] - - self.model = None + """ + "default_args" provides default value for important settings. + The initialization arguments "kwargs" with the same key (name) will override the default value. + "kwargs" must have the same type as "default_args" on corresponding keys. + Otherwise, error will raise. + """ + default_args = {"save_output": False, # collect outputs of validation set + "save_loss": False, # collect losses in validation + "save_best_dev": False, # save best model during validation + "batch_size": 8, + "use_cuda": True, + "pickle_path": "./save/", + "model_name": "dev_best_model.pkl", + "print_every_step": 1, + } + """ + "required_args" is the collection of arguments that users must pass to Trainer explicitly. + This is used to warn users of essential settings in the training. + Obviously, "required_args" is the subset of "default_args". + The value in "default_args" to the keys in "required_args" is simply for type check. + """ + # TODO: required arguments + required_args = {} + + for req_key in required_args: + if req_key not in kwargs: + logger.error("Tester lacks argument {}".format(req_key)) + raise ValueError("Tester lacks argument {}".format(req_key)) + + for key in default_args: + if key in kwargs: + if isinstance(kwargs[key], type(default_args[key])): + default_args[key] = kwargs[key] + else: + msg = "Argument %s type mismatch: expected %s while get %s" % ( + key, type(default_args[key]), type(kwargs[key])) + logger.error(msg) + raise ValueError(msg) + else: + # BeseTester doesn't care about extra arguments + pass + print(default_args) + + self.save_output = default_args["save_output"] + self.save_best_dev = default_args["save_best_dev"] + self.save_loss = default_args["save_loss"] + self.batch_size = default_args["batch_size"] + self.pickle_path = default_args["pickle_path"] + self.use_cuda = default_args["use_cuda"] + self.print_every_step = default_args["print_every_step"] + + self._model = None self.eval_history = [] self.batch_output = [] def test(self, network, dev_data): if torch.cuda.is_available() and self.use_cuda: - self.model = network.cuda() + self._model = network.cuda() else: - self.model = network + self._model = network # turn on the testing mode; clean up the history self.mode(network, test=True) self.eval_history.clear() self.batch_output.clear() - # dev_data = self.prepare_input(self.pickle_path) - # logger.info("validation data loaded") - iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) - n_batches = len(dev_data) // self.batch_size - print_every_step = 1 step = 0 for batch_x, batch_y in self.make_batch(iterator, dev_data): @@ -65,21 +98,10 @@ class BaseTester(object): print_output = "[test step {}] {}".format(step, eval_results) logger.info(print_output) - if step % print_every_step == 0: + if step % self.print_every_step 
== 0: print(print_output) step += 1 - def prepare_input(self, data_path): - """Save the dev data once it is loaded. Can return directly next time. - - :param data_path: str, the path to the pickle data for dev - :return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s). - """ - if self.save_dev_data is None: - data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb")) - self.save_dev_data = data_dev - return self.save_dev_data - def mode(self, model, test): """Train mode or Test mode. This is for PyTorch currently. @@ -117,15 +139,14 @@ class SeqLabelTester(BaseTester): Tester for sequence labeling. """ - def __init__(self, test_args): + def __init__(self, **test_args): """ :param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" """ - super(SeqLabelTester, self).__init__(test_args) + super(SeqLabelTester, self).__init__(**test_args) self.max_len = None self.mask = None self.seq_len = None - self.batch_result = None def data_forward(self, network, inputs): """This is only for sequence labeling with CRF decoder. @@ -159,10 +180,10 @@ class SeqLabelTester(BaseTester): :return: """ batch_size, max_len = predict.size(0), predict.size(1) - loss = self.model.loss(predict, truth, self.mask) / batch_size + loss = self._model.loss(predict, truth, self.mask) / batch_size - prediction = self.model.prediction(predict, self.mask) - results = torch.Tensor(prediction).view(-1,) + prediction = self._model.prediction(predict, self.mask) + results = torch.Tensor(prediction).view(-1, ) # make sure "results" is in the same device as "truth" results = results.to(truth) accuracy = torch.sum(results == truth.view((-1,))).to(torch.float) / results.shape[0] @@ -184,21 +205,16 @@ class SeqLabelTester(BaseTester): def make_batch(self, iterator, data): return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True) + class ClassificationTester(BaseTester): """Tester for classification.""" - def __init__(self, test_args): + def __init__(self, **test_args): """ :param test_args: a dict-like object that has __getitem__ method, \ can be accessed by "test_args["key_str"]" """ - super(ClassificationTester, self).__init__(test_args) - self.pickle_path = test_args["pickle_path"] - - self.save_dev_data = None - self.output = None - self.mean_loss = None - self.iterator = None + super(ClassificationTester, self).__init__(**test_args) def make_batch(self, iterator, data, max_len=None): return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len) @@ -221,4 +237,3 @@ class ClassificationTester(BaseTester): y_true = torch.cat(y_true, dim=0) acc = float(torch.sum(y_pred == y_true)) / len(y_true) return y_true.cpu().numpy(), y_prob.cpu().numpy(), acc - diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py index 4b3e5de1..52872d1d 100644 --- a/fastNLP/core/trainer.py +++ b/fastNLP/core/trainer.py @@ -6,10 +6,11 @@ from datetime import timedelta import numpy as np import torch -import torch.nn as nn from fastNLP.core.action import Action from fastNLP.core.action import RandomSampler, Batchifier +from fastNLP.core.loss import Loss +from fastNLP.core.optimizer import Optimizer from fastNLP.core.tester import SeqLabelTester, ClassificationTester from fastNLP.modules import utils from fastNLP.saver.logger import create_logger @@ -23,14 +24,13 @@ class BaseTrainer(object): """Operations to train a model, including data loading, SGD, and validation. 
Subclasses must implement the following abstract methods: - - define_optimizer - grad_backward - get_loss """ - def __init__(self, train_args): + def __init__(self, **kwargs): """ - :param train_args: dict of (key, value), or dict-like object. key is str. + :param kwargs: dict of (key, value), or dict-like object. key is str. The base trainer requires the following keys: - epochs: int, the number of epochs in training @@ -39,19 +39,58 @@ class BaseTrainer(object): - pickle_path: str, the path to pickle files for pre-processing """ super(BaseTrainer, self).__init__() - self.n_epochs = train_args["epochs"] - self.batch_size = train_args["batch_size"] - self.pickle_path = train_args["pickle_path"] - self.validate = train_args["validate"] - self.save_best_dev = train_args["save_best_dev"] - self.model_saved_path = train_args["model_saved_path"] - self.use_cuda = train_args["use_cuda"] - - self.model = None - self.iterator = None - self.loss_func = None - self.optimizer = None + """ + "default_args" provides default value for important settings. + The initialization arguments "kwargs" with the same key (name) will override the default value. + "kwargs" must have the same type as "default_args" on corresponding keys. + Otherwise, error will raise. + """ + default_args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/", + "save_best_dev": True, "model_name": "default_model_name.pkl", + "loss": Loss(None), + "optimizer": Optimizer("Adam", lr=0.001, weight_decay=0) + } + """ + "required_args" is the collection of arguments that users must pass to Trainer explicitly. + This is used to warn users of essential settings in the training. + Obviously, "required_args" is the subset of "default_args". + The value in "default_args" to the keys in "required_args" is simply for type check. 
+ """ + # TODO: required arguments + required_args = {} + + for req_key in required_args: + if req_key not in kwargs: + logger.error("Trainer lacks argument {}".format(req_key)) + raise ValueError("Trainer lacks argument {}".format(req_key)) + + for key in default_args: + if key in kwargs: + if isinstance(kwargs[key], type(default_args[key])): + default_args[key] = kwargs[key] + else: + msg = "Argument %s type mismatch: expected %s while get %s" % ( + key, type(default_args[key]), type(kwargs[key])) + logger.error(msg) + raise ValueError(msg) + else: + # BaseTrainer doesn't care about extra arguments + pass + print(default_args) + + self.n_epochs = default_args["epochs"] + self.batch_size = default_args["batch_size"] + self.pickle_path = default_args["pickle_path"] + self.validate = default_args["validate"] + self.save_best_dev = default_args["save_best_dev"] + self.use_cuda = default_args["use_cuda"] + self.model_name = default_args["model_name"] + + self._model = None + self._loss_func = default_args["loss"].get() # return a pytorch loss function or None + self._optimizer = None + self._optimizer_proto = default_args["optimizer"] def train(self, network, train_data, dev_data=None): """General Training Steps @@ -72,12 +111,9 @@ class BaseTrainer(object): """ # prepare model and data, transfer model to gpu if available if torch.cuda.is_available() and self.use_cuda: - self.model = network.cuda() + self._model = network.cuda() else: - self.model = network - - # train_data = self.load_train_data(self.pickle_path) - # logger.info("training data loaded") + self._model = network # define tester over dev data if self.validate: @@ -88,7 +124,9 @@ class BaseTrainer(object): logger.info("validator defined as {}".format(str(validator))) self.define_optimizer() - logger.info("optimizer defined as {}".format(str(self.optimizer))) + logger.info("optimizer defined as {}".format(str(self._optimizer))) + self.define_loss() + logger.info("loss function defined as {}".format(str(self._loss_func))) # main training epochs n_samples = len(train_data) @@ -113,7 +151,7 @@ class BaseTrainer(object): validator.test(network, dev_data) if self.save_best_dev and self.best_eval_result(validator): - self.save_model(network) + self.save_model(network, self.model_name) print("saved better model selected by dev") logger.info("saved better model selected by dev") @@ -153,6 +191,11 @@ class BaseTrainer(object): logger.error("the number of folds in train and dev data unequals {}!={}".format(len(train_data_cv), len(dev_data_cv))) raise RuntimeError("the number of folds in train and dev data unequals") + if self.validate is False: + logger.warn("Cross validation requires self.validate to be True. Please turn it on. ") + print("[warning] Cross validation requires self.validate to be True. Please turn it on. ") + self.validate = True + n_fold = len(train_data_cv) logger.info("perform {} folds cross validation.".format(n_fold)) for i in range(n_fold): @@ -186,7 +229,7 @@ class BaseTrainer(object): """ Define framework-specific optimizer specified by the models. """ - raise NotImplementedError + self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters()) def update(self): """ @@ -194,7 +237,7 @@ class BaseTrainer(object): For PyTorch, just call optimizer to update. 
""" - raise NotImplementedError + self._optimizer.step() def data_forward(self, network, x): raise NotImplementedError @@ -206,7 +249,8 @@ class BaseTrainer(object): For PyTorch, just do "loss.backward()" """ - raise NotImplementedError + self._model.zero_grad() + loss.backward() def get_loss(self, predict, truth): """ @@ -215,21 +259,25 @@ class BaseTrainer(object): :param truth: ground truth label vector :return: a scalar """ - if self.loss_func is None: - if hasattr(self.model, "loss"): - self.loss_func = self.model.loss - logger.info("The model has a loss function, use it.") - else: - logger.info("The model didn't define loss, use Trainer's loss.") - self.define_loss() - return self.loss_func(predict, truth) + return self._loss_func(predict, truth) def define_loss(self): """ - Assign an instance of loss function to self.loss_func - E.g. self.loss_func = nn.CrossEntropyLoss() + if the model defines a loss, use model's loss. + Otherwise, Trainer must has a loss argument, use it as loss. + These two losses cannot be defined at the same time. + Trainer does not handle loss definition or choose default losses. """ - raise NotImplementedError + if hasattr(self._model, "loss") and self._loss_func is not None: + raise ValueError("Both the model and Trainer define loss. Please take out your loss.") + + if hasattr(self._model, "loss"): + self._loss_func = self._model.loss + logger.info("The model has a loss function, use it.") + else: + if self._loss_func is None: + raise ValueError("Please specify a loss function.") + logger.info("The model didn't define loss, use Trainer's loss.") def best_eval_result(self, validator): """ @@ -238,12 +286,15 @@ class BaseTrainer(object): """ raise NotImplementedError - def save_model(self, network): + def save_model(self, network, model_name): """ :param network: the PyTorch model + :param model_name: str model_best_dev.pkl may be overwritten by a better model in future epochs. """ - ModelSaver(self.model_saved_path + "model_best_dev.pkl").save_pytorch(network) + if model_name[-4:] != ".pkl": + model_name += ".pkl" + ModelSaver(self.pickle_path + model_name).save_pytorch(network) def _create_validator(self, valid_args): raise NotImplementedError @@ -266,18 +317,12 @@ class ToyTrainer(BaseTrainer): return network(x) def grad_backward(self, loss): - self.model.zero_grad() + self._model.zero_grad() loss.backward() def get_loss(self, pred, truth): return np.mean(np.square(pred - truth)) - def define_optimizer(self): - self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01) - - def update(self): - self.optimizer.step() - class SeqLabelTrainer(BaseTrainer): """ @@ -285,24 +330,14 @@ class SeqLabelTrainer(BaseTrainer): """ - def __init__(self, train_args): - super(SeqLabelTrainer, self).__init__(train_args) - self.vocab_size = train_args["vocab_size"] - self.num_classes = train_args["num_classes"] + def __init__(self, **kwargs): + super(SeqLabelTrainer, self).__init__(**kwargs) + # self.vocab_size = kwargs["vocab_size"] + # self.num_classes = kwargs["num_classes"] self.max_len = None self.mask = None self.best_accuracy = 0.0 - def define_optimizer(self): - self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) - - def grad_backward(self, loss): - self.model.zero_grad() - loss.backward() - - def update(self): - self.optimizer.step() - def data_forward(self, network, inputs): if not isinstance(inputs, tuple): raise RuntimeError("output_length must be true for sequence modeling. 
Receive {}".format(type(inputs[0]))) @@ -330,7 +365,7 @@ class SeqLabelTrainer(BaseTrainer): batch_size, max_len = predict.size(0), predict.size(1) assert truth.shape == (batch_size, max_len) - loss = self.model.loss(predict, truth, self.mask) + loss = self._model.loss(predict, truth, self.mask) return loss def best_eval_result(self, validator): @@ -345,48 +380,25 @@ class SeqLabelTrainer(BaseTrainer): return Action.make_batch(iterator, output_length=True, use_cuda=self.use_cuda) def _create_validator(self, valid_args): - return SeqLabelTester(valid_args) + return SeqLabelTester(**valid_args) class ClassificationTrainer(BaseTrainer): """Trainer for classification.""" - def __init__(self, train_args): - super(ClassificationTrainer, self).__init__(train_args) - self.learn_rate = train_args["learn_rate"] - self.momentum = train_args["momentum"] + def __init__(self, **train_args): + super(ClassificationTrainer, self).__init__(**train_args) self.iterator = None self.loss_func = None self.optimizer = None self.best_accuracy = 0 - def define_loss(self): - self.loss_func = nn.CrossEntropyLoss() - - def define_optimizer(self): - """ - Define framework-specific optimizer specified by the models. - """ - self.optimizer = torch.optim.SGD( - self.model.parameters(), - lr=self.learn_rate, - momentum=self.momentum) - def data_forward(self, network, x): """Forward through network.""" logits = network(x) return logits - def grad_backward(self, loss): - """Compute gradient backward.""" - self.model.zero_grad() - loss.backward() - - def update(self): - """Apply gradient.""" - self.optimizer.step() - def make_batch(self, iterator): return Action.make_batch(iterator, output_length=False, use_cuda=self.use_cuda) @@ -404,4 +416,4 @@ class ClassificationTrainer(BaseTrainer): return False def _create_validator(self, valid_args): - return ClassificationTester(valid_args) + return ClassificationTester(**valid_args) diff --git a/fastNLP/loader/config_loader.py b/fastNLP/loader/config_loader.py index 079755e2..9e3ebc1c 100644 --- a/fastNLP/loader/config_loader.py +++ b/fastNLP/loader/config_loader.py @@ -94,6 +94,10 @@ class ConfigSection(object): def __contains__(self, item): return item in self.__dict__.keys() + @property + def data(self): + return self.__dict__ + if __name__ == "__main__": config = ConfigLoader('configLoader', 'there is no data') diff --git a/reproduction/LSTM+self_attention_sentiment_analysis/example.py b/reproduction/LSTM+self_attention_sentiment_analysis/example.py index 74af3c2f..5270d673 100644 --- a/reproduction/LSTM+self_attention_sentiment_analysis/example.py +++ b/reproduction/LSTM+self_attention_sentiment_analysis/example.py @@ -18,7 +18,6 @@ MLP_HIDDEN = 2000 CLASSES_NUM = 5 from fastNLP.models.base_model import BaseModel -from fastNLP.core.trainer import BaseTrainer class MyNet(BaseModel): @@ -60,18 +59,6 @@ class Net(nn.Module): return x, penalty -class MyTrainer(BaseTrainer): - def __init__(self, args): - super(MyTrainer, self).__init__(args) - self.optimizer = None - - def define_optimizer(self): - self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) - - def define_loss(self): - self.loss_func = nn.CrossEntropyLoss() - - def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): """ diff --git a/test/data_for_tests/config b/test/data_for_tests/config index 2ffdcf3b..3f4ff7af 100644 --- a/test/data_for_tests/config +++ b/test/data_for_tests/config @@ -1,65 +1,11 @@ -[General] -revision = "first" 
-datapath = "./data/smallset/imdb/" -embed_path = "./data/smallset/imdb/embedding.txt" -optimizer = "adam" -attn_mode = "rout" -seq_encoder = "bilstm" -out_caps_num = 5 -rout_iter = 3 -max_snt_num = 40 -max_wd_num = 40 -max_epochs = 50 -pre_trained = true -batch_sz = 32 -batch_sz_min = 32 -bucket_sz = 5000 -partial_update_until_epoch = 2 -embed_size = 300 -hidden_size = 200 -dense_hidden = [300, 10] -lr = 0.0002 -decay_steps = 1000 -decay_rate = 0.9 -dropout = 0.2 -early_stopping = 7 -reg = 1e-06 - -[My] -datapath = "./data/smallset/imdb/" -embed_path = "./data/smallset/imdb/embedding.txt" -optimizer = "adam" -attn_mode = "rout" -seq_encoder = "bilstm" -out_caps_num = 5 -rout_iter = 3 -max_snt_num = 40 -max_wd_num = 40 -max_epochs = 50 -pre_trained = true -batch_sz = 32 -batch_sz_min = 32 -bucket_sz = 5000 -partial_update_until_epoch = 2 -embed_size = 300 -hidden_size = 200 -dense_hidden = [300, 10] -lr = 0.0002 -decay_steps = 1000 -decay_rate = 0.9 -dropout = 0.2 -early_stopping = 70 -reg = 1e-05 -test = 5 -new_attr = 40 - -[POS] +[test_seq_label_trainer] epochs = 1 batch_size = 32 -pickle_path = "./data_for_tests/" validate = true save_best_dev = true -model_saved_path = "./" +use_cuda = true + +[test_seq_label_model] rnn_hidden_units = 100 rnn_layers = 1 rnn_bi_direction = true @@ -68,13 +14,12 @@ dropout = 0.5 use_crf = true use_cuda = true -[POS_test] +[test_seq_label_tester] save_output = true validate_in_training = true save_dev_input = false save_loss = true batch_size = 1 -pickle_path = "./data_for_tests/" rnn_hidden_units = 100 rnn_layers = 1 rnn_bi_direction = true @@ -84,7 +29,6 @@ use_crf = true use_cuda = true [POS_infer] -pickle_path = "./data_for_tests/" rnn_hidden_units = 100 rnn_layers = 1 rnn_bi_direction = true @@ -95,14 +39,9 @@ num_classes = 27 [text_class] epochs = 1 batch_size = 10 -pickle_path = "./save_path/" validate = false save_best_dev = false -model_saved_path = "./save_path/" use_cuda = true learn_rate = 1e-3 momentum = 0.9 - -[text_class_model] -vocab_size = 867 -num_classes = 18 \ No newline at end of file +model_name = "class_model.pkl" diff --git a/test/ner.py b/test/ner.py index accf92c2..150bd8c7 100644 --- a/test/ner.py +++ b/test/ner.py @@ -20,7 +20,7 @@ class MyNERTrainer(SeqLabelTrainer): override :return: """ - self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001) + self.optimizer = torch.optim.Adam(self._model.parameters(), lr=0.001) self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5) def update(self): diff --git a/test/seq_labeling.py b/test/seq_labeling.py index b4007092..4cf5e86f 100644 --- a/test/seq_labeling.py +++ b/test/seq_labeling.py @@ -1,7 +1,7 @@ +import os import sys - sys.path.append("..") - +import argparse from fastNLP.loader.config_loader import ConfigLoader, ConfigSection from fastNLP.core.trainer import SeqLabelTrainer from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader @@ -11,17 +11,29 @@ from fastNLP.loader.model_loader import ModelLoader from fastNLP.core.tester import SeqLabelTester from fastNLP.models.sequence_modeling import SeqLabeling from fastNLP.core.predictor import SeqLabelInfer +from fastNLP.core.optimizer import Optimizer + +parser = argparse.ArgumentParser() +parser.add_argument("-s", "--save", type=str, default="./seq_label/", help="path to save pickle files") +parser.add_argument("-t", "--train", type=str, default="./data_for_tests/people.txt", + help="path to the training data") +parser.add_argument("-c", "--config", type=str, 
default="./data_for_tests/config", help="path to the config file") +parser.add_argument("-m", "--model_name", type=str, default="seq_label_model.pkl", help="the name of the model") +parser.add_argument("-i", "--infer", type=str, default="data_for_tests/people_infer.txt", + help="data used for inference") -data_name = "people.txt" -data_path = "data_for_tests/people.txt" -pickle_path = "seq_label/" -data_infer_path = "data_for_tests/people_infer.txt" +args = parser.parse_args() +pickle_path = args.save +model_name = args.model_name +config_dir = args.config +data_path = args.train +data_infer_path = args.infer def infer(): # Load infer configuration, the same as test test_args = ConfigSection() - ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) + ConfigLoader("config.cfg", "").load_config(config_dir, {"POS_infer": test_args}) # fetch dictionary size and number of labels from pickle files word2index = load_pickle(pickle_path, "word2id.pkl") @@ -33,11 +45,11 @@ def infer(): model = SeqLabeling(test_args) # Dump trained parameters into the model - ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl") + ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name)) print("model loaded!") # Data Loader - raw_data_loader = BaseLoader(data_name, data_infer_path) + raw_data_loader = BaseLoader("xxx", data_infer_path) infer_data = raw_data_loader.load_lines() # Inference interface @@ -51,49 +63,72 @@ def infer(): def train_and_test(): # Config Loader - train_args = ConfigSection() - ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args}) + trainer_args = ConfigSection() + model_args = ConfigSection() + ConfigLoader("config.cfg", "").load_config(config_dir, { + "test_seq_label_trainer": trainer_args, "test_seq_label_model": model_args}) # Data Loader - pos_loader = POSDatasetLoader(data_name, data_path) + pos_loader = POSDatasetLoader("xxx", data_path) train_data = pos_loader.load_lines() # Preprocessor p = SeqLabelPreprocess() data_train, data_dev = p.run(train_data, pickle_path=pickle_path, train_dev_split=0.5) - train_args["vocab_size"] = p.vocab_size - train_args["num_classes"] = p.num_classes - - # Trainer - trainer = SeqLabelTrainer(train_args) + model_args["vocab_size"] = p.vocab_size + model_args["num_classes"] = p.num_classes + + # Trainer: two definition styles + # 1 + # trainer = SeqLabelTrainer(trainer_args.data) + + # 2 + trainer = SeqLabelTrainer( + epochs=trainer_args["epochs"], + batch_size=trainer_args["batch_size"], + validate=trainer_args["validate"], + use_cuda=trainer_args["use_cuda"], + pickle_path=pickle_path, + save_best_dev=trainer_args["save_best_dev"], + model_name=model_name, + optimizer=Optimizer("SGD", lr=0.01, momentum=0.9), + ) # Model - model = SeqLabeling(train_args) + model = SeqLabeling(model_args) # Start training trainer.train(model, data_train, data_dev) print("Training finished!") # Saver - saver = ModelSaver(pickle_path + "saved_model.pkl") + saver = ModelSaver(os.path.join(pickle_path, model_name)) saver.save_pytorch(model) print("Model saved!") del model, trainer, pos_loader # Define the same model - model = SeqLabeling(train_args) + model = SeqLabeling(model_args) # Dump trained parameters into the model - ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl") + ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name)) print("model loaded!") # Load test configuration - test_args = ConfigSection() - ConfigLoader("config.cfg", 
"").load_config("./data_for_tests/config", {"POS_test": test_args}) + tester_args = ConfigSection() + ConfigLoader("config.cfg", "").load_config(config_dir, {"test_seq_label_tester": tester_args}) # Tester - tester = SeqLabelTester(test_args) + tester = SeqLabelTester(save_output=False, + save_loss=False, + save_best_dev=False, + batch_size=8, + use_cuda=False, + pickle_path=pickle_path, + model_name="seq_label_in_test.pkl", + print_every_step=1 + ) # Start testing with validation data tester.test(model, data_dev) @@ -105,4 +140,4 @@ def train_and_test(): if __name__ == "__main__": train_and_test() - # infer() + infer() diff --git a/test/text_classify.py b/test/text_classify.py index c452e86c..64294d37 100644 --- a/test/text_classify.py +++ b/test/text_classify.py @@ -1,6 +1,7 @@ # Python: 3.5 # encoding: utf-8 +import argparse import os import sys @@ -13,75 +14,105 @@ from fastNLP.loader.model_loader import ModelLoader from fastNLP.core.preprocess import ClassPreprocess from fastNLP.models.cnn_text_classification import CNNText from fastNLP.saver.model_saver import ModelSaver +from fastNLP.core.optimizer import Optimizer +from fastNLP.core.loss import Loss -save_path = "./test_classification/" -data_dir = "./data_for_tests/" -train_file = 'text_classify.txt' -model_name = "model_class.pkl" +parser = argparse.ArgumentParser() +parser.add_argument("-s", "--save", type=str, default="./test_classification/", help="path to save pickle files") +parser.add_argument("-t", "--train", type=str, default="./data_for_tests/text_classify.txt", + help="path to the training data") +parser.add_argument("-c", "--config", type=str, default="./data_for_tests/config", help="path to the config file") +parser.add_argument("-m", "--model_name", type=str, default="classify_model.pkl", help="the name of the model") + +args = parser.parse_args() +save_dir = args.save +train_data_dir = args.train +model_name = args.model_name +config_dir = args.config def infer(): # load dataset print("Loading data...") - ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) + ds_loader = ClassDatasetLoader("train", train_data_dir) data = ds_loader.load() unlabeled_data = [x[0] for x in data] # pre-process data pre = ClassPreprocess() - vocab_size, n_classes = pre.run(data, pickle_path=save_path) - print("vocabulary size:", vocab_size) - print("number of classes:", n_classes) + data = pre.run(data, pickle_path=save_dir) + print("vocabulary size:", pre.vocab_size) + print("number of classes:", pre.num_classes) model_args = ConfigSection() - ConfigLoader.load_config("data_for_tests/config", {"text_class_model": model_args}) + # TODO: load from config file + model_args["vocab_size"] = pre.vocab_size + model_args["num_classes"] = pre.num_classes + # ConfigLoader.load_config(config_dir, {"text_class_model": model_args}) # construct model print("Building model...") cnn = CNNText(model_args) # Dump trained parameters into the model - ModelLoader.load_pytorch(cnn, "./data_for_tests/saved_model.pkl") + ModelLoader.load_pytorch(cnn, os.path.join(save_dir, model_name)) print("model loaded!") - infer = ClassificationInfer(data_dir) + infer = ClassificationInfer(pickle_path=save_dir) results = infer.predict(cnn, unlabeled_data) print(results) def train(): train_args, model_args = ConfigSection(), ConfigSection() - ConfigLoader.load_config("data_for_tests/config", {"text_class": train_args, "text_class_model": model_args}) + ConfigLoader.load_config(config_dir, {"text_class": train_args}) # load dataset print("Loading 
data...") - ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) + ds_loader = ClassDatasetLoader("train", train_data_dir) data = ds_loader.load() print(data[0]) # pre-process data pre = ClassPreprocess() - data_train = pre.run(data, pickle_path=save_path) + data_train = pre.run(data, pickle_path=save_dir) print("vocabulary size:", pre.vocab_size) print("number of classes:", pre.num_classes) + model_args["num_classes"] = pre.num_classes + model_args["vocab_size"] = pre.vocab_size + # construct model print("Building model...") model = CNNText(model_args) + # ConfigSaver().save_config(config_dir, {"text_class_model": model_args}) + # train print("Training...") - trainer = ClassificationTrainer(train_args) + # 1 + # trainer = ClassificationTrainer(train_args) + + # 2 + trainer = ClassificationTrainer(epochs=train_args["epochs"], + batch_size=train_args["batch_size"], + validate=train_args["validate"], + use_cuda=train_args["use_cuda"], + pickle_path=save_dir, + save_best_dev=train_args["save_best_dev"], + model_name=model_name, + loss=Loss("cross_entropy"), + optimizer=Optimizer("SGD", lr=0.001, momentum=0.9)) trainer.train(model, data_train) print("Training finished!") - saver = ModelSaver("./data_for_tests/saved_model.pkl") + saver = ModelSaver(os.path.join(save_dir, model_name)) saver.save_pytorch(model) print("Model saved!") if __name__ == "__main__": train() - # infer() + infer()