@@ -2,6 +2,9 @@ | |||||
[](https://travis-ci.org/fastnlp/fastNLP)
[](https://codecov.io/gh/fastnlp/fastNLP)
[](https://badge.fury.io/py/fastNLP)
[](http://fastnlp.readthedocs.io/?badge=latest)
fastNLP is a modular Natural Language Processing system based on PyTorch, built for fast development of NLP tools. It decomposes deep-learning NLP models into modules that fall into four categories: encoder, interaction, aggregation and decoder, each category containing several ready-made implementations. Encoder modules encode the input into an abstract representation, interaction modules let the information in that representation interact with each other, aggregation modules aggregate and reduce information, and decoder modules decode the representation into the output. Most current NLP models can be built from these modules, which greatly simplifies developing new NLP models. The architecture of fastNLP is shown in the figure below:
@@ -30,6 +33,7 @@ A typical fastNLP routine is composed of four phases: loading dataset, pre-proce | |||||
from fastNLP.models.base_model import BaseModel | from fastNLP.models.base_model import BaseModel | ||||
from fastNLP.modules import encoder | from fastNLP.modules import encoder | ||||
from fastNLP.modules import aggregation | from fastNLP.modules import aggregation | ||||
from fastNLP.modules import decoder | |||||
from fastNLP.loader.dataset_loader import ClassDatasetLoader | from fastNLP.loader.dataset_loader import ClassDatasetLoader | ||||
from fastNLP.loader.preprocess import ClassPreprocess | from fastNLP.loader.preprocess import ClassPreprocess | ||||
@@ -42,20 +46,20 @@ class ClassificationModel(BaseModel): | |||||
Simple text classification model based on CNN. | Simple text classification model based on CNN. | ||||
""" | """ | ||||
def __init__(self, class_num, vocab_size): | |||||
def __init__(self, num_classes, vocab_size): | |||||
super(ClassificationModel, self).__init__() | super(ClassificationModel, self).__init__() | ||||
self.embed = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.conv = encoder.Conv( | |||||
self.emb = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.enc = encoder.Conv( | |||||
in_channels=300, out_channels=100, kernel_size=3) | in_channels=300, out_channels=100, kernel_size=3) | ||||
self.pool = aggregation.MaxPool() | |||||
self.output = encoder.Linear(input_size=100, output_size=class_num) | |||||
self.agg = aggregation.MaxPool() | |||||
self.dec = decoder.MLP([100, num_classes], num_class=num_classes)  # MLP takes a list of layer sizes
def forward(self, x): | def forward(self, x): | ||||
x = self.embed(x) # [N,L] -> [N,L,C] | |||||
x = self.conv(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.pool(x) # [N,L,C] -> [N,C] | |||||
x = self.output(x) # [N,C] -> [N, N_class] | |||||
x = self.emb(x) # [N,L] -> [N,L,C] | |||||
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.agg(x) # [N,L,C] -> [N,C] | |||||
x = self.dec(x) # [N,C] -> [N, N_class] | |||||
return x | return x | ||||
@@ -75,7 +79,7 @@ model_args = { | |||||
'num_classes': n_classes, | 'num_classes': n_classes, | ||||
'vocab_size': vocab_size | 'vocab_size': vocab_size | ||||
} | } | ||||
model = ClassificationModel(class_num=n_classes, vocab_size=vocab_size) | |||||
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size) | |||||
# train model | # train model | ||||
train_args = { | train_args = { | ||||
@@ -0,0 +1,27 @@ | |||||
import torch | |||||
class Loss(object): | |||||
"""Loss function of the algorithm, | |||||
either the wrapper of a loss function from framework, or a user-defined loss (need pytorch auto_grad support) | |||||
""" | |||||
def __init__(self, args): | |||||
if args is None: | |||||
# None means no loss is given here; the Trainer may fall back to the model's own loss
self._loss = None | |||||
elif isinstance(args, str): | |||||
self._loss = self._borrow_from_pytorch(args) | |||||
else: | |||||
raise NotImplementedError | |||||
def get(self): | |||||
return self._loss | |||||
@staticmethod | |||||
def _borrow_from_pytorch(loss_name): | |||||
if loss_name == "cross_entropy": | |||||
return torch.nn.CrossEntropyLoss() | |||||
else: | |||||
raise NotImplementedError |
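For reference, a minimal usage sketch of this wrapper; the tensor shapes and values below are illustrative, not part of the library:

import torch
from fastNLP.core.loss import Loss

loss_func = Loss("cross_entropy").get()   # borrows torch.nn.CrossEntropyLoss()
logits = torch.randn(4, 5)                # [batch_size, num_classes]
labels = torch.tensor([0, 2, 1, 4])
print(loss_func(logits, labels))          # scalar loss tensor

print(Loss(None).get())                   # None: the Trainer may fall back to the model's own loss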
@@ -1,3 +1,54 @@ | |||||
""" | |||||
use optimizer from Pytorch | |||||
""" | |||||
import torch | |||||
class Optimizer(object): | |||||
"""Wrapper of optimizer from framework | |||||
Supported optimizer names and their arguments (types):
1. Adam: lr (float), weight_decay (float) | |||||
2. AdaGrad | |||||
3. RMSProp | |||||
4. SGD: lr (float), momentum (float) | |||||
""" | |||||
def __init__(self, optimizer_name, **kwargs): | |||||
""" | |||||
:param optimizer_name: str, the name of the optimizer | |||||
:param kwargs: the arguments | |||||
""" | |||||
self.optim_name = optimizer_name | |||||
self.kwargs = kwargs | |||||
@property | |||||
def name(self): | |||||
return self.optim_name | |||||
@property | |||||
def params(self): | |||||
return self.kwargs | |||||
def construct_from_pytorch(self, model_params): | |||||
"""construct a optimizer from framework over given model parameters""" | |||||
if self.optim_name in ["SGD", "sgd"]: | |||||
if "lr" in self.kwargs: | |||||
if "momentum" not in self.kwargs: | |||||
self.kwargs["momentum"] = 0 | |||||
optimizer = torch.optim.SGD(model_params, lr=self.kwargs["lr"], momentum=self.kwargs["momentum"]) | |||||
else: | |||||
raise ValueError("requires learning rate for SGD optimizer") | |||||
elif self.optim_name in ["adam", "Adam"]: | |||||
if "lr" in self.kwargs: | |||||
if "weight_decay" not in self.kwargs: | |||||
self.kwargs["weight_decay"] = 0 | |||||
optimizer = torch.optim.Adam(model_params, lr=self.kwargs["lr"], | |||||
weight_decay=self.kwargs["weight_decay"]) | |||||
else: | |||||
raise ValueError("requires learning rate for Adam optimizer") | |||||
else: | |||||
raise NotImplementedError | |||||
return optimizer |
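A minimal sketch of the intended two-step usage; the model below is a stand-in torch module, and only SGD and Adam are handled above:

import torch.nn as nn
from fastNLP.core.optimizer import Optimizer

model = nn.Linear(10, 2)                             # placeholder model
proto = Optimizer("SGD", lr=0.01)                    # momentum defaults to 0 when omitted
optimizer = proto.construct_from_pytorch(model.parameters())
# Optimizer("Adam", lr=0.001) works the same way; weight_decay defaults to 0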
@@ -19,13 +19,13 @@ DEFAULT_WORD_TO_INDEX = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1, | |||||
def save_pickle(obj, pickle_path, file_name): | def save_pickle(obj, pickle_path, file_name): | ||||
with open(os.path.join(pickle_path, file_name), "wb") as f: | with open(os.path.join(pickle_path, file_name), "wb") as f: | ||||
_pickle.dump(obj, f) | _pickle.dump(obj, f) | ||||
print("{} saved. ".format(file_name)) | |||||
print("{} saved in {}".format(file_name, pickle_path)) | |||||
def load_pickle(pickle_path, file_name): | def load_pickle(pickle_path, file_name): | ||||
with open(os.path.join(pickle_path, file_name), "rb") as f: | with open(os.path.join(pickle_path, file_name), "rb") as f: | ||||
obj = _pickle.load(f) | obj = _pickle.load(f) | ||||
print("{} loaded. ".format(file_name)) | |||||
print("{} loaded from {}".format(file_name, pickle_path)) | |||||
return obj | return obj | ||||
@@ -59,7 +59,6 @@ class BasePreprocess(object): | |||||
def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10): | def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10): | ||||
"""Main preprocessing pipeline. | """Main preprocessing pipeline. | ||||
:param train_dev_data: three-level list, with either single label or multiple labels in a sample. | :param train_dev_data: three-level list, with either single label or multiple labels in a sample. | ||||
:param test_data: three-level list, with either single label or multiple labels in a sample. (optional) | :param test_data: three-level list, with either single label or multiple labels in a sample. (optional) | ||||
:param pickle_path: str, the path to save the pickle files. | :param pickle_path: str, the path to save the pickle files. | ||||
@@ -98,6 +97,8 @@ class BasePreprocess(object): | |||||
save_pickle(data_train, pickle_path, "data_train.pkl") | save_pickle(data_train, pickle_path, "data_train.pkl") | ||||
else: | else: | ||||
data_train = load_pickle(pickle_path, "data_train.pkl") | data_train = load_pickle(pickle_path, "data_train.pkl") | ||||
if pickle_exist(pickle_path, "data_dev.pkl"): | |||||
data_dev = load_pickle(pickle_path, "data_dev.pkl") | |||||
else: | else: | ||||
# cross_val is True | # cross_val is True | ||||
if not pickle_exist(pickle_path, "data_train_0.pkl"): | if not pickle_exist(pickle_path, "data_train_0.pkl"): | ||||
@@ -1,5 +1,3 @@ | |||||
import _pickle | |||||
import numpy as np | import numpy as np | ||||
import torch | import torch | ||||
@@ -14,43 +12,78 @@ logger = create_logger(__name__, "./train_test.log") | |||||
class BaseTester(object): | class BaseTester(object): | ||||
"""An collection of model inference and evaluation of performance, used over validation/dev set and test set. """ | """An collection of model inference and evaluation of performance, used over validation/dev set and test set. """ | ||||
def __init__(self, test_args): | |||||
def __init__(self, **kwargs): | |||||
""" | """ | ||||
:param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" | |||||
:param kwargs: keyword arguments that override the default settings; see "default_args" below for the supported keys
""" | """ | ||||
super(BaseTester, self).__init__() | super(BaseTester, self).__init__() | ||||
self.validate_in_training = test_args["validate_in_training"] | |||||
self.save_dev_data = None | |||||
self.save_output = test_args["save_output"] | |||||
self.output = None | |||||
self.save_loss = test_args["save_loss"] | |||||
self.mean_loss = None | |||||
self.batch_size = test_args["batch_size"] | |||||
self.pickle_path = test_args["pickle_path"] | |||||
self.iterator = None | |||||
self.use_cuda = test_args["use_cuda"] | |||||
self.model = None | |||||
""" | |||||
"default_args" provides default value for important settings. | |||||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||||
Otherwise, error will raise. | |||||
""" | |||||
default_args = {"save_output": False, # collect outputs of validation set | |||||
"save_loss": False, # collect losses in validation | |||||
"save_best_dev": False, # save best model during validation | |||||
"batch_size": 8, | |||||
"use_cuda": True, | |||||
"pickle_path": "./save/", | |||||
"model_name": "dev_best_model.pkl", | |||||
"print_every_step": 1, | |||||
} | |||||
""" | |||||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||||
This is used to warn users of essential settings in the training. | |||||
Obviously, "required_args" is the subset of "default_args". | |||||
The value in "default_args" to the keys in "required_args" is simply for type check. | |||||
""" | |||||
# TODO: required arguments | |||||
required_args = {} | |||||
for req_key in required_args: | |||||
if req_key not in kwargs: | |||||
logger.error("Tester lacks argument {}".format(req_key)) | |||||
raise ValueError("Tester lacks argument {}".format(req_key)) | |||||
for key in default_args: | |||||
if key in kwargs: | |||||
if isinstance(kwargs[key], type(default_args[key])): | |||||
default_args[key] = kwargs[key] | |||||
else: | |||||
msg = "Argument %s type mismatch: expected %s while get %s" % ( | |||||
key, type(default_args[key]), type(kwargs[key])) | |||||
logger.error(msg) | |||||
raise ValueError(msg) | |||||
else: | |||||
# BaseTester doesn't care about extra arguments
pass | |||||
print(default_args) | |||||
self.save_output = default_args["save_output"] | |||||
self.save_best_dev = default_args["save_best_dev"] | |||||
self.save_loss = default_args["save_loss"] | |||||
self.batch_size = default_args["batch_size"] | |||||
self.pickle_path = default_args["pickle_path"] | |||||
self.use_cuda = default_args["use_cuda"] | |||||
self.print_every_step = default_args["print_every_step"] | |||||
self._model = None | |||||
self.eval_history = [] | self.eval_history = [] | ||||
self.batch_output = [] | self.batch_output = [] | ||||
def test(self, network, dev_data): | def test(self, network, dev_data): | ||||
if torch.cuda.is_available() and self.use_cuda: | if torch.cuda.is_available() and self.use_cuda: | ||||
self.model = network.cuda() | |||||
self._model = network.cuda() | |||||
else: | else: | ||||
self.model = network | |||||
self._model = network | |||||
# turn on the testing mode; clean up the history | # turn on the testing mode; clean up the history | ||||
self.mode(network, test=True) | self.mode(network, test=True) | ||||
self.eval_history.clear() | self.eval_history.clear() | ||||
self.batch_output.clear() | self.batch_output.clear() | ||||
# dev_data = self.prepare_input(self.pickle_path) | |||||
# logger.info("validation data loaded") | |||||
iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) | iterator = iter(Batchifier(RandomSampler(dev_data), self.batch_size, drop_last=True)) | ||||
n_batches = len(dev_data) // self.batch_size | |||||
print_every_step = 1 | |||||
step = 0 | step = 0 | ||||
for batch_x, batch_y in self.make_batch(iterator, dev_data): | for batch_x, batch_y in self.make_batch(iterator, dev_data): | ||||
@@ -65,21 +98,10 @@ class BaseTester(object): | |||||
print_output = "[test step {}] {}".format(step, eval_results) | print_output = "[test step {}] {}".format(step, eval_results) | ||||
logger.info(print_output) | logger.info(print_output) | ||||
if step % print_every_step == 0: | |||||
if self.print_every_step > 0 and step % self.print_every_step == 0: | |||||
print(print_output) | print(print_output) | ||||
step += 1 | step += 1 | ||||
def prepare_input(self, data_path): | |||||
"""Save the dev data once it is loaded. Can return directly next time. | |||||
:param data_path: str, the path to the pickle data for dev | |||||
:return save_dev_data: list. Each entry is a sample, which is also a list of features and label(s). | |||||
""" | |||||
if self.save_dev_data is None: | |||||
data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb")) | |||||
self.save_dev_data = data_dev | |||||
return self.save_dev_data | |||||
def mode(self, model, test): | def mode(self, model, test): | ||||
"""Train mode or Test mode. This is for PyTorch currently. | """Train mode or Test mode. This is for PyTorch currently. | ||||
@@ -117,15 +139,14 @@ class SeqLabelTester(BaseTester): | |||||
Tester for sequence labeling. | Tester for sequence labeling. | ||||
""" | """ | ||||
def __init__(self, test_args): | |||||
def __init__(self, **test_args): | |||||
""" | """ | ||||
:param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" | :param test_args: a dict-like object that has __getitem__ method, can be accessed by "test_args["key_str"]" | ||||
""" | """ | ||||
super(SeqLabelTester, self).__init__(test_args) | |||||
super(SeqLabelTester, self).__init__(**test_args) | |||||
self.max_len = None | self.max_len = None | ||||
self.mask = None | self.mask = None | ||||
self.seq_len = None | self.seq_len = None | ||||
self.batch_result = None | |||||
def data_forward(self, network, inputs): | def data_forward(self, network, inputs): | ||||
"""This is only for sequence labeling with CRF decoder. | """This is only for sequence labeling with CRF decoder. | ||||
@@ -159,14 +180,14 @@ class SeqLabelTester(BaseTester): | |||||
:return: | :return: | ||||
""" | """ | ||||
batch_size, max_len = predict.size(0), predict.size(1) | batch_size, max_len = predict.size(0), predict.size(1) | ||||
loss = self.model.loss(predict, truth, self.mask) / batch_size | |||||
loss = self._model.loss(predict, truth, self.mask) / batch_size | |||||
prediction = self.model.prediction(predict, self.mask) | |||||
results = torch.Tensor(prediction).view(-1,) | |||||
prediction = self._model.prediction(predict, self.mask) | |||||
results = torch.Tensor(prediction).view(-1, ) | |||||
# make sure "results" is in the same device as "truth" | # make sure "results" is in the same device as "truth" | ||||
results = results.to(truth) | results = results.to(truth) | ||||
accuracy = torch.sum(results == truth.view((-1,))).to(torch.float) / results.shape[0] | accuracy = torch.sum(results == truth.view((-1,))).to(torch.float) / results.shape[0] | ||||
return [loss.data, accuracy.data] | |||||
return [float(loss), float(accuracy)] | |||||
def metrics(self): | def metrics(self): | ||||
batch_loss = np.mean([x[0] for x in self.eval_history]) | batch_loss = np.mean([x[0] for x in self.eval_history]) | ||||
@@ -184,21 +205,16 @@ class SeqLabelTester(BaseTester): | |||||
def make_batch(self, iterator, data): | def make_batch(self, iterator, data): | ||||
return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True) | return Action.make_batch(iterator, use_cuda=self.use_cuda, output_length=True) | ||||
class ClassificationTester(BaseTester): | class ClassificationTester(BaseTester): | ||||
"""Tester for classification.""" | """Tester for classification.""" | ||||
def __init__(self, test_args): | |||||
def __init__(self, **test_args): | |||||
""" | """ | ||||
:param test_args: a dict-like object that has __getitem__ method, \ | :param test_args: a dict-like object that has __getitem__ method, \ | ||||
can be accessed by "test_args["key_str"]" | can be accessed by "test_args["key_str"]" | ||||
""" | """ | ||||
super(ClassificationTester, self).__init__(test_args) | |||||
self.pickle_path = test_args["pickle_path"] | |||||
self.save_dev_data = None | |||||
self.output = None | |||||
self.mean_loss = None | |||||
self.iterator = None | |||||
super(ClassificationTester, self).__init__(**test_args) | |||||
def make_batch(self, iterator, data, max_len=None): | def make_batch(self, iterator, data, max_len=None): | ||||
return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len) | return Action.make_batch(iterator, use_cuda=self.use_cuda, max_len=max_len) | ||||
@@ -221,4 +237,3 @@ class ClassificationTester(BaseTester): | |||||
y_true = torch.cat(y_true, dim=0) | y_true = torch.cat(y_true, dim=0) | ||||
acc = float(torch.sum(y_pred == y_true)) / len(y_true) | acc = float(torch.sum(y_pred == y_true)) / len(y_true) | ||||
return y_true.cpu().numpy(), y_prob.cpu().numpy(), acc | return y_true.cpu().numpy(), y_prob.cpu().numpy(), acc | ||||
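A hedged sketch of the new keyword-argument interface described by "default_args"/"required_args" above; the values are illustrative and model/dev_data are assumed to exist:

from fastNLP.core.tester import ClassificationTester

tester = ClassificationTester(batch_size=16,        # overrides the default of 8
                              use_cuda=False,
                              save_output=True,
                              pickle_path="./save/")
# wrongly-typed values raise ValueError; unknown keys are silently ignored
# tester.test(model, dev_data) then evaluates the model over dev_data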
@@ -4,12 +4,12 @@ import os | |||||
import time | import time | ||||
from datetime import timedelta | from datetime import timedelta | ||||
import numpy as np | |||||
import torch | import torch | ||||
import torch.nn as nn | |||||
from fastNLP.core.action import Action | from fastNLP.core.action import Action | ||||
from fastNLP.core.action import RandomSampler, Batchifier | from fastNLP.core.action import RandomSampler, Batchifier | ||||
from fastNLP.core.loss import Loss | |||||
from fastNLP.core.optimizer import Optimizer | |||||
from fastNLP.core.tester import SeqLabelTester, ClassificationTester | from fastNLP.core.tester import SeqLabelTester, ClassificationTester | ||||
from fastNLP.modules import utils | from fastNLP.modules import utils | ||||
from fastNLP.saver.logger import create_logger | from fastNLP.saver.logger import create_logger | ||||
@@ -23,14 +23,13 @@ class BaseTrainer(object): | |||||
"""Operations to train a model, including data loading, SGD, and validation. | """Operations to train a model, including data loading, SGD, and validation. | ||||
Subclasses must implement the following abstract methods: | Subclasses must implement the following abstract methods: | ||||
- define_optimizer | |||||
- grad_backward | - grad_backward | ||||
- get_loss | - get_loss | ||||
""" | """ | ||||
def __init__(self, train_args): | |||||
def __init__(self, **kwargs): | |||||
""" | """ | ||||
:param train_args: dict of (key, value), or dict-like object. key is str. | |||||
:param kwargs: training settings passed as keyword arguments; keys are str.
The base trainer requires the following keys: | The base trainer requires the following keys: | ||||
- epochs: int, the number of epochs in training | - epochs: int, the number of epochs in training | ||||
@@ -39,64 +38,90 @@ class BaseTrainer(object): | |||||
- pickle_path: str, the path to pickle files for pre-processing | - pickle_path: str, the path to pickle files for pre-processing | ||||
""" | """ | ||||
super(BaseTrainer, self).__init__() | super(BaseTrainer, self).__init__() | ||||
self.n_epochs = train_args["epochs"] | |||||
self.batch_size = train_args["batch_size"] | |||||
self.pickle_path = train_args["pickle_path"] | |||||
self.validate = train_args["validate"] | |||||
self.save_best_dev = train_args["save_best_dev"] | |||||
self.model_saved_path = train_args["model_saved_path"] | |||||
self.use_cuda = train_args["use_cuda"] | |||||
self.model = None | |||||
self.iterator = None | |||||
self.loss_func = None | |||||
self.optimizer = None | |||||
""" | |||||
"default_args" provides default value for important settings. | |||||
The initialization arguments "kwargs" with the same key (name) will override the default value. | |||||
"kwargs" must have the same type as "default_args" on corresponding keys. | |||||
Otherwise, error will raise. | |||||
""" | |||||
default_args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/", | |||||
"save_best_dev": True, "model_name": "default_model_name.pkl", "print_every_step": 1, | |||||
"loss": Loss(None), | |||||
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0) | |||||
} | |||||
""" | |||||
"required_args" is the collection of arguments that users must pass to Trainer explicitly. | |||||
This is used to warn users of essential settings in the training. | |||||
Obviously, "required_args" is the subset of "default_args". | |||||
The value in "default_args" to the keys in "required_args" is simply for type check. | |||||
""" | |||||
# TODO: required arguments | |||||
required_args = {} | |||||
for req_key in required_args: | |||||
if req_key not in kwargs: | |||||
logger.error("Trainer lacks argument {}".format(req_key)) | |||||
raise ValueError("Trainer lacks argument {}".format(req_key)) | |||||
for key in default_args: | |||||
if key in kwargs: | |||||
if isinstance(kwargs[key], type(default_args[key])): | |||||
default_args[key] = kwargs[key] | |||||
else: | |||||
msg = "Argument %s type mismatch: expected %s while get %s" % ( | |||||
key, type(default_args[key]), type(kwargs[key])) | |||||
logger.error(msg) | |||||
raise ValueError(msg) | |||||
else: | |||||
# BaseTrainer doesn't care about extra arguments | |||||
pass | |||||
print(default_args) | |||||
self.n_epochs = default_args["epochs"] | |||||
self.batch_size = default_args["batch_size"] | |||||
self.pickle_path = default_args["pickle_path"] | |||||
self.validate = default_args["validate"] | |||||
self.save_best_dev = default_args["save_best_dev"] | |||||
self.use_cuda = default_args["use_cuda"] | |||||
self.model_name = default_args["model_name"] | |||||
self.print_every_step = default_args["print_every_step"] | |||||
self._model = None | |||||
self._loss_func = default_args["loss"].get() # return a pytorch loss function or None | |||||
self._optimizer = None | |||||
self._optimizer_proto = default_args["optimizer"] | |||||
def train(self, network, train_data, dev_data=None): | def train(self, network, train_data, dev_data=None): | ||||
"""General Training Steps | |||||
"""General Training Procedure | |||||
:param network: a model | :param network: a model | ||||
:param train_data: three-level list, the training set. | :param train_data: three-level list, the training set. | ||||
:param dev_data: three-level list, the validation data (optional) | :param dev_data: three-level list, the validation data (optional) | ||||
The method is framework independent. | |||||
Work by calling the following methods: | |||||
- prepare_input | |||||
- mode | |||||
- define_optimizer | |||||
- data_forward | |||||
- get_loss | |||||
- grad_backward | |||||
- update | |||||
Subclasses must implement these methods with a specific framework. | |||||
""" | """ | ||||
# prepare model and data, transfer model to gpu if available | |||||
# transfer model to gpu if available | |||||
if torch.cuda.is_available() and self.use_cuda: | if torch.cuda.is_available() and self.use_cuda: | ||||
self.model = network.cuda() | |||||
self._model = network.cuda() | |||||
# self._model is used to access model-specific loss | |||||
else: | else: | ||||
self.model = network | |||||
self._model = network | |||||
# train_data = self.load_train_data(self.pickle_path) | |||||
# logger.info("training data loaded") | |||||
# define tester over dev data | |||||
# define Tester over dev data | |||||
if self.validate: | if self.validate: | ||||
default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, | default_valid_args = {"save_output": True, "validate_in_training": True, "save_dev_input": True, | ||||
"save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path, | "save_loss": True, "batch_size": self.batch_size, "pickle_path": self.pickle_path, | ||||
"use_cuda": self.use_cuda} | |||||
"use_cuda": self.use_cuda, "print_every_step": 0} | |||||
validator = self._create_validator(default_valid_args) | validator = self._create_validator(default_valid_args) | ||||
logger.info("validator defined as {}".format(str(validator))) | logger.info("validator defined as {}".format(str(validator))) | ||||
# optimizer and loss | |||||
self.define_optimizer() | self.define_optimizer() | ||||
logger.info("optimizer defined as {}".format(str(self.optimizer))) | |||||
logger.info("optimizer defined as {}".format(str(self._optimizer))) | |||||
self.define_loss() | |||||
logger.info("loss function defined as {}".format(str(self._loss_func))) | |||||
# main training epochs | |||||
n_samples = len(train_data) | |||||
n_batches = n_samples // self.batch_size | |||||
n_print = 1 | |||||
# main training procedure | |||||
start = time.time() | start = time.time() | ||||
logger.info("training epochs started") | logger.info("training epochs started") | ||||
for epoch in range(1, self.n_epochs + 1): | for epoch in range(1, self.n_epochs + 1): | ||||
logger.info("training epoch {}".format(epoch)) | logger.info("training epoch {}".format(epoch)) | ||||
@@ -106,23 +131,30 @@ class BaseTrainer(object): | |||||
data_iterator = iter(Batchifier(RandomSampler(train_data), self.batch_size, drop_last=False)) | data_iterator = iter(Batchifier(RandomSampler(train_data), self.batch_size, drop_last=False)) | ||||
logger.info("prepared data iterator") | logger.info("prepared data iterator") | ||||
self._train_step(data_iterator, network, start=start, n_print=n_print, epoch=epoch) | |||||
# one forward and backward pass | |||||
self._train_step(data_iterator, network, start=start, n_print=self.print_every_step, epoch=epoch) | |||||
# validation | |||||
if self.validate: | if self.validate: | ||||
logger.info("validation started") | logger.info("validation started") | ||||
validator.test(network, dev_data) | validator.test(network, dev_data) | ||||
if self.save_best_dev and self.best_eval_result(validator): | if self.save_best_dev and self.best_eval_result(validator): | ||||
self.save_model(network) | |||||
print("saved better model selected by dev") | |||||
logger.info("saved better model selected by dev") | |||||
self.save_model(network, self.model_name) | |||||
print("Saved better model selected by validation.") | |||||
logger.info("Saved better model selected by validation.") | |||||
valid_results = validator.show_matrices() | valid_results = validator.show_matrices() | ||||
print("[epoch {}] {}".format(epoch, valid_results)) | print("[epoch {}] {}".format(epoch, valid_results)) | ||||
logger.info("[epoch {}] {}".format(epoch, valid_results)) | logger.info("[epoch {}] {}".format(epoch, valid_results)) | ||||
def _train_step(self, data_iterator, network, **kwargs): | def _train_step(self, data_iterator, network, **kwargs): | ||||
"""Training process in one epoch.""" | |||||
"""Training process in one epoch. | |||||
kwargs should contain: | |||||
- n_print: int, print training information every n steps. | |||||
- start: time.time(), the time when training started.
- epoch: int, the current epoch number.
""" | |||||
step = 0 | step = 0 | ||||
for batch_x, batch_y in self.make_batch(data_iterator): | for batch_x, batch_y in self.make_batch(data_iterator): | ||||
@@ -132,7 +164,7 @@ class BaseTrainer(object): | |||||
self.grad_backward(loss) | self.grad_backward(loss) | ||||
self.update() | self.update() | ||||
if step % kwargs["n_print"] == 0: | |||||
if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0: | |||||
end = time.time() | end = time.time() | ||||
diff = timedelta(seconds=round(end - kwargs["start"])) | diff = timedelta(seconds=round(end - kwargs["start"])) | ||||
print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format( | print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format( | ||||
@@ -153,6 +185,11 @@ class BaseTrainer(object): | |||||
logger.error("the number of folds in train and dev data unequals {}!={}".format(len(train_data_cv), | logger.error("the number of folds in train and dev data unequals {}!={}".format(len(train_data_cv), | ||||
len(dev_data_cv))) | len(dev_data_cv))) | ||||
raise RuntimeError("the number of folds in train and dev data unequals") | raise RuntimeError("the number of folds in train and dev data unequals") | ||||
if self.validate is False: | |||||
logger.warn("Cross validation requires self.validate to be True. Please turn it on. ") | |||||
print("[warning] Cross validation requires self.validate to be True. Please turn it on. ") | |||||
self.validate = True | |||||
n_fold = len(train_data_cv) | n_fold = len(train_data_cv) | ||||
logger.info("perform {} folds cross validation.".format(n_fold)) | logger.info("perform {} folds cross validation.".format(n_fold)) | ||||
for i in range(n_fold): | for i in range(n_fold): | ||||
@@ -186,7 +223,7 @@ class BaseTrainer(object): | |||||
""" | """ | ||||
Define framework-specific optimizer specified by the models. | Define framework-specific optimizer specified by the models. | ||||
""" | """ | ||||
raise NotImplementedError | |||||
self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters()) | |||||
def update(self): | def update(self): | ||||
""" | """ | ||||
@@ -194,7 +231,7 @@ class BaseTrainer(object): | |||||
For PyTorch, just call optimizer to update. | For PyTorch, just call optimizer to update. | ||||
""" | """ | ||||
raise NotImplementedError | |||||
self._optimizer.step() | |||||
def data_forward(self, network, x): | def data_forward(self, network, x): | ||||
raise NotImplementedError | raise NotImplementedError | ||||
@@ -206,7 +243,8 @@ class BaseTrainer(object): | |||||
For PyTorch, just do "loss.backward()" | For PyTorch, just do "loss.backward()" | ||||
""" | """ | ||||
raise NotImplementedError | |||||
self._model.zero_grad() | |||||
loss.backward() | |||||
def get_loss(self, predict, truth): | def get_loss(self, predict, truth): | ||||
""" | """ | ||||
@@ -215,21 +253,25 @@ class BaseTrainer(object): | |||||
:param truth: ground truth label vector | :param truth: ground truth label vector | ||||
:return: a scalar | :return: a scalar | ||||
""" | """ | ||||
if self.loss_func is None: | |||||
if hasattr(self.model, "loss"): | |||||
self.loss_func = self.model.loss | |||||
logger.info("The model has a loss function, use it.") | |||||
else: | |||||
logger.info("The model didn't define loss, use Trainer's loss.") | |||||
self.define_loss() | |||||
return self.loss_func(predict, truth) | |||||
return self._loss_func(predict, truth) | |||||
def define_loss(self): | def define_loss(self): | ||||
""" | """ | ||||
Assign an instance of loss function to self.loss_func | |||||
E.g. self.loss_func = nn.CrossEntropyLoss() | |||||
If the model defines a loss, use the model's loss.
Otherwise, the Trainer must be given a loss argument and uses it as the loss.
The two losses cannot both be defined at the same time;
the Trainer never falls back to a default loss of its own.
""" | """ | ||||
raise NotImplementedError | |||||
if hasattr(self._model, "loss") and self._loss_func is not None: | |||||
raise ValueError("Both the model and Trainer define loss. Please take out your loss.") | |||||
if hasattr(self._model, "loss"): | |||||
self._loss_func = self._model.loss | |||||
logger.info("The model has a loss function, use it.") | |||||
else: | |||||
if self._loss_func is None: | |||||
raise ValueError("Please specify a loss function.") | |||||
logger.info("The model didn't define loss, use Trainer's loss.") | |||||
def best_eval_result(self, validator): | def best_eval_result(self, validator): | ||||
""" | """ | ||||
@@ -238,71 +280,35 @@ class BaseTrainer(object): | |||||
""" | """ | ||||
raise NotImplementedError | raise NotImplementedError | ||||
def save_model(self, network): | |||||
""" | |||||
def save_model(self, network, model_name): | |||||
"""Save this model with such a name. | |||||
This method may be called multiple times by Trainer to overwritten a better model. | |||||
:param network: the PyTorch model | :param network: the PyTorch model | ||||
model_best_dev.pkl may be overwritten by a better model in future epochs. | |||||
:param model_name: str | |||||
""" | """ | ||||
ModelSaver(self.model_saved_path + "model_best_dev.pkl").save_pytorch(network) | |||||
if model_name[-4:] != ".pkl": | |||||
model_name += ".pkl" | |||||
ModelSaver(self.pickle_path + model_name).save_pytorch(network) | |||||
def _create_validator(self, valid_args): | def _create_validator(self, valid_args): | ||||
raise NotImplementedError | raise NotImplementedError | ||||
class ToyTrainer(BaseTrainer): | |||||
""" | |||||
An example to show the definition of Trainer. | |||||
""" | |||||
def __init__(self, training_args): | |||||
super(ToyTrainer, self).__init__(training_args) | |||||
def load_train_data(self, data_path): | |||||
data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb")) | |||||
data_dev = _pickle.load(open(data_path + "/data_train.pkl", "rb")) | |||||
return data_train, data_dev, 0, 1 | |||||
def data_forward(self, network, x): | |||||
return network(x) | |||||
def grad_backward(self, loss): | |||||
self.model.zero_grad() | |||||
loss.backward() | |||||
def get_loss(self, pred, truth): | |||||
return np.mean(np.square(pred - truth)) | |||||
def define_optimizer(self): | |||||
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01) | |||||
def update(self): | |||||
self.optimizer.step() | |||||
class SeqLabelTrainer(BaseTrainer): | class SeqLabelTrainer(BaseTrainer): | ||||
""" | """ | ||||
Trainer for Sequence Modeling | |||||
Trainer for Sequence Labeling | |||||
""" | """ | ||||
def __init__(self, train_args): | |||||
super(SeqLabelTrainer, self).__init__(train_args) | |||||
self.vocab_size = train_args["vocab_size"] | |||||
self.num_classes = train_args["num_classes"] | |||||
def __init__(self, **kwargs): | |||||
super(SeqLabelTrainer, self).__init__(**kwargs) | |||||
# self.vocab_size = kwargs["vocab_size"] | |||||
# self.num_classes = kwargs["num_classes"] | |||||
self.max_len = None | self.max_len = None | ||||
self.mask = None | self.mask = None | ||||
self.best_accuracy = 0.0 | self.best_accuracy = 0.0 | ||||
def define_optimizer(self): | |||||
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) | |||||
def grad_backward(self, loss): | |||||
self.model.zero_grad() | |||||
loss.backward() | |||||
def update(self): | |||||
self.optimizer.step() | |||||
def data_forward(self, network, inputs): | def data_forward(self, network, inputs): | ||||
if not isinstance(inputs, tuple): | if not isinstance(inputs, tuple): | ||||
raise RuntimeError("output_length must be true for sequence modeling. Receive {}".format(type(inputs[0]))) | raise RuntimeError("output_length must be true for sequence modeling. Receive {}".format(type(inputs[0]))) | ||||
@@ -330,7 +336,7 @@ class SeqLabelTrainer(BaseTrainer): | |||||
batch_size, max_len = predict.size(0), predict.size(1) | batch_size, max_len = predict.size(0), predict.size(1) | ||||
assert truth.shape == (batch_size, max_len) | assert truth.shape == (batch_size, max_len) | ||||
loss = self.model.loss(predict, truth, self.mask) | |||||
loss = self._model.loss(predict, truth, self.mask) | |||||
return loss | return loss | ||||
def best_eval_result(self, validator): | def best_eval_result(self, validator): | ||||
@@ -345,48 +351,25 @@ class SeqLabelTrainer(BaseTrainer): | |||||
return Action.make_batch(iterator, output_length=True, use_cuda=self.use_cuda) | return Action.make_batch(iterator, output_length=True, use_cuda=self.use_cuda) | ||||
def _create_validator(self, valid_args): | def _create_validator(self, valid_args): | ||||
return SeqLabelTester(valid_args) | |||||
return SeqLabelTester(**valid_args) | |||||
class ClassificationTrainer(BaseTrainer): | class ClassificationTrainer(BaseTrainer): | ||||
"""Trainer for classification.""" | |||||
"""Trainer for text classification.""" | |||||
def __init__(self, train_args): | |||||
super(ClassificationTrainer, self).__init__(train_args) | |||||
self.learn_rate = train_args["learn_rate"] | |||||
self.momentum = train_args["momentum"] | |||||
def __init__(self, **train_args): | |||||
super(ClassificationTrainer, self).__init__(**train_args) | |||||
self.iterator = None | self.iterator = None | ||||
self.loss_func = None | self.loss_func = None | ||||
self.optimizer = None | self.optimizer = None | ||||
self.best_accuracy = 0 | self.best_accuracy = 0 | ||||
def define_loss(self): | |||||
self.loss_func = nn.CrossEntropyLoss() | |||||
def define_optimizer(self): | |||||
""" | |||||
Define framework-specific optimizer specified by the models. | |||||
""" | |||||
self.optimizer = torch.optim.SGD( | |||||
self.model.parameters(), | |||||
lr=self.learn_rate, | |||||
momentum=self.momentum) | |||||
def data_forward(self, network, x): | def data_forward(self, network, x): | ||||
"""Forward through network.""" | """Forward through network.""" | ||||
logits = network(x) | logits = network(x) | ||||
return logits | return logits | ||||
def grad_backward(self, loss): | |||||
"""Compute gradient backward.""" | |||||
self.model.zero_grad() | |||||
loss.backward() | |||||
def update(self): | |||||
"""Apply gradient.""" | |||||
self.optimizer.step() | |||||
def make_batch(self, iterator): | def make_batch(self, iterator): | ||||
return Action.make_batch(iterator, output_length=False, use_cuda=self.use_cuda) | return Action.make_batch(iterator, output_length=False, use_cuda=self.use_cuda) | ||||
@@ -404,4 +387,4 @@ class ClassificationTrainer(BaseTrainer): | |||||
return False | return False | ||||
def _create_validator(self, valid_args): | def _create_validator(self, valid_args): | ||||
return ClassificationTester(valid_args) | |||||
return ClassificationTester(**valid_args) |
@@ -1,4 +1,5 @@ | |||||
from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer | from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer | ||||
from fastNLP.core.preprocess import load_pickle | |||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | ||||
from fastNLP.loader.model_loader import ModelLoader | from fastNLP.loader.model_loader import ModelLoader | ||||
@@ -7,14 +8,13 @@ mapping from model name to [URL, file_name.class_name, model_pickle_name] | |||||
Notice that the class of the model should be in "models" directory. | Notice that the class of the model should be in "models" directory. | ||||
Example: | Example: | ||||
"zh_pos_tag_model": ["www.fudan.edu.cn", "sequence_modeling.SeqLabeling", "saved_model.pkl"] | |||||
""" | |||||
FastNLP_MODEL_COLLECTION = { | |||||
"seq_label_model": { | "seq_label_model": { | ||||
"url": "www.fudan.edu.cn", | "url": "www.fudan.edu.cn", | ||||
"class": "sequence_modeling.SeqLabeling", | |||||
"class": "sequence_modeling.SeqLabeling", # file_name.class_name in models/ | |||||
"pickle": "seq_label_model.pkl", | "pickle": "seq_label_model.pkl", | ||||
"type": "seq_label" | |||||
"type": "seq_label", | |||||
"config_file_name": "config", # the name of the config file which stores model initialization parameters | |||||
"config_section_name": "text_class_model" # the name of the section in the config file which stores model init params | |||||
}, | }, | ||||
"text_class_model": { | "text_class_model": { | ||||
"url": "www.fudan.edu.cn", | "url": "www.fudan.edu.cn", | ||||
@@ -22,11 +22,18 @@ FastNLP_MODEL_COLLECTION = { | |||||
"pickle": "text_class_model.pkl", | "pickle": "text_class_model.pkl", | ||||
"type": "text_class" | "type": "text_class" | ||||
} | } | ||||
""" | |||||
FastNLP_MODEL_COLLECTION = { | |||||
"cws_basic_model": { | |||||
"url": "", | |||||
"class": "sequence_modeling.AdvSeqLabel", | |||||
"pickle": "cws_basic_model_v_0.pkl", | |||||
"type": "seq_label", | |||||
"config_file_name": "config", | |||||
"config_section_name": "text_class_model" | |||||
} | |||||
} | } | ||||
CONFIG_FILE_NAME = "config" | |||||
SECTION_NAME = "text_class_model" | |||||
class FastNLP(object): | class FastNLP(object): | ||||
""" | """ | ||||
@@ -51,10 +58,13 @@ class FastNLP(object): | |||||
self.model = None | self.model = None | ||||
self.infer_type = None # "seq_label"/"text_class" | self.infer_type = None # "seq_label"/"text_class" | ||||
def load(self, model_name): | |||||
def load(self, model_name, config_file="config", section_name="model"): | |||||
""" | """ | ||||
Load a pre-trained FastNLP model together with additional data. | Load a pre-trained FastNLP model together with additional data. | ||||
:param model_name: str, the name of a FastNLP model. | :param model_name: str, the name of a FastNLP model. | ||||
:param config_file: str, the name of the config file which stores the initialization information of the model. | |||||
(default: "config") | |||||
:param section_name: str, the name of the corresponding section in the config file. (default: model) | |||||
""" | """ | ||||
assert type(model_name) is str | assert type(model_name) is str | ||||
if model_name not in FastNLP_MODEL_COLLECTION: | if model_name not in FastNLP_MODEL_COLLECTION: | ||||
@@ -64,37 +74,47 @@ class FastNLP(object): | |||||
self._download(model_name, FastNLP_MODEL_COLLECTION[model_name]["url"]) | self._download(model_name, FastNLP_MODEL_COLLECTION[model_name]["url"]) | ||||
model_class = self._get_model_class(FastNLP_MODEL_COLLECTION[model_name]["class"]) | model_class = self._get_model_class(FastNLP_MODEL_COLLECTION[model_name]["class"]) | ||||
print("Restore model class {}".format(str(model_class))) | |||||
model_args = ConfigSection() | model_args = ConfigSection() | ||||
ConfigLoader.load_config(self.model_dir + CONFIG_FILE_NAME, {SECTION_NAME: model_args}) | |||||
ConfigLoader.load_config(self.model_dir + config_file, {section_name: model_args}) | |||||
print("Restore model hyper-parameters {}".format(str(model_args.data))) | |||||
# fetch dictionary size and number of labels from pickle files | |||||
word2index = load_pickle(self.model_dir, "word2id.pkl") | |||||
model_args["vocab_size"] = len(word2index) | |||||
index2label = load_pickle(self.model_dir, "id2class.pkl") | |||||
model_args["num_classes"] = len(index2label) | |||||
# Construct the model | # Construct the model | ||||
model = model_class(model_args) | model = model_class(model_args) | ||||
print("Model constructed.") | |||||
# To do: framework independent | # To do: framework independent | ||||
ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"]) | ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"]) | ||||
print("Model weights loaded.") | |||||
self.model = model | self.model = model | ||||
self.infer_type = FastNLP_MODEL_COLLECTION[model_name]["type"] | self.infer_type = FastNLP_MODEL_COLLECTION[model_name]["type"] | ||||
print("Model loaded. ") | |||||
print("Inference ready.") | |||||
def run(self, raw_input): | def run(self, raw_input): | ||||
""" | """ | ||||
Perform inference over given input using the loaded model. | Perform inference over given input using the loaded model. | ||||
:param raw_input: str, raw text | |||||
:param raw_input: list of strings. Each element is an input query.
:return results: | :return results: | ||||
""" | """ | ||||
infer = self._create_inference(self.model_dir) | infer = self._create_inference(self.model_dir) | ||||
# string ---> 2-D list of string | |||||
infer_input = self.string_to_list(raw_input) | |||||
# tokenize: list of string ---> 2-D list of string | |||||
infer_input = self.tokenize(raw_input, language="zh") | |||||
# 2-D list of string ---> list of strings | |||||
# 2-D list of string ---> 2-D list of tags | |||||
results = infer.predict(self.model, infer_input) | results = infer.predict(self.model, infer_input) | ||||
# list of strings ---> final answers | |||||
# 2-D list of tags ---> list of final answers | |||||
outputs = self._make_output(results, infer_input) | outputs = self._make_output(results, infer_input) | ||||
return outputs | return outputs | ||||
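An end-to-end sketch of the inference API. The model_dir constructor argument and the word2id.pkl/id2class.pkl files it must contain are inferred from the attributes used above; the directory path and query are illustrative:

from fastNLP.fastnlp import FastNLP

nlp = FastNLP(model_dir="./model_dir/")       # holds the config file, vocab pickles and model weights
nlp.load("cws_basic_model", config_file="config", section_name="text_class_model")
results = nlp.run(["这是一个测试句子"])        # a list of raw queries
print(results)                                 # for a seq_label model: one [(token, tag), ...] list per query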
@@ -142,81 +162,100 @@ class FastNLP(object): | |||||
""" | """ | ||||
return True | return True | ||||
def string_to_list(self, text, delimiter="\n"): | |||||
""" | |||||
This function is used to transform raw input to lists, which is done by DatasetLoader in training. | |||||
Split text string into three-level lists. | |||||
[ | |||||
[word_11, word_12, ...], | |||||
[word_21, word_22, ...], | |||||
... | |||||
] | |||||
:param text: string | |||||
:param delimiter: str, character used to split text into sentences. | |||||
:return data: two-level lists | |||||
def tokenize(self, text, language): | |||||
"""Extract tokens from strings. | |||||
For English, extract words separated by space. | |||||
For Chinese, extract characters. | |||||
TODO: more complex tokenization methods | |||||
:param text: list of string | |||||
:param language: str, one of ('zh', 'en'), Chinese or English. | |||||
:return data: list of list of string, each string is a token. | |||||
""" | """ | ||||
assert language in ("zh", "en") | |||||
data = [] | data = [] | ||||
sents = text.strip().split(delimiter) | |||||
for sent in sents: | |||||
characters = [] | |||||
for ch in sent: | |||||
characters.append(ch) | |||||
data.append(characters) | |||||
for sent in text: | |||||
if language == "en": | |||||
tokens = sent.strip().split() | |||||
elif language == "zh": | |||||
tokens = [char for char in sent] | |||||
else: | |||||
raise RuntimeError("Unknown language {}".format(language)) | |||||
data.append(tokens) | |||||
return data | return data | ||||
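# Illustrative examples of the two tokenization modes (outputs shown as comments):
#   tokenize(["machine learning is fun"], language="en") -> [["machine", "learning", "is", "fun"]]
#   tokenize(["深度学习"], language="zh") -> [["深", "度", "学", "习"]]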
def _make_output(self, results, infer_input): | def _make_output(self, results, infer_input): | ||||
"""Transform the infer output into user-friendly output. | |||||
:param results: 1 or 2-D list of strings. | |||||
If self.infer_type == "seq_label", it is of shape [num_examples, tag_seq_length] | |||||
If self.infer_type == "text_class", it is of shape [num_examples] | |||||
:param infer_input: 2-D list of string, the input query before inference. | |||||
:return outputs: list. Each entry is a prediction. | |||||
""" | |||||
if self.infer_type == "seq_label": | if self.infer_type == "seq_label": | ||||
outputs = make_seq_label_output(results, infer_input) | outputs = make_seq_label_output(results, infer_input) | ||||
elif self.infer_type == "text_class": | elif self.infer_type == "text_class": | ||||
outputs = make_class_output(results, infer_input) | outputs = make_class_output(results, infer_input) | ||||
else: | else: | ||||
raise ValueError("fail to make outputs with infer type {}".format(self.infer_type)) | |||||
raise RuntimeError("fail to make outputs with infer type {}".format(self.infer_type)) | |||||
return outputs | return outputs | ||||
def make_seq_label_output(result, infer_input): | def make_seq_label_output(result, infer_input): | ||||
""" | |||||
Transform model output into user-friendly contents. | |||||
:param result: 1-D list of strings. (model output) | |||||
"""Transform model output into user-friendly contents. | |||||
:param result: 2-D list of strings. (model output) | |||||
:param infer_input: 2-D list of string (model input) | :param infer_input: 2-D list of string (model input) | ||||
:return outputs: | |||||
:return ret: list of list of tuples | |||||
[ | |||||
[(word_11, label_11), (word_12, label_12), ...], | |||||
[(word_21, label_21), (word_22, label_22), ...], | |||||
... | |||||
] | |||||
""" | """ | ||||
return result | |||||
ret = [] | |||||
for example_x, example_y in zip(infer_input, result): | |||||
ret.append([(x, y) for x, y in zip(example_x, example_y)]) | |||||
return ret | |||||
def make_class_output(result, infer_input): | def make_class_output(result, infer_input): | ||||
"""Transform model output into user-friendly contents. | |||||
:param result: list of strings, one predicted label per example. (model output)
:param infer_input: 2-D list of string (model input)
:return ret: the same as result, [label_1, label_2, ...] | |||||
""" | |||||
return result | return result | ||||
def interpret_word_seg_results(infer_input, results): | |||||
""" | |||||
Transform model output into user-friendly contents. | |||||
def interpret_word_seg_results(char_seq, label_seq): | |||||
"""Transform model output into user-friendly contents. | |||||
Example: In CWS, convert <BMES> labeling into segmented text. | Example: In CWS, convert <BMES> labeling into segmented text. | ||||
:param results: list of strings. (model output) | |||||
:param infer_input: 2-D list of string (model input) | |||||
:return output: list of strings | |||||
:param char_seq: list of string, | |||||
:param label_seq: list of string, the same length as char_seq | |||||
Each entry is one of ('B', 'M', 'E', 'S'). | |||||
:return output: list of words | |||||
""" | """ | ||||
outputs = [] | |||||
for sent_char, sent_label in zip(infer_input, results): | |||||
words = [] | |||||
word = "" | |||||
for char, label in zip(sent_char, sent_label): | |||||
if label[0] == "B": | |||||
if word != "": | |||||
words.append(word) | |||||
word = char | |||||
elif label[0] == "M": | |||||
word += char | |||||
elif label[0] == "E": | |||||
word += char | |||||
words = [] | |||||
word = "" | |||||
for char, label in zip(char_seq, label_seq): | |||||
if label[0] == "B": | |||||
if word != "": | |||||
words.append(word) | words.append(word) | ||||
word = "" | |||||
elif label[0] == "S": | |||||
if word != "": | |||||
words.append(word) | |||||
word = "" | |||||
words.append(char) | |||||
else: | |||||
raise ValueError("invalid label") | |||||
outputs.append(" ".join(words)) | |||||
return outputs | |||||
word = char | |||||
elif label[0] == "M": | |||||
word += char | |||||
elif label[0] == "E": | |||||
word += char | |||||
words.append(word) | |||||
word = "" | |||||
elif label[0] == "S": | |||||
if word != "": | |||||
words.append(word) | |||||
word = "" | |||||
words.append(char) | |||||
else: | |||||
raise ValueError("invalid label {}".format(label[0])) | |||||
if word != "":
    words.append(word)  # flush a trailing partial word (e.g. a sequence ending with "B" or "M")
return words
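A tiny worked example of the BMES decoding above; input and expected output are illustrative:

chars = ["我", "爱", "自", "然", "语", "言"]
labels = ["S", "S", "B", "M", "M", "E"]
print(interpret_word_seg_results(chars, labels))   # -> ["我", "爱", "自然语言"]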
@@ -94,6 +94,10 @@ class ConfigSection(object): | |||||
def __contains__(self, item): | def __contains__(self, item): | ||||
return item in self.__dict__.keys() | return item in self.__dict__.keys() | ||||
@property | |||||
def data(self): | |||||
return self.__dict__ | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
config = ConfigLoader('configLoader', 'there is no data') | config = ConfigLoader('configLoader', 'there is no data') | ||||
@@ -142,6 +142,8 @@ class CharLM(BaseModel): | |||||
"char_dict": char_dict, | "char_dict": char_dict, | ||||
"reverse_word_dict": reverse_word_dict, | "reverse_word_dict": reverse_word_dict, | ||||
} | } | ||||
if not os.path.exists("cache"): | |||||
os.mkdir("cache") | |||||
torch.save(objects, "cache/prep.pt") | torch.save(objects, "cache/prep.pt") | ||||
print("Preprocess done.") | print("Preprocess done.") | ||||
@@ -0,0 +1,56 @@ | |||||
import torch | |||||
import torch.nn as nn | |||||
class MLP(nn.Module): | |||||
def __init__(self, size_layer, num_class=2, activation='relu'): | |||||
"""Multilayer Perceptrons as a decoder | |||||
Args: | |||||
size_layer: list of int, define the size of MLP layers | |||||
num_class: int, number of output classes; should be 2 or equal to the size of the last layer
activation: str or function, the activation function for hidden layers | |||||
""" | |||||
super(MLP, self).__init__() | |||||
self.hiddens = nn.ModuleList() | |||||
self.output = None | |||||
for i in range(1, len(size_layer)): | |||||
if i + 1 == len(size_layer): | |||||
self.output = nn.Linear(size_layer[i-1], size_layer[i]) | |||||
else: | |||||
self.hiddens.append(nn.Linear(size_layer[i-1], size_layer[i])) | |||||
if num_class == 2: | |||||
self.out_active = nn.LogSigmoid() | |||||
elif num_class == size_layer[-1]: | |||||
self.out_active = nn.LogSoftmax(dim=1) | |||||
else: | |||||
raise ValueError("should set output num_class correctly: {}".format(num_class)) | |||||
actives = { | |||||
'relu': nn.ReLU(), | |||||
'tanh': nn.Tanh() | |||||
} | |||||
if activation in actives: | |||||
self.hidden_active = actives[activation] | |||||
elif callable(activation):
self.hidden_active = activation | |||||
else: | |||||
raise ValueError("should set activation correctly: {}".format(activation)) | |||||
def forward(self, x): | |||||
for layer in self.hiddens: | |||||
x = self.hidden_active(layer(x)) | |||||
x = self.out_active(self.output(x)) | |||||
return x | |||||
if __name__ == '__main__': | |||||
net1 = MLP([5,10,5]) | |||||
net2 = MLP([5,10,5], 5) | |||||
for net in [net1, net2]: | |||||
x = torch.randn(5, 5) | |||||
y = net(x) | |||||
print(x) | |||||
print(y) | |||||
@@ -15,7 +15,7 @@ class Embedding(nn.Module): | |||||
def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0): | def __init__(self, nums, dims, padding_idx=0, sparse=False, init_emb=None, dropout=0.0): | ||||
super(Embedding, self).__init__() | super(Embedding, self).__init__() | ||||
self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) | self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse) | ||||
if init_emb: | |||||
if init_emb is not None: | |||||
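# explicit "is not None" check: truth-testing a multi-element tensor with "if init_emb:" would be ambiguous and raise an error in PyTorch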
self.embed.weight = nn.Parameter(init_emb) | self.embed.weight = nn.Parameter(init_emb) | ||||
self.dropout = nn.Dropout(dropout) | self.dropout = nn.Dropout(dropout) | ||||
@@ -273,7 +273,7 @@ class MaskedRNNBase(nn.Module): | |||||
hx = (hx, hx) | hx = (hx, hx) | ||||
func = AutogradMaskedStep(num_layers=self.num_layers, | func = AutogradMaskedStep(num_layers=self.num_layers, | ||||
dropout=self.dropout, | |||||
dropout=self.step_dropout, | |||||
train=self.training, | train=self.training, | ||||
lstm=lstm) | lstm=lstm) | ||||
@@ -18,7 +18,6 @@ MLP_HIDDEN = 2000 | |||||
CLASSES_NUM = 5 | CLASSES_NUM = 5 | ||||
from fastNLP.models.base_model import BaseModel | from fastNLP.models.base_model import BaseModel | ||||
from fastNLP.core.trainer import BaseTrainer | |||||
class MyNet(BaseModel): | class MyNet(BaseModel): | ||||
@@ -60,18 +59,6 @@ class Net(nn.Module): | |||||
return x, penalty | return x, penalty | ||||
class MyTrainer(BaseTrainer): | |||||
def __init__(self, args): | |||||
super(MyTrainer, self).__init__(args) | |||||
self.optimizer = None | |||||
def define_optimizer(self): | |||||
self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9) | |||||
def define_loss(self): | |||||
self.loss_func = nn.CrossEntropyLoss() | |||||
def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ | def train(model_dict=None, using_cuda=True, learning_rate=0.06,\ | ||||
momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): | momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10): | ||||
""" | """ | ||||
@@ -1,26 +1,26 @@ | |||||
import sys, os | |||||
import os | |||||
import sys | |||||
sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) | sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) | ||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | ||||
from fastNLP.core.trainer import SeqLabelTrainer | from fastNLP.core.trainer import SeqLabelTrainer | ||||
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader | from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, BaseLoader | ||||
from fastNLP.loader.preprocess import POSPreprocess, load_pickle | |||||
from fastNLP.core.preprocess import SeqLabelPreprocess, load_pickle | |||||
from fastNLP.saver.model_saver import ModelSaver | from fastNLP.saver.model_saver import ModelSaver | ||||
from fastNLP.loader.model_loader import ModelLoader | from fastNLP.loader.model_loader import ModelLoader | ||||
from fastNLP.core.tester import SeqLabelTester | from fastNLP.core.tester import SeqLabelTester | ||||
from fastNLP.models.sequence_modeling import AdvSeqLabel | from fastNLP.models.sequence_modeling import AdvSeqLabel | ||||
from fastNLP.core.inference import SeqLabelInfer | |||||
from fastNLP.core.optimizer import SGD | |||||
from fastNLP.core.predictor import SeqLabelInfer | |||||
# not in the file's dir | # not in the file's dir | ||||
if len(os.path.dirname(__file__)) != 0: | if len(os.path.dirname(__file__)) != 0: | ||||
os.chdir(os.path.dirname(__file__)) | os.chdir(os.path.dirname(__file__)) | ||||
datadir = 'icwb2-data' | |||||
cfgfile = 'cws.cfg' | |||||
datadir = "/home/zyfeng/data/" | |||||
cfgfile = './cws.cfg' | |||||
data_name = "pku_training.utf8" | data_name = "pku_training.utf8" | ||||
cws_data_path = os.path.join(datadir, "training/pku_training.utf8") | |||||
cws_data_path = os.path.join(datadir, "pku_training.utf8") | |||||
pickle_path = "save" | pickle_path = "save" | ||||
data_infer_path = os.path.join(datadir, "infer.utf8") | data_infer_path = os.path.join(datadir, "infer.utf8") | ||||
@@ -70,12 +70,13 @@ def train(): | |||||
train_data = loader.load_pku() | train_data = loader.load_pku() | ||||
# Preprocessor | # Preprocessor | ||||
p = POSPreprocess(train_data, pickle_path, train_dev_split=0.3) | |||||
train_args["vocab_size"] = p.vocab_size | |||||
train_args["num_classes"] = p.num_classes | |||||
preprocessor = SeqLabelPreprocess() | |||||
data_train, data_dev = preprocessor.run(train_data, pickle_path=pickle_path, train_dev_split=0.3) | |||||
train_args["vocab_size"] = preprocessor.vocab_size | |||||
train_args["num_classes"] = preprocessor.num_classes | |||||
# Trainer | # Trainer | ||||
trainer = SeqLabelTrainer(train_args) | |||||
trainer = SeqLabelTrainer(**train_args.data) | |||||
# Model | # Model | ||||
model = AdvSeqLabel(train_args) | model = AdvSeqLabel(train_args) | ||||
@@ -83,10 +84,11 @@ def train(): | |||||
ModelLoader.load_pytorch(model, "./save/saved_model.pkl") | ModelLoader.load_pytorch(model, "./save/saved_model.pkl") | ||||
print('model parameter loaded!') | print('model parameter loaded!') | ||||
except Exception as e: | except Exception as e: | ||||
print("No saved model. Continue.") | |||||
pass | pass | ||||
# Start training | # Start training | ||||
trainer.train(model) | |||||
trainer.train(model, data_train, data_dev) | |||||
print("Training finished!") | print("Training finished!") | ||||
# Saver | # Saver | ||||
@@ -106,6 +108,9 @@ def test(): | |||||
index2label = load_pickle(pickle_path, "id2class.pkl") | index2label = load_pickle(pickle_path, "id2class.pkl") | ||||
test_args["num_classes"] = len(index2label) | test_args["num_classes"] = len(index2label) | ||||
# load dev data | |||||
dev_data = load_pickle(pickle_path, "data_dev.pkl") | |||||
# Define the same model | # Define the same model | ||||
model = AdvSeqLabel(test_args) | model = AdvSeqLabel(test_args) | ||||
@@ -114,10 +119,10 @@ def test(): | |||||
print("model loaded!") | print("model loaded!") | ||||
# Tester | # Tester | ||||
tester = SeqLabelTester(test_args) | |||||
tester = SeqLabelTester(**test_args.data) | |||||
# Start testing | # Start testing | ||||
tester.test(model) | |||||
tester.test(model, dev_data) | |||||
# print test results | # print test results | ||||
print(tester.show_matrices()) | print(tester.show_matrices()) | ||||
@@ -0,0 +1,18 @@ | |||||
import os | |||||
import unittest | |||||
from fastNLP.core.action import Action, Batchifier, SequentialSampler | |||||
class TestAction(unittest.TestCase): | |||||
def test_case_1(self): | |||||
x = [1, 2, 3, 4, 5, 6, 7, 8] | |||||
y = [1, 1, 1, 1, 2, 2, 2, 2] | |||||
data = [] | |||||
for i in range(len(x)): | |||||
data.append([[x[i]], [y[i]]]) | |||||
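# wrap the eight ([x], [y]) samples into sequential batches of two; drop_last=False keeps any trailing partial batch | |||||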
data = Batchifier(SequentialSampler(data), batch_size=2, drop_last=False) | |||||
action = Action() | |||||
for batch_x in action.make_batch(data, use_cuda=False, output_length=True, max_len=None): | |||||
print(batch_x) | |||||
@@ -0,0 +1,43 @@ | |||||
import os | |||||
import unittest | |||||
from fastNLP.core.preprocess import SeqLabelPreprocess | |||||
class TestSeqLabelPreprocess(unittest.TestCase): | |||||
def test_case_1(self): | |||||
data = [ | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
[['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']], | |||||
[['Hello', 'world', '!'], ['a', 'n', '.']], | |||||
] | |||||
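# ten (words, tags) samples; the block below clears ./save so run() rebuilds the vocab/label pickles and the train/dev split from scratch (run() is called twice, presumably to also cover the case where pickles already exist) | |||||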
if os.path.exists("./save"): | |||||
for root, dirs, files in os.walk("./save", topdown=False): | |||||
for name in files: | |||||
os.remove(os.path.join(root, name)) | |||||
for name in dirs: | |||||
os.rmdir(os.path.join(root, name)) | |||||
result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4, | |||||
pickle_path="./save") | |||||
result = SeqLabelPreprocess().run(train_dev_data=data, train_dev_split=0.4, | |||||
pickle_path="./save") | |||||
if os.path.exists("./save"): | |||||
for root, dirs, files in os.walk("./save", topdown=False): | |||||
for name in files: | |||||
os.remove(os.path.join(root, name)) | |||||
for name in dirs: | |||||
os.rmdir(os.path.join(root, name)) | |||||
result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data, | |||||
pickle_path="./save", train_dev_split=0.4, | |||||
cross_val=True) | |||||
result = SeqLabelPreprocess().run(test_data=data, train_dev_data=data, | |||||
pickle_path="./save", train_dev_split=0.4, | |||||
cross_val=True) |
@@ -0,0 +1,33 @@ | |||||
import os | |||||
import torch.nn as nn | |||||
import unittest | |||||
from fastNLP.core.trainer import SeqLabelTrainer | |||||
from fastNLP.core.loss import Loss | |||||
from fastNLP.core.optimizer import Optimizer | |||||
from fastNLP.models.sequence_modeling import SeqLabeling | |||||
class TestTrainer(unittest.TestCase): | |||||
def test_case_1(self): | |||||
args = {"epochs": 3, "batch_size": 8, "validate": True, "use_cuda": True, "pickle_path": "./save/", | |||||
"save_best_dev": True, "model_name": "default_model_name.pkl", | |||||
"loss": Loss(None), | |||||
"optimizer": Optimizer("Adam", lr=0.001, weight_decay=0), | |||||
"vocab_size": 20, | |||||
"word_emb_dim": 100, | |||||
"rnn_hidden_units": 100, | |||||
"num_classes": 3 | |||||
} | |||||
trainer = SeqLabelTrainer() | |||||
train_data = [ | |||||
[[1, 2, 3, 4, 5, 6], [1, 0, 1, 0, 1, 2]], | |||||
[[2, 3, 4, 5, 1, 6], [0, 1, 0, 1, 0, 2]], | |||||
[[1, 4, 1, 4, 1, 6], [1, 0, 1, 0, 1, 2]], | |||||
[[1, 2, 3, 4, 5, 6], [1, 0, 1, 0, 1, 2]], | |||||
[[2, 3, 4, 5, 1, 6], [0, 1, 0, 1, 0, 2]], | |||||
[[1, 4, 1, 4, 1, 6], [1, 0, 1, 0, 1, 2]], | |||||
] | |||||
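# each sample is [word_id_sequence, label_id_sequence]; the dev set below simply reuses the training data for this smoke test | |||||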
dev_data = train_data | |||||
model = SeqLabeling(args) | |||||
trainer.train(network=model, train_data=train_data, dev_data=dev_data) |
@@ -1,65 +1,11 @@ | |||||
[General] | |||||
revision = "first" | |||||
datapath = "./data/smallset/imdb/" | |||||
embed_path = "./data/smallset/imdb/embedding.txt" | |||||
optimizer = "adam" | |||||
attn_mode = "rout" | |||||
seq_encoder = "bilstm" | |||||
out_caps_num = 5 | |||||
rout_iter = 3 | |||||
max_snt_num = 40 | |||||
max_wd_num = 40 | |||||
max_epochs = 50 | |||||
pre_trained = true | |||||
batch_sz = 32 | |||||
batch_sz_min = 32 | |||||
bucket_sz = 5000 | |||||
partial_update_until_epoch = 2 | |||||
embed_size = 300 | |||||
hidden_size = 200 | |||||
dense_hidden = [300, 10] | |||||
lr = 0.0002 | |||||
decay_steps = 1000 | |||||
decay_rate = 0.9 | |||||
dropout = 0.2 | |||||
early_stopping = 7 | |||||
reg = 1e-06 | |||||
[My] | |||||
datapath = "./data/smallset/imdb/" | |||||
embed_path = "./data/smallset/imdb/embedding.txt" | |||||
optimizer = "adam" | |||||
attn_mode = "rout" | |||||
seq_encoder = "bilstm" | |||||
out_caps_num = 5 | |||||
rout_iter = 3 | |||||
max_snt_num = 40 | |||||
max_wd_num = 40 | |||||
max_epochs = 50 | |||||
pre_trained = true | |||||
batch_sz = 32 | |||||
batch_sz_min = 32 | |||||
bucket_sz = 5000 | |||||
partial_update_until_epoch = 2 | |||||
embed_size = 300 | |||||
hidden_size = 200 | |||||
dense_hidden = [300, 10] | |||||
lr = 0.0002 | |||||
decay_steps = 1000 | |||||
decay_rate = 0.9 | |||||
dropout = 0.2 | |||||
early_stopping = 70 | |||||
reg = 1e-05 | |||||
test = 5 | |||||
new_attr = 40 | |||||
[POS] | |||||
[test_seq_label_trainer] | |||||
epochs = 1 | epochs = 1 | ||||
batch_size = 32 | batch_size = 32 | ||||
pickle_path = "./data_for_tests/" | |||||
validate = true | validate = true | ||||
save_best_dev = true | save_best_dev = true | ||||
model_saved_path = "./" | |||||
use_cuda = true | |||||
[test_seq_label_model] | |||||
rnn_hidden_units = 100 | rnn_hidden_units = 100 | ||||
rnn_layers = 1 | rnn_layers = 1 | ||||
rnn_bi_direction = true | rnn_bi_direction = true | ||||
@@ -68,13 +14,12 @@ dropout = 0.5 | |||||
use_crf = true | use_crf = true | ||||
use_cuda = true | use_cuda = true | ||||
[POS_test] | |||||
[test_seq_label_tester] | |||||
save_output = true | save_output = true | ||||
validate_in_training = true | validate_in_training = true | ||||
save_dev_input = false | save_dev_input = false | ||||
save_loss = true | save_loss = true | ||||
batch_size = 1 | batch_size = 1 | ||||
pickle_path = "./data_for_tests/" | |||||
rnn_hidden_units = 100 | rnn_hidden_units = 100 | ||||
rnn_layers = 1 | rnn_layers = 1 | ||||
rnn_bi_direction = true | rnn_bi_direction = true | ||||
@@ -84,7 +29,6 @@ use_crf = true | |||||
use_cuda = true | use_cuda = true | ||||
[POS_infer] | [POS_infer] | ||||
pickle_path = "./data_for_tests/" | |||||
rnn_hidden_units = 100 | rnn_hidden_units = 100 | ||||
rnn_layers = 1 | rnn_layers = 1 | ||||
rnn_bi_direction = true | rnn_bi_direction = true | ||||
@@ -95,14 +39,9 @@ num_classes = 27 | |||||
[text_class] | [text_class] | ||||
epochs = 1 | epochs = 1 | ||||
batch_size = 10 | batch_size = 10 | ||||
pickle_path = "./save_path/" | |||||
validate = false | validate = false | ||||
save_best_dev = false | save_best_dev = false | ||||
model_saved_path = "./save_path/" | |||||
use_cuda = true | use_cuda = true | ||||
learn_rate = 1e-3 | learn_rate = 1e-3 | ||||
momentum = 0.9 | momentum = 0.9 | ||||
[text_class_model] | |||||
vocab_size = 867 | |||||
num_classes = 18 | |||||
model_name = "class_model.pkl" |
@@ -0,0 +1,7 @@ | |||||
[test] | |||||
x = 1 | |||||
y = 2 | |||||
z = 3 | |||||
input = [1,2,3] | |||||
text = "this is text" | |||||
doubles = 0.5 |
@@ -0,0 +1,75 @@ | |||||
import os | |||||
import configparser | |||||
import json | |||||
import unittest | |||||
from fastNLP.loader.config_loader import ConfigSection, ConfigLoader | |||||
from fastNLP.loader.dataset_loader import TokenizeDatasetLoader, POSDatasetLoader, LMDatasetLoader | |||||
class TestConfigLoader(unittest.TestCase): | |||||
def test_case_ConfigLoader(self): | |||||
def read_section_from_config(config_path, section_name): | |||||
dict = {} | |||||
if not os.path.exists(config_path): | |||||
raise FileNotFoundError("config file {} NOT found.".format(config_path)) | |||||
cfg = configparser.ConfigParser() | |||||
cfg.read(config_path) | |||||
if section_name not in cfg: | |||||
raise AttributeError("config file {} do NOT have section {}".format( | |||||
config_path, section_name | |||||
)) | |||||
gen_sec = cfg[section_name] | |||||
for s in gen_sec.keys(): | |||||
try: | |||||
val = json.loads(gen_sec[s]) | |||||
dict[s] = val | |||||
except Exception as e: | |||||
raise AttributeError("json can NOT load {} in section {}, config file {}".format( | |||||
s, section_name, config_path | |||||
)) | |||||
return dict | |||||
test_arg = ConfigSection() | |||||
ConfigLoader("config", "").load_config(os.path.join("./test/loader", "config"), {"test": test_arg}) | |||||
#ConfigLoader("config", "").load_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", | |||||
# {"test": test_arg}) | |||||
#dict = read_section_from_config("/home/ygxu/github/fastNLP_testing/fastNLP/test/loader/config", "test") | |||||
dict = read_section_from_config(os.path.join("./test/loader", "config"), "test") | |||||
for sec in dict: | |||||
if (sec not in test_arg) or (dict[sec] != test_arg[sec]): | |||||
raise AttributeError("ERROR") | |||||
for sec in test_arg.__dict__.keys(): | |||||
if (sec not in dict) or (dict[sec] != test_arg[sec]): | |||||
raise AttributeError("ERROR") | |||||
try: | |||||
not_exist = test_arg["NOT EXIST"] | |||||
except Exception as e: | |||||
pass | |||||
print("pass config test!") | |||||
class TestDatasetLoader(unittest.TestCase): | |||||
def test_case_TokenizeDatasetLoader(self): | |||||
loader = TokenizeDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8") | |||||
data = loader.load_pku(max_seq_len=32) | |||||
print("pass TokenizeDatasetLoader test!") | |||||
def test_case_POSDatasetLoader(self): | |||||
loader = POSDatasetLoader("people", "./test/data_for_tests/people.txt") | |||||
data = loader.load() | |||||
datas = loader.load_lines() | |||||
print("pass POSDatasetLoader test!") | |||||
def test_case_LMDatasetLoader(self): | |||||
loader = LMDatasetLoader("cws_pku_utf_8", "./test/data_for_tests/cws_pku_utf_8") | |||||
data = loader.load() | |||||
datas = loader.load_lines() | |||||
print("pass TokenizeDatasetLoader test!") |
@@ -0,0 +1,27 @@ | |||||
import torch | |||||
import unittest | |||||
from fastNLP.modules.encoder.masked_rnn import MaskedRNN | |||||
class TestMaskedRnn(unittest.TestCase): | |||||
def test_case_1(self): | |||||
masked_rnn = MaskedRNN(input_size=1, hidden_size=1, bidirectional=True, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
mask = torch.tensor([[[1], [0]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
def test_case_2(self): | |||||
masked_rnn = MaskedRNN(input_size=1, hidden_size=1, bidirectional=False, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
xx = torch.tensor([[[1.0]]]) | |||||
y = masked_rnn.step(xx) | |||||
y = masked_rnn.step(xx, mask=mask) |
@@ -0,0 +1,30 @@ | |||||
import torch | |||||
import unittest | |||||
from fastNLP.modules.other_modules import GroupNorm, LayerNormalization, BiLinear | |||||
class TestGroupNorm(unittest.TestCase): | |||||
def test_case_1(self): | |||||
gn = GroupNorm(num_features=1, num_groups=10, eps=1.5e-5) | |||||
x = torch.randn((20, 50, 10)) | |||||
y = gn(x) | |||||
class TestLayerNormalization(unittest.TestCase): | |||||
def test_case_1(self): | |||||
ln = LayerNormalization(d_hid=5, eps=2e-3) | |||||
x = torch.randn((20, 50, 5)) | |||||
y = ln(x) | |||||
class TestBiLinear(unittest.TestCase): | |||||
def test_case_1(self): | |||||
bl = BiLinear(n_left=5, n_right=5, n_out=10, bias=True) | |||||
x_left = torch.randn((7, 10, 20, 5)) | |||||
x_right = torch.randn((7, 10, 20, 5)) | |||||
y = bl(x_left, x_right) | |||||
print(bl) | |||||
bl2 = BiLinear(n_left=15, n_right=15, n_out=10, bias=True) |
@@ -0,0 +1,18 @@ | |||||
import torch | |||||
import numpy as np | |||||
import unittest | |||||
import fastNLP.modules.utils as utils | |||||
class TestUtils(unittest.TestCase): | |||||
def test_case_1(self): | |||||
a = torch.tensor([ | |||||
[1, 2, 3, 4, 5], [2, 3, 4, 5, 6] | |||||
]) | |||||
utils.orthogonal(a) | |||||
def test_case_2(self): | |||||
a = np.random.rand(100, 100) | |||||
utils.mst(a) | |||||
@@ -0,0 +1,28 @@ | |||||
import torch | |||||
import unittest | |||||
from fastNLP.modules.encoder.variational_rnn import VarMaskedFastLSTM | |||||
class TestMaskedRnn(unittest.TestCase): | |||||
def test_case_1(self): | |||||
masked_rnn = VarMaskedFastLSTM(input_size=1, hidden_size=1, bidirectional=True, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
mask = torch.tensor([[[1], [0]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
def test_case_2(self): | |||||
masked_rnn = VarMaskedFastLSTM(input_size=1, hidden_size=1, bidirectional=False, batch_first=True) | |||||
x = torch.tensor([[[1.0], [2.0]]]) | |||||
print(x.size()) | |||||
y = masked_rnn(x) | |||||
mask = torch.tensor([[[1], [1]]]) | |||||
y = masked_rnn(x, mask=mask) | |||||
xx = torch.tensor([[[1.0]]]) | |||||
#y, hidden = masked_rnn.step(xx) | |||||
#step() still has a bug | |||||
#y, hidden = masked_rnn.step(xx, mask=mask) |
@@ -20,7 +20,7 @@ class MyNERTrainer(SeqLabelTrainer): | |||||
override | override | ||||
:return: | :return: | ||||
""" | """ | ||||
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001) | |||||
self.optimizer = torch.optim.Adam(self._model.parameters(), lr=0.001) | |||||
self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5) | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3000, gamma=0.5) | ||||
def update(self): | def update(self): | ||||
@@ -13,6 +13,7 @@ from fastNLP.loader.dataset_loader import ClassDatasetLoader | |||||
from fastNLP.models.base_model import BaseModel | from fastNLP.models.base_model import BaseModel | ||||
from fastNLP.modules import aggregation | from fastNLP.modules import aggregation | ||||
from fastNLP.modules import encoder | from fastNLP.modules import encoder | ||||
from fastNLP.modules import decoder | |||||
class ClassificationModel(BaseModel): | class ClassificationModel(BaseModel): | ||||
@@ -20,20 +21,20 @@ class ClassificationModel(BaseModel): | |||||
Simple text classification model based on CNN. | Simple text classification model based on CNN. | ||||
""" | """ | ||||
def __init__(self, class_num, vocab_size): | |||||
def __init__(self, num_classes, vocab_size): | |||||
super(ClassificationModel, self).__init__() | super(ClassificationModel, self).__init__() | ||||
self.embed = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.conv = encoder.Conv( | |||||
self.emb = encoder.Embedding(nums=vocab_size, dims=300) | |||||
self.enc = encoder.Conv( | |||||
in_channels=300, out_channels=100, kernel_size=3) | in_channels=300, out_channels=100, kernel_size=3) | ||||
self.pool = aggregation.MaxPool() | |||||
self.output = encoder.Linear(input_size=100, output_size=class_num) | |||||
self.agg = aggregation.MaxPool() | |||||
self.dec = decoder.MLP(100, num_classes=num_classes) | |||||
def forward(self, x): | def forward(self, x): | ||||
x = self.embed(x) # [N,L] -> [N,L,C] | |||||
x = self.conv(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.pool(x) # [N,L,C] -> [N,C] | |||||
x = self.output(x) # [N,C] -> [N, N_class] | |||||
x = self.emb(x) # [N,L] -> [N,L,C] | |||||
x = self.enc(x) # [N,L,C_in] -> [N,L,C_out] | |||||
x = self.agg(x) # [N,L,C] -> [N,C] | |||||
x = self.dec(x) # [N,C] -> [N, N_class] | |||||
return x | return x | ||||
@@ -55,7 +56,7 @@ model_args = { | |||||
'num_classes': n_classes, | 'num_classes': n_classes, | ||||
'vocab_size': vocab_size | 'vocab_size': vocab_size | ||||
} | } | ||||
model = ClassificationModel(class_num=n_classes, vocab_size=vocab_size) | |||||
model = ClassificationModel(num_classes=n_classes, vocab_size=vocab_size) | |||||
# train model | # train model | ||||
train_args = { | train_args = { | ||||
@@ -75,4 +76,4 @@ trainer.cross_validate(model) | |||||
# predict using model | # predict using model | ||||
data_infer = [x[0] for x in data] | data_infer = [x[0] for x in data] | ||||
infer = ClassificationInfer(data_dir) | infer = ClassificationInfer(data_dir) | ||||
labels_pred = infer.predict(model, data_infer) | |||||
labels_pred = infer.predict(model, data_infer) |
@@ -1,7 +1,7 @@ | |||||
import os | |||||
import sys | import sys | ||||
sys.path.append("..") | sys.path.append("..") | ||||
import argparse | |||||
from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | from fastNLP.loader.config_loader import ConfigLoader, ConfigSection | ||||
from fastNLP.core.trainer import SeqLabelTrainer | from fastNLP.core.trainer import SeqLabelTrainer | ||||
from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader | from fastNLP.loader.dataset_loader import POSDatasetLoader, BaseLoader | ||||
@@ -11,17 +11,29 @@ from fastNLP.loader.model_loader import ModelLoader | |||||
from fastNLP.core.tester import SeqLabelTester | from fastNLP.core.tester import SeqLabelTester | ||||
from fastNLP.models.sequence_modeling import SeqLabeling | from fastNLP.models.sequence_modeling import SeqLabeling | ||||
from fastNLP.core.predictor import SeqLabelInfer | from fastNLP.core.predictor import SeqLabelInfer | ||||
from fastNLP.core.optimizer import Optimizer | |||||
parser = argparse.ArgumentParser() | |||||
parser.add_argument("-s", "--save", type=str, default="./seq_label/", help="path to save pickle files") | |||||
parser.add_argument("-t", "--train", type=str, default="./data_for_tests/people.txt", | |||||
help="path to the training data") | |||||
parser.add_argument("-c", "--config", type=str, default="./data_for_tests/config", help="path to the config file") | |||||
parser.add_argument("-m", "--model_name", type=str, default="seq_label_model.pkl", help="the name of the model") | |||||
parser.add_argument("-i", "--infer", type=str, default="data_for_tests/people_infer.txt", | |||||
help="data used for inference") | |||||
data_name = "people.txt" | |||||
data_path = "data_for_tests/people.txt" | |||||
pickle_path = "seq_label/" | |||||
data_infer_path = "data_for_tests/people_infer.txt" | |||||
args = parser.parse_args() | |||||
pickle_path = args.save | |||||
model_name = args.model_name | |||||
config_dir = args.config | |||||
data_path = args.train | |||||
data_infer_path = args.infer | |||||
def infer(): | def infer(): | ||||
# Load infer configuration, the same as test | # Load infer configuration, the same as test | ||||
test_args = ConfigSection() | test_args = ConfigSection() | ||||
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) | |||||
ConfigLoader("config.cfg", "").load_config(config_dir, {"POS_infer": test_args}) | |||||
# fetch dictionary size and number of labels from pickle files | # fetch dictionary size and number of labels from pickle files | ||||
word2index = load_pickle(pickle_path, "word2id.pkl") | word2index = load_pickle(pickle_path, "word2id.pkl") | ||||
@@ -33,11 +45,11 @@ def infer(): | |||||
model = SeqLabeling(test_args) | model = SeqLabeling(test_args) | ||||
# Dump trained parameters into the model | # Dump trained parameters into the model | ||||
ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl") | |||||
ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name)) | |||||
print("model loaded!") | print("model loaded!") | ||||
# Data Loader | # Data Loader | ||||
raw_data_loader = BaseLoader(data_name, data_infer_path) | |||||
raw_data_loader = BaseLoader("xxx", data_infer_path) | |||||
infer_data = raw_data_loader.load_lines() | infer_data = raw_data_loader.load_lines() | ||||
# Inference interface | # Inference interface | ||||
@@ -51,49 +63,72 @@ def infer(): | |||||
def train_and_test(): | def train_and_test(): | ||||
# Config Loader | # Config Loader | ||||
train_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args}) | |||||
trainer_args = ConfigSection() | |||||
model_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config(config_dir, { | |||||
"test_seq_label_trainer": trainer_args, "test_seq_label_model": model_args}) | |||||
# Data Loader | # Data Loader | ||||
pos_loader = POSDatasetLoader(data_name, data_path) | |||||
pos_loader = POSDatasetLoader("xxx", data_path) | |||||
train_data = pos_loader.load_lines() | train_data = pos_loader.load_lines() | ||||
# Preprocessor | # Preprocessor | ||||
p = SeqLabelPreprocess() | p = SeqLabelPreprocess() | ||||
data_train, data_dev = p.run(train_data, pickle_path=pickle_path, train_dev_split=0.5) | data_train, data_dev = p.run(train_data, pickle_path=pickle_path, train_dev_split=0.5) | ||||
train_args["vocab_size"] = p.vocab_size | |||||
train_args["num_classes"] = p.num_classes | |||||
# Trainer | |||||
trainer = SeqLabelTrainer(train_args) | |||||
model_args["vocab_size"] = p.vocab_size | |||||
model_args["num_classes"] = p.num_classes | |||||
# Trainer: two definition styles | |||||
# 1 | |||||
# trainer = SeqLabelTrainer(trainer_args.data) | |||||
# 2 | |||||
trainer = SeqLabelTrainer( | |||||
epochs=trainer_args["epochs"], | |||||
batch_size=trainer_args["batch_size"], | |||||
validate=trainer_args["validate"], | |||||
use_cuda=trainer_args["use_cuda"], | |||||
pickle_path=pickle_path, | |||||
save_best_dev=trainer_args["save_best_dev"], | |||||
model_name=model_name, | |||||
optimizer=Optimizer("SGD", lr=0.01, momentum=0.9), | |||||
) | |||||
# Model | # Model | ||||
model = SeqLabeling(train_args) | |||||
model = SeqLabeling(model_args) | |||||
# Start training | # Start training | ||||
trainer.train(model, data_train, data_dev) | trainer.train(model, data_train, data_dev) | ||||
print("Training finished!") | print("Training finished!") | ||||
# Saver | # Saver | ||||
saver = ModelSaver(pickle_path + "saved_model.pkl") | |||||
saver = ModelSaver(os.path.join(pickle_path, model_name)) | |||||
saver.save_pytorch(model) | saver.save_pytorch(model) | ||||
print("Model saved!") | print("Model saved!") | ||||
del model, trainer, pos_loader | del model, trainer, pos_loader | ||||
# Define the same model | # Define the same model | ||||
model = SeqLabeling(train_args) | |||||
model = SeqLabeling(model_args) | |||||
# Dump trained parameters into the model | # Dump trained parameters into the model | ||||
ModelLoader.load_pytorch(model, pickle_path + "saved_model.pkl") | |||||
ModelLoader.load_pytorch(model, os.path.join(pickle_path, model_name)) | |||||
print("model loaded!") | print("model loaded!") | ||||
# Load test configuration | # Load test configuration | ||||
test_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args}) | |||||
tester_args = ConfigSection() | |||||
ConfigLoader("config.cfg", "").load_config(config_dir, {"test_seq_label_tester": tester_args}) | |||||
# Tester | # Tester | ||||
tester = SeqLabelTester(test_args) | |||||
tester = SeqLabelTester(save_output=False, | |||||
save_loss=False, | |||||
save_best_dev=False, | |||||
batch_size=4, | |||||
use_cuda=False, | |||||
pickle_path=pickle_path, | |||||
model_name="seq_label_in_test.pkl", | |||||
print_every_step=1 | |||||
) | |||||
# Start testing with validation data | # Start testing with validation data | ||||
tester.test(model, data_dev) | tester.test(model, data_dev) | ||||
@@ -1,13 +1,24 @@ | |||||
import sys | |||||
sys.path.append("..") | |||||
from fastNLP.fastnlp import FastNLP | from fastNLP.fastnlp import FastNLP | ||||
from fastNLP.fastnlp import interpret_word_seg_results | |||||
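# pickle files produced by the CWS training script; this absolute path is machine-specific and needs to be adjusted locally | |||||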
PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/" | |||||
def word_seg(): | def word_seg(): | ||||
nlp = FastNLP("./data_for_tests/") | |||||
nlp.load("seq_label_model") | |||||
text = "这是最好的基于深度学习的中文分词系统。" | |||||
result = nlp.run(text) | |||||
print(result) | |||||
print("FastNLP finished!") | |||||
nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES) | |||||
nlp.load("cws_basic_model", config_file="cws.cfg", section_name="POS_test") | |||||
text = ["这是最好的基于深度学习的中文分词系统。", | |||||
"大王叫我来巡山。", | |||||
"我党多年来致力于改善人民生活水平。"] | |||||
results = nlp.run(text) | |||||
print(results) | |||||
for example in results: | |||||
words, labels = [], [] | |||||
for res in example: | |||||
words.append(res[0]) | |||||
labels.append(res[1]) | |||||
print(interpret_word_seg_results(words, labels)) | |||||
def text_class(): | def text_class(): | ||||
@@ -19,5 +30,14 @@ def text_class(): | |||||
print("FastNLP finished!") | print("FastNLP finished!") | ||||
def test_word_seg_interpret(): | |||||
foo = [[('这', 'S'), ('是', 'S'), ('最', 'S'), ('好', 'S'), ('的', 'S'), ('基', 'B'), ('于', 'E'), ('深', 'B'), ('度', 'E'), | |||||
('学', 'B'), ('习', 'E'), ('的', 'S'), ('中', 'B'), ('文', 'E'), ('分', 'B'), ('词', 'E'), ('系', 'B'), ('统', 'E'), | |||||
('。', 'S')]] | |||||
chars = [x[0] for x in foo[0]] | |||||
labels = [x[1] for x in foo[0]] | |||||
print(interpret_word_seg_results(chars, labels)) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
text_class() | |||||
word_seg() |
@@ -1,6 +1,7 @@ | |||||
# Python: 3.5 | # Python: 3.5 | ||||
# encoding: utf-8 | # encoding: utf-8 | ||||
import argparse | |||||
import os | import os | ||||
import sys | import sys | ||||
@@ -13,75 +14,105 @@ from fastNLP.loader.model_loader import ModelLoader | |||||
from fastNLP.core.preprocess import ClassPreprocess | from fastNLP.core.preprocess import ClassPreprocess | ||||
from fastNLP.models.cnn_text_classification import CNNText | from fastNLP.models.cnn_text_classification import CNNText | ||||
from fastNLP.saver.model_saver import ModelSaver | from fastNLP.saver.model_saver import ModelSaver | ||||
from fastNLP.core.optimizer import Optimizer | |||||
from fastNLP.core.loss import Loss | |||||
save_path = "./test_classification/" | |||||
data_dir = "./data_for_tests/" | |||||
train_file = 'text_classify.txt' | |||||
model_name = "model_class.pkl" | |||||
parser = argparse.ArgumentParser() | |||||
parser.add_argument("-s", "--save", type=str, default="./test_classification/", help="path to save pickle files") | |||||
parser.add_argument("-t", "--train", type=str, default="./data_for_tests/text_classify.txt", | |||||
help="path to the training data") | |||||
parser.add_argument("-c", "--config", type=str, default="./data_for_tests/config", help="path to the config file") | |||||
parser.add_argument("-m", "--model_name", type=str, default="classify_model.pkl", help="the name of the model") | |||||
args = parser.parse_args() | |||||
save_dir = args.save | |||||
train_data_dir = args.train | |||||
model_name = args.model_name | |||||
config_dir = args.config | |||||
def infer(): | def infer(): | ||||
# load dataset | # load dataset | ||||
print("Loading data...") | print("Loading data...") | ||||
ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) | |||||
ds_loader = ClassDatasetLoader("train", train_data_dir) | |||||
data = ds_loader.load() | data = ds_loader.load() | ||||
unlabeled_data = [x[0] for x in data] | unlabeled_data = [x[0] for x in data] | ||||
# pre-process data | # pre-process data | ||||
pre = ClassPreprocess() | pre = ClassPreprocess() | ||||
vocab_size, n_classes = pre.run(data, pickle_path=save_path) | |||||
print("vocabulary size:", vocab_size) | |||||
print("number of classes:", n_classes) | |||||
data = pre.run(data, pickle_path=save_dir) | |||||
print("vocabulary size:", pre.vocab_size) | |||||
print("number of classes:", pre.num_classes) | |||||
model_args = ConfigSection() | model_args = ConfigSection() | ||||
ConfigLoader.load_config("data_for_tests/config", {"text_class_model": model_args}) | |||||
# TODO: load from config file | |||||
model_args["vocab_size"] = pre.vocab_size | |||||
model_args["num_classes"] = pre.num_classes | |||||
# ConfigLoader.load_config(config_dir, {"text_class_model": model_args}) | |||||
# construct model | # construct model | ||||
print("Building model...") | print("Building model...") | ||||
cnn = CNNText(model_args) | cnn = CNNText(model_args) | ||||
# Dump trained parameters into the model | # Dump trained parameters into the model | ||||
ModelLoader.load_pytorch(cnn, "./data_for_tests/saved_model.pkl") | |||||
ModelLoader.load_pytorch(cnn, os.path.join(save_dir, model_name)) | |||||
print("model loaded!") | print("model loaded!") | ||||
infer = ClassificationInfer(data_dir) | |||||
infer = ClassificationInfer(pickle_path=save_dir) | |||||
results = infer.predict(cnn, unlabeled_data) | results = infer.predict(cnn, unlabeled_data) | ||||
print(results) | print(results) | ||||
def train(): | def train(): | ||||
train_args, model_args = ConfigSection(), ConfigSection() | train_args, model_args = ConfigSection(), ConfigSection() | ||||
ConfigLoader.load_config("data_for_tests/config", {"text_class": train_args, "text_class_model": model_args}) | |||||
ConfigLoader.load_config(config_dir, {"text_class": train_args}) | |||||
# load dataset | # load dataset | ||||
print("Loading data...") | print("Loading data...") | ||||
ds_loader = ClassDatasetLoader("train", os.path.join(data_dir, train_file)) | |||||
ds_loader = ClassDatasetLoader("train", train_data_dir) | |||||
data = ds_loader.load() | data = ds_loader.load() | ||||
print(data[0]) | print(data[0]) | ||||
# pre-process data | # pre-process data | ||||
pre = ClassPreprocess() | pre = ClassPreprocess() | ||||
data_train = pre.run(data, pickle_path=save_path) | |||||
data_train = pre.run(data, pickle_path=save_dir) | |||||
print("vocabulary size:", pre.vocab_size) | print("vocabulary size:", pre.vocab_size) | ||||
print("number of classes:", pre.num_classes) | print("number of classes:", pre.num_classes) | ||||
model_args["num_classes"] = pre.num_classes | |||||
model_args["vocab_size"] = pre.vocab_size | |||||
# construct model | # construct model | ||||
print("Building model...") | print("Building model...") | ||||
model = CNNText(model_args) | model = CNNText(model_args) | ||||
# ConfigSaver().save_config(config_dir, {"text_class_model": model_args}) | |||||
# train | # train | ||||
print("Training...") | print("Training...") | ||||
trainer = ClassificationTrainer(train_args) | |||||
# 1 | |||||
# trainer = ClassificationTrainer(train_args) | |||||
# 2 | |||||
trainer = ClassificationTrainer(epochs=train_args["epochs"], | |||||
batch_size=train_args["batch_size"], | |||||
validate=train_args["validate"], | |||||
use_cuda=train_args["use_cuda"], | |||||
pickle_path=save_dir, | |||||
save_best_dev=train_args["save_best_dev"], | |||||
model_name=model_name, | |||||
loss=Loss("cross_entropy"), | |||||
optimizer=Optimizer("SGD", lr=0.001, momentum=0.9)) | |||||
trainer.train(model, data_train) | trainer.train(model, data_train) | ||||
print("Training finished!") | print("Training finished!") | ||||
saver = ModelSaver("./data_for_tests/saved_model.pkl") | |||||
saver = ModelSaver(os.path.join(save_dir, model_name)) | |||||
saver.save_pytorch(model) | saver.save_pytorch(model) | ||||
print("Model saved!") | print("Model saved!") | ||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
train() | train() | ||||
# infer() | |||||
infer() |