@@ -1,4 +1,4 @@
-from saver.logger import Logger
+import numpy as np


 class Action(object):
@@ -8,39 +8,64 @@ class Action(object):
     def __init__(self):
         super(Action, self).__init__()
-        self.logger = Logger("logger_output.txt")
-
-    def load_config(self, args):
-        raise NotImplementedError
-
-    def load_dataset(self, args):
+
+
+class BaseSampler(object):
+    """
+    Base class for all samplers.
+    """
+
+    def __init__(self, data_set):
+        self.data_set_length = len(data_set)
+
+    def __len__(self):
+        return self.data_set_length
+
+    def __iter__(self):
         raise NotImplementedError
-
-    def log(self, string):
-        self.logger.log(string)
-
-    def batchify(self, batch_size, X, Y=None):
-        """
-        :param batch_size: int
-        :param X: feature matrix of size [n_sample, m_feature]
-        :param Y: label vector of size [n_sample, 1] (optional)
-        :return iteration: int, the number of steps in each epoch
-                generator: generator, to generate batch inputs
-        """
-        n_samples = X.shape[0]
-        num_iter = n_samples // batch_size
-        if Y is None:
-            generator = self._batch_generate(batch_size, num_iter, X)
-        else:
-            generator = self._batch_generate(batch_size, num_iter, X, Y)
-        return num_iter, generator
-
-    @staticmethod
-    def _batch_generate(batch_size, num_iter, *data):
-        for step in range(num_iter):
-            start = batch_size * step
-            end = batch_size * (step + 1)
-            yield tuple([x[start:end] for x in data])
-
-    def make_log(self, *args):
-        return "log"
+
+
+class SequentialSampler(BaseSampler):
+    """
+    Sample data in the original order.
+    """
+
+    def __init__(self, data_set):
+        super(SequentialSampler, self).__init__(data_set)
+
+    def __iter__(self):
+        return iter(range(self.data_set_length))
+
+
+class RandomSampler(BaseSampler):
+    """
+    Sample data in random permutation order.
+    """
+
+    def __init__(self, data_set):
+        super(RandomSampler, self).__init__(data_set)
+
+    def __iter__(self):
+        return iter(np.random.permutation(self.data_set_length))
+
+
+class Batchifier(object):
+    """
+    Wrap a random or sequential sampler to generate mini-batches of indices.
+    """
+
+    def __init__(self, sampler, batch_size, drop_last=True):
+        super(Batchifier, self).__init__()
+        self.sampler = sampler
+        self.batch_size = batch_size
+        self.drop_last = drop_last
+
+    def __iter__(self):
+        batch = []
+        for idx in self.sampler:
+            batch.append(idx)
+            if len(batch) == self.batch_size:
+                yield batch
+                batch = []
+        # yield the trailing partial batch only when drop_last is off
+        if 0 < len(batch) < self.batch_size and not self.drop_last:
+            yield batch
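
# Editor's sketch (not part of the diff): how RandomSampler and Batchifier are
# meant to compose. The sampler yields shuffled indices; Batchifier groups them
# into fixed-size index batches, and the caller looks the samples up itself.
# `toy_data` is a made-up example list.
if __name__ == "__main__":
    toy_data = [([1, 2, 3], 0), ([4, 5], 1), ([6], 0), ([7, 8], 1)]
    batcher = Batchifier(RandomSampler(toy_data), batch_size=2, drop_last=True)
    for indices in batcher:
        batch = [toy_data[i] for i in indices]  # e.g. [([4, 5], 1), ([6], 0)]
        print(batch)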
@@ -2,7 +2,7 @@ from collections import namedtuple
 import numpy as np

-from fastNLP.action import Action
+from fastNLP.action.action import Action


 class Tester(Action):
@@ -1,93 +1,320 @@
+import _pickle
 from collections import namedtuple

-from .action import Action
-from .tester import Tester
+import numpy as np
+import torch
+
+from fastNLP.action.action import Action
+from fastNLP.action.action import RandomSampler, Batchifier
+from fastNLP.action.tester import Tester


-class Trainer(Action):
-    """
-    Trainer is a common training pipeline shared among all models.
-    """
-    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better",
-                                        "log_per_step", "log_validation", "batch_size"])
+class BaseTrainer(Action):
+    """Base trainer for all trainers.
+    Trainer receives a model and data, and then performs training.
+
+    Subclasses must implement the following abstract methods:
+        - prepare_input
+        - mode
+        - define_optimizer
+        - data_forward
+        - grad_backward
+        - get_loss
+    """
+    TrainConfig = namedtuple("config", ["epochs", "validate", "batch_size", "pickle_path"])

     def __init__(self, train_args):
         """
         :param train_args: namedtuple
-            training parameters
         """
-        super(Trainer, self).__init__()
+        super(BaseTrainer, self).__init__()
         self.n_epochs = train_args.epochs
         self.validate = train_args.validate
-        self.save_when_better = train_args.save_when_better
-        self.log_per_step = train_args.log_per_step
-        self.log_validation = train_args.log_validation
         self.batch_size = train_args.batch_size
+        self.pickle_path = train_args.pickle_path
+        self.model = None
+        self.iterator = None
+        self.loss_func = None
-    def train(self, network, train_data, dev_data=None):
-        """
-        :param network: the models controller
-        :param train_data: raw data for training
-        :param dev_data: raw data for validation
-        This method will call all the base methods of network (implemented in models.base_model).
-        """
-        train_x, train_y = network.prepare_input(train_data)
-        iterations, train_batch_generator = self.batchify(self.batch_size, train_x, train_y)
+    def train(self, network):
+        """General training loop.
+        :param network: a model
+
+        The method is framework independent.
+        It works by calling the following methods:
+            - prepare_input
+            - mode
+            - define_optimizer
+            - data_forward
+            - get_loss
+            - grad_backward
+            - update
+        Subclasses must implement these methods for a specific framework.
+        """
+        self.model = network
+        data_train, data_dev, data_test, embedding = self.prepare_input(self.pickle_path)

         test_args = Tester.TestConfig(save_output=True, validate_in_training=True,
                                       save_dev_input=True, save_loss=True, batch_size=self.batch_size)
         evaluator = Tester(test_args)

         best_loss = 1e10
-        loss_history = list()
+        iterations = len(data_train) // self.batch_size

         for epoch in range(self.n_epochs):
-            network.mode(test=False)  # turn on the train mode
-            network.define_optimizer()
+            self.mode(test=False)
+            self.define_optimizer()
             for step in range(iterations):
-                batch_x, batch_y = train_batch_generator.__next__()
-                prediction = network.data_forward(batch_x)
-                loss = network.get_loss(prediction, batch_y)
-                network.grad_backward()
-                if step % self.log_per_step == 0:
-                    print("step ", step)
-                    loss_history.append(loss)
-                    self.log(self.make_log(epoch, step, loss))
+                batch_x, batch_y = self.batchify(self.batch_size, data_train)
+                prediction = self.data_forward(network, batch_x)
+                loss = self.get_loss(prediction, batch_y)
+                self.grad_backward(loss)
+                self.update()

+            #################### evaluate over dev set ###################
             if self.validate:
-                if dev_data is None:
+                if data_dev is None:
                     raise RuntimeError("No validation data provided.")
-                # give all controls to tester
-                evaluator.test(network, dev_data)
-                if self.log_validation:
-                    self.log(self.make_valid_log(epoch, evaluator.loss))
+                evaluator.test(network, data_dev)
                 if evaluator.loss < best_loss:
                     best_loss = evaluator.loss
-                    if self.save_when_better:
-                        self.save_model(network)

         # finish training
-    def make_log(self, *args):
-        return "make a log"
+    def prepare_input(self, data_path):
+        """
+        To do: load pkl files of train/dev/test and embedding
+        """
+        data_train = _pickle.load(open(data_path + "data_train.pkl", "rb"))
+        data_dev = _pickle.load(open(data_path + "data_dev.pkl", "rb"))
+        data_test = _pickle.load(open(data_path + "data_test.pkl", "rb"))
+        embedding = _pickle.load(open(data_path + "embedding.pkl", "rb"))
+        return data_train, data_dev, data_test, embedding

-    def make_valid_log(self, *args):
-        return "make a valid log"
+    def mode(self, test=False):
+        """
+        Tell the network whether it is being trained or evaluated.
+        :param test: bool
+        """
+        raise NotImplementedError

-    def save_model(self, model):
-        model.save()
+    def define_optimizer(self):
+        """
+        Define a framework-specific optimizer specified by the model.
+        """
+        raise NotImplementedError

-    def load_data(self, data_name):
-        print("load data")
+    def update(self):
+        """
+        Perform a weight update on the model.

-    def load_config(self, args):
-        raise NotImplementedError
+        For PyTorch, just call the optimizer to update.
+        """
+        raise NotImplementedError

-    def load_dataset(self, args):
-        raise NotImplementedError
+    def data_forward(self, network, x):
+        """
+        Forward pass through the network.
+        :param network: a model
+        :param x: input feature matrix and label vector
+        :return: output by the model
+
+        For PyTorch, just do "network(*x)"
+        """
+        raise NotImplementedError
+
+    def grad_backward(self, loss):
+        """
+        Compute gradients by back-propagation (the chain rule).
+        :param loss: a scalar where back-prop starts
+
+        For PyTorch, just do "loss.backward()"
+        """
+        raise NotImplementedError
+
+    def get_loss(self, predict, truth):
+        """
+        Compute loss given prediction and ground truth.
+        :param predict: prediction label vector
+        :param truth: ground truth label vector
+        :return: a scalar
+        """
+        if self.loss_func is None:
+            if hasattr(self.model, "loss"):
+                self.loss_func = self.model.loss
+            else:
+                self.define_loss()
+        return self.loss_func(predict, truth)
+
+    def define_loss(self):
+        """
+        Assign an instance of a loss function to self.loss_func.
+        E.g. self.loss_func = nn.CrossEntropyLoss()
+        """
+        raise NotImplementedError
+
+    def batchify(self, batch_size, data):
+        """
+        1. Perform batching over data to produce a batch of training data.
+        2. Add padding.
+        :param batch_size: int, the size of a batch
+        :param data: list. Each entry is a sample, which is also a list of features and label(s).
+            E.g.
+                [
+                    [[feature_1, feature_2, feature_3], [label_1, label_2]],  # sample 1
+                    [[feature_1, feature_2, feature_3], [label_1, label_2]],  # sample 2
+                    ...
+                ]
+        :return batch_x: list. Each entry is a list of features of a sample.
+                batch_y: list. Each entry is a list of labels of a sample.
+        """
+        if self.iterator is None:
+            self.iterator = iter(Batchifier(RandomSampler(data), batch_size, drop_last=True))
+        indices = next(self.iterator)
+        batch = [data[idx] for idx in indices]
+        batch_x = [sample[0] for sample in batch]
+        batch_y = [sample[1] for sample in batch]
+        batch_x = self.pad(batch_x)
+        return batch_x, batch_y
+
+    @staticmethod
+    def pad(batch, fill=0):
+        """
+        Pad a batch of samples to the maximum length.
+        :param batch: list of list
+        :param fill: word index to pad with, default 0.
+        :return: the padded batch
+        """
+        max_length = max([len(x) for x in batch])
+        for idx, sample in enumerate(batch):
+            if len(sample) < max_length:
+                # extend with (max_length - len) fill values, not a single product
+                batch[idx] = sample + [fill] * (max_length - len(sample))
+        return batch
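
# Editor's sketch (not part of the diff): the padding contract above in one
# line; shorter samples are right-padded with `fill` up to the longest sample.
assert BaseTrainer.pad([[4, 7, 9], [2]]) == [[4, 7, 9], [2, 0, 0]]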
+
+class ToyTrainer(BaseTrainer):
+    """A simple trainer for testing, built on a hand-rolled NumPy linear model."""
+
+    def __init__(self, train_args):
+        super(ToyTrainer, self).__init__(train_args)
+        self.test_mode = False
+        self.weight = np.random.rand(5, 1)
+        self.bias = np.random.rand()
+        self._loss = 0
+        self._optimizer = None
+
+    def prepare_input(self, data):
+        return data[:, :-1], data[:, -1]
+
+    def mode(self, test=False):
+        self.model.mode(test)
+
+    def data_forward(self, network, x):
+        return np.matmul(x, self.weight) + self.bias
+
+    def grad_backward(self, loss):
+        # the toy model is plain NumPy, so there is no autograd to call
+        print("loss gradient backward")
+
+    def get_loss(self, pred, truth):
+        self._loss = np.mean(np.square(pred - truth))
+        return self._loss
+
+    def define_optimizer(self):
+        # no framework optimizer is needed for the NumPy toy model
+        self._optimizer = None
+
+    def update(self):
+        pass
+
+
+class WordSegTrainer(BaseTrainer):
+    """
+    reserved for changes
+    """
+
+    def __init__(self, train_args):
+        super(WordSegTrainer, self).__init__(train_args)
+        self.id2word = None
+        self.word2id = None
+        self.id2tag = None
+        self.tag2id = None
+
+        self.lstm_batch_size = 8
+        self.lstm_seq_len = 32  # Trainer batch_size == lstm_batch_size * lstm_seq_len
+        self.hidden_dim = 100
+        self.lstm_num_layers = 2
+        self.vocab_size = 100
+        self.word_emb_dim = 100
+
+        self.hidden = (self.to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)),
+                       self.to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)))
+
+        self.optimizer = None
+        self._loss = None
+        self.USE_GPU = False
+
+    def to_var(self, x):
+        if torch.cuda.is_available() and self.USE_GPU:
+            x = x.cuda()
+        return torch.autograd.Variable(x)
+
+    def prepare_input(self, data):
+        """
+        Perform word-index lookup to convert strings into indices.
+        :param data: list of strings, each string contains word + space + [B, M, E, S]
+        :return (words, tags): arrays of word indices and tag indices
+        """
+        word_list = []
+        tag_list = []
+        for line in data:
+            if len(line) > 2:
+                tokens = line.split("#")
+                word_list.append(tokens[0])
+                tag_list.append(tokens[2][0])
+        self.id2word = list(set(word_list))
+        self.word2id = {word: idx for idx, word in enumerate(self.id2word)}
+        self.id2tag = list(set(tag_list))
+        self.tag2id = {tag: idx for idx, tag in enumerate(self.id2tag)}
+        words = np.array([self.word2id[w] for w in word_list]).reshape(-1, 1)
+        tags = np.array([self.tag2id[t] for t in tag_list]).reshape(-1, 1)
+        return words, tags
+
+    def mode(self, test=False):
+        if test:
+            self.model.eval()
+        else:
+            self.model.train()
+
+    def data_forward(self, network, x):
+        """
+        :param network: a PyTorch model
+        :param x: sequence of length [batch_size], word indices
+        :return: network output
+        """
+        x = x.reshape(self.lstm_batch_size, self.lstm_seq_len)
+        output, self.hidden = network(x, self.hidden)
+        return output
+
+    def define_optimizer(self):
+        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85)
+
+    def get_loss(self, predict, truth):
+        # CrossEntropyLoss is a module: instantiate it, then apply it
+        self._loss = torch.nn.CrossEntropyLoss()(predict, truth)
+        return self._loss
+
+    def grad_backward(self, loss):
+        self.model.zero_grad()
+        self._loss.backward()
+        torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2)
+
+    def update(self):
+        self.optimizer.step()
+
+
+if __name__ == "__main__":
+    train_args = BaseTrainer.TrainConfig(epochs=1, validate=False, batch_size=3, pickle_path="./")
+    trainer = BaseTrainer(train_args)
+    data_train = [[[1, 2, 3, 4], [0]] * 10] + [[[1, 3, 5, 2], [1]] * 10]
+    trainer.batchify(batch_size=3, data=data_train)
@@ -1,35 +0,0 @@ | |||||
class BasePreprocess(object): | |||||
def __init__(self, data, pickle_path): | |||||
super(BasePreprocess, self).__init__() | |||||
self.data = data | |||||
self.pickle_path = pickle_path | |||||
if not self.pickle_path.endswith('/'): | |||||
self.pickle_path = self.pickle_path + '/' | |||||
def word2id(self): | |||||
raise NotImplementedError | |||||
def id2word(self): | |||||
raise NotImplementedError | |||||
def class2id(self): | |||||
raise NotImplementedError | |||||
def id2class(self): | |||||
raise NotImplementedError | |||||
def embedding(self): | |||||
raise NotImplementedError | |||||
def data_train(self): | |||||
raise NotImplementedError | |||||
def data_dev(self): | |||||
raise NotImplementedError | |||||
def data_test(self): | |||||
raise NotImplementedError |
@@ -1,9 +1,8 @@
-from fastNLP.loader.base_loader import BaseLoader
-
 import configparser
-import traceback
 import json

+from fastNLP.loader.base_loader import BaseLoader
+

 class ConfigLoader(BaseLoader):
     """loader for configuration files"""
@@ -17,14 +16,14 @@ class ConfigLoader(BaseLoader):
         raise NotImplementedError

     @staticmethod
-    def loadConfig(filePath, sections):
+    def load_config(file_path, sections):
         """
-        :param filePath: the path of config file
+        :param file_path: the path of config file
         :param sections: the dict of sections
         :return:
        """
         cfg = configparser.ConfigParser()
-        cfg.read(filePath)
+        cfg.read(file_path)
         for s in sections:
             attr_list = [i for i in type(sections[s]).__dict__.keys() if
                          not callable(getattr(sections[s], i)) and not i.startswith("__")]
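
# Editor's sketch of the intended call pattern (hedged: the rest of load_config
# is cut off by this hunk, so the exact assignment back onto the section object
# is assumed). `PosConfig` and "config.ini" are hypothetical names.
#
#     class PosConfig(object):
#         epochs = 1
#         batch_size = 32
#
#     config = PosConfig()
#     ConfigLoader.load_config("config.ini", {"POS": config})
#     # after the call, config.epochs / config.batch_size presumably hold the
#     # values parsed from the [POS] section of config.ini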
@@ -1,6 +1,7 @@ | |||||
from fastNLP.loader.base_loader import BaseLoader | |||||
import os | import os | ||||
from fastNLP.loader.base_loader import BaseLoader | |||||
class DatasetLoader(BaseLoader): | class DatasetLoader(BaseLoader): | ||||
""""loader for data sets""" | """"loader for data sets""" | ||||
@@ -16,7 +17,6 @@ class POSDatasetLoader(DatasetLoader): | |||||
super(POSDatasetLoader, self).__init__(data_name, data_path) | super(POSDatasetLoader, self).__init__(data_name, data_path) | ||||
#self.data_set = self.load() | #self.data_set = self.load() | ||||
def load(self): | def load(self): | ||||
assert os.path.exists(self.data_path) | assert os.path.exists(self.data_path) | ||||
with open(self.data_path, "r", encoding="utf-8") as f: | with open(self.data_path, "r", encoding="utf-8") as f: | ||||
@@ -30,11 +30,11 @@ class POSDatasetLoader(DatasetLoader): | |||||
return lines | return lines | ||||
class ClassficationDatasetLoader(DatasetLoader): | |||||
class ClassificationDatasetLoader(DatasetLoader): | |||||
"""loader for classfication data sets""" | """loader for classfication data sets""" | ||||
def __init__(self, data_name, data_path): | def __init__(self, data_name, data_path): | ||||
super(ClassficationDatasetLoader, data_name) | |||||
super(ClassificationDatasetLoader, data_name).__init__() | |||||
def load(self): | def load(self): | ||||
assert os.path.exists(self.data_path) | assert os.path.exists(self.data_path) | ||||
@@ -58,6 +58,7 @@ class ClassficationDatasetLoader(DatasetLoader): | |||||
dataset.append(sentence) | dataset.append(sentence) | ||||
return dataset | return dataset | ||||
class ConllLoader(DatasetLoader): | class ConllLoader(DatasetLoader): | ||||
"""loader for conll format files""" | """loader for conll format files""" | ||||
@@ -1,25 +1,57 @@
-import pickle
-
 import _pickle
 import os

-from fastNLP.loader.base_preprocess import BasePreprocess

-DEFAULT_PADDING_LABEL = '<pad>'  #dict index = 0
-DEFAULT_UNKNOWN_LABEL = '<unk>'  #dict index = 1
+DEFAULT_PADDING_LABEL = '<pad>'  # dict index = 0
+DEFAULT_UNKNOWN_LABEL = '<unk>'  # dict index = 1
 DEFAULT_RESERVED_LABEL = ['<reserved-2>',
                           '<reserved-3>',
-                          '<reserved-4>']  #dict index = 2~4
-#the first vocab in dict with the index = 5
+                          '<reserved-4>']  # dict index = 2~4
+# the first vocab in dict has index = 5
+
+
+class BasePreprocess(object):
+
+    def __init__(self, data, pickle_path):
+        super(BasePreprocess, self).__init__()
+        self.data = data
+        self.pickle_path = pickle_path
+        if not self.pickle_path.endswith('/'):
+            self.pickle_path = self.pickle_path + '/'
+
+    def word2id(self):
+        raise NotImplementedError
+
+    def id2word(self):
+        raise NotImplementedError
+
+    def class2id(self):
+        raise NotImplementedError
+
+    def id2class(self):
+        raise NotImplementedError
+
+    def embedding(self):
+        raise NotImplementedError
+
+    def data_train(self):
+        raise NotImplementedError
+
+    def data_dev(self):
+        raise NotImplementedError
+
+    def data_test(self):
+        raise NotImplementedError


 class POSPreprocess(BasePreprocess):
     """
     This class is used to preprocess POS datasets.
-    In these datasets, each line are divided by '\t'
-    while the first Col is the vocabulary and the second
-    Col is the label.
+    In these datasets, each line is divided by '\t'.
+    The first column is the word.
+    The second column is the label.
     Different sentences are divided by an empty line.
     e.g:
     Tom label1
@@ -36,7 +68,9 @@ class POSPreprocess(BasePreprocess):
     """

     def __init__(self, data, pickle_path):
-        super(POSPreprocess, self).__init(data, pickle_path)
+        super(POSPreprocess, self).__init__(data, pickle_path)
+        self.word_dict = None
+        self.label_dict = None
         self.build_dict()
         self.word2id()
         self.id2word()
@@ -46,8 +80,6 @@ class POSPreprocess(BasePreprocess):
         self.data_train()
         self.data_dev()
         self.data_test()
-        #...

     def build_dict(self):
         self.word_dict = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1,
@@ -68,7 +100,6 @@ class POSPreprocess(BasePreprocess):
                 index = len(self.label_dict)
                 self.label_dict[label] = index

     def pickle_exist(self, pickle_name):
         """
         :param pickle_name: the filename of the target pickle file
@@ -82,7 +113,6 @@ class POSPreprocess(BasePreprocess):
         else:
             return False

     def word2id(self):
         if self.pickle_exist("word2id.pkl"):
             return
@@ -92,11 +122,10 @@ class POSPreprocess(BasePreprocess):
         with open(file_name, "wb") as f:
             _pickle.dump(self.word_dict, f)

     def id2word(self):
         if self.pickle_exist("id2word.pkl"):
             return
-        #nothing will be done if id2word.pkl exists
+        # nothing will be done if id2word.pkl exists

         id2word_dict = {}
         for word in self.word_dict:
@@ -105,7 +134,6 @@ class POSPreprocess(BasePreprocess):
         with open(file_name, "wb") as f:
             _pickle.dump(id2word_dict, f)

     def class2id(self):
         if self.pickle_exist("class2id.pkl"):
             return
@@ -115,11 +143,10 @@ class POSPreprocess(BasePreprocess):
         with open(file_name, "wb") as f:
             _pickle.dump(self.label_dict, f)

     def id2class(self):
         if self.pickle_exist("id2class.pkl"):
             return
-        #nothing will be done if id2class.pkl exists
+        # nothing will be done if id2class.pkl exists

         id2class_dict = {}
         for label in self.label_dict:
@@ -128,17 +155,15 @@ class POSPreprocess(BasePreprocess):
         with open(file_name, "wb") as f:
             _pickle.dump(id2class_dict, f)

     def embedding(self):
         if self.pickle_exist("embedding.pkl"):
             return
-        #nothing will be done if embedding.pkl exists
+        # nothing will be done if embedding.pkl exists

     def data_train(self):
         if self.pickle_exist("data_train.pkl"):
             return
-        #nothing will be done if data_train.pkl exists
+        # nothing will be done if data_train.pkl exists

         data_train = []
         sentence = []
@@ -1,4 +1,3 @@
-import numpy as np
 import torch
@@ -30,100 +29,6 @@ class BaseModel(torch.nn.Module):
         raise NotImplementedError


-class BaseController(object):
-    """Base Controller for all controllers.
-    This class and its subclasses are actually "controllers" of the PyTorch models.
-    They act as an interface between Trainer and the PyTorch models.
-    This controller provides the following methods to be called by Trainer.
-        - prepare_input
-        - mode
-        - define_optimizer
-        - data_forward
-        - grad_backward
-        - get_loss
-    """
-
-    def __init__(self):
-        """
-        Define PyTorch model parameters here.
-        """
-        pass
-
-    def prepare_input(self, data):
-        """
-        Perform data transformation from raw input to vector/matrix inputs.
-        :param data: raw inputs
-        :return (X, Y): tuple, input features and labels
-        """
-        raise NotImplementedError
-
-    def mode(self, test=False):
-        """
-        Tell the network to be trained or not, required by PyTorch.
-        :param test: bool
-        """
-        raise NotImplementedError
-
-    def define_optimizer(self):
-        """
-        Define PyTorch optimizer specified by the models.
-        """
-        raise NotImplementedError
-
-    def data_forward(self, *x):
-        """
-        Forward pass of the data.
-        :param x: input feature matrix and label vector
-        :return: output by the models
-        """
-        # required by PyTorch nn
-        raise NotImplementedError
-
-    def grad_backward(self):
-        """
-        Perform gradient descent to update the models parameters.
-        """
-        raise NotImplementedError
-
-    def get_loss(self, pred, truth):
-        """
-        Compute loss given models prediction and ground truth. Loss function specified by the models.
-        :param pred: prediction label vector
-        :param truth: ground truth label vector
-        :return: a scalar
-        """
-        raise NotImplementedError
-
-
-class ToyController(BaseController):
-    """This is for code testing."""
-
-    def __init__(self):
-        super(ToyController, self).__init__()
-        self.test_mode = False
-        self.weight = np.random.rand(5, 1)
-        self.bias = np.random.rand()
-        self._loss = 0
-
-    def prepare_input(self, data):
-        return data[:, :-1], data[:, -1]
-
-    def mode(self, test=False):
-        self.test_mode = test
-
-    def data_forward(self, x):
-        return np.matmul(x, self.weight) + self.bias
-
-    def grad_backward(self):
-        print("loss gradient backward")
-
-    def get_loss(self, pred, truth):
-        self._loss = np.mean(np.square(pred - truth))
-        return self._loss
-
-    def define_optimizer(self):
-        pass
-

 class Vocabulary(object):
     """A look-up table that allows you to access `Lexeme` objects. The `Vocab`
@@ -6,11 +6,16 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
-from model.base_model import BaseModel
 from torch.autograd import Variable

+from fastNLP.models.base_model import BaseModel
+
 USE_GPU = True

+"""
+To be deprecated.
+"""
+

 class CharLM(BaseModel):
     """
@@ -1,95 +1,6 @@
-import numpy as np
-import torch
 import torch.nn as nn
-import torch.optim as optim
-from torch.autograd import Variable

-from fastNLP.models.base_model import BaseModel, BaseController
-
-USE_GPU = True
-
-
-def to_var(x):
-    if torch.cuda.is_available() and USE_GPU:
-        x = x.cuda()
-    return Variable(x)
-
-
-class WordSegModel(BaseController):
-    """
-    Model controller for WordSeg
-    """
-
-    def __init__(self):
-        super(WordSegModel, self).__init__()
-        self.id2word = None
-        self.word2id = None
-        self.id2tag = None
-        self.tag2id = None
-
-        self.lstm_batch_size = 8
-        self.lstm_seq_len = 32  # Trainer batch_size == lstm_batch_size * lstm_seq_len
-        self.hidden_dim = 100
-        self.lstm_num_layers = 2
-        self.vocab_size = 100
-        self.word_emb_dim = 100
-
-        self.model = WordSeg(self.hidden_dim, self.lstm_num_layers, self.vocab_size, self.word_emb_dim)
-        self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)),
-                       to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)))
-
-        self.optimizer = None
-        self._loss = None
-
-    def prepare_input(self, data):
-        """
-        perform word indices lookup to convert strings into indices
-        :param data: list of string, each string contains word + space + [B, M, E, S]
-        :return
-        """
-        word_list = []
-        tag_list = []
-        for line in data:
-            if len(line) > 2:
-                tokens = line.split("#")
-                word_list.append(tokens[0])
-                tag_list.append(tokens[2][0])
-        self.id2word = list(set(word_list))
-        self.word2id = {word: idx for idx, word in enumerate(self.id2word)}
-        self.id2tag = list(set(tag_list))
-        self.tag2id = {tag: idx for idx, tag in enumerate(self.id2tag)}
-        words = np.array([self.word2id[w] for w in word_list]).reshape(-1, 1)
-        tags = np.array([self.tag2id[t] for t in tag_list]).reshape(-1, 1)
-        return words, tags
-
-    def mode(self, test=False):
-        if test:
-            self.model.eval()
-        else:
-            self.model.train()
-
-    def data_forward(self, x):
-        """
-        :param x: sequence of length [batch_size], word indices
-        :return:
-        """
-        x = x.reshape(self.lstm_batch_size, self.lstm_seq_len)
-        output, self.hidden = self.model(x, self.hidden)
-        return output
-
-    def define_optimizer(self):
-        self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85)
-
-    def get_loss(self, pred, truth):
-        self._loss = nn.CrossEntropyLoss(pred, truth)
-        return self._loss
-
-    def grad_backward(self):
-        self.model.zero_grad()
-        self._loss.backward()
-        torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2)
-        self.optimizer.step()
+from fastNLP.models.base_model import BaseModel


 class WordSeg(BaseModel):
@@ -0,0 +1,24 @@
+# python: 3.6
+# encoding: utf-8
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class AvgPool(nn.Module):
+    """1-d average pooling module."""
+
+    def __init__(self, stride=None, padding=0):
+        super(AvgPool, self).__init__()
+        self.stride = stride
+        self.padding = padding
+
+    def forward(self, x):
+        # [N, C, L] -> [N, C]
+        kernel_size = x.size(2)
+        x = F.avg_pool1d(  # average pooling, not max pooling
+            input=x,
+            kernel_size=kernel_size,
+            stride=self.stride,
+            padding=self.padding)
+        return x.squeeze(dim=-1)
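
# Editor's sketch (not part of the diff): a quick shape check. The pool spans
# the whole length axis, so [N, C, L] collapses to [N, C].
if __name__ == "__main__":
    import torch
    x = torch.randn(4, 8, 26)   # [N, C, L]
    print(AvgPool()(x).size())  # torch.Size([4, 8])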
@@ -0,0 +1,28 @@
+# python: 3.6
+# encoding: utf-8
+
+import torch.nn as nn
+
+
+class Conv(nn.Module):
+    """
+    Basic 1-d convolution module.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size,
+                 stride=1, padding=0, dilation=1,
+                 groups=1, bias=True):
+        super(Conv, self).__init__()
+        self.conv = nn.Conv1d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias=bias)
+
+    def forward(self, x):
+        return self.conv(x)  # [N, C, L]
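
# Editor's sketch (not part of the diff): the wrapper keeps nn.Conv1d's
# [N, C_in, L] -> [N, C_out, L_out] contract; kernel_size=3 with padding=1
# preserves the length axis.
if __name__ == "__main__":
    import torch
    conv = Conv(in_channels=8, out_channels=16, kernel_size=3, padding=1)
    print(conv(torch.randn(4, 8, 26)).size())  # torch.Size([4, 16, 26])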
@@ -0,0 +1,20 @@
+# python: 3.6
+# encoding: utf-8
+
+import torch
+import torch.nn as nn
+
+
+class KMaxPool(nn.Module):
+    """K max-pooling module."""
+
+    def __init__(self, k):
+        super(KMaxPool, self).__init__()
+        self.k = k
+
+    def forward(self, x):
+        # [N, C, L] -> [N, C*k]
+        x, index = torch.topk(x, self.k, dim=-1, sorted=False)
+        x = torch.reshape(x, (x.size(0), -1))
+        return x
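
# Editor's sketch (not part of the diff): k-max pooling keeps the k largest
# values per channel along the length axis and flattens them, so [N, C, L]
# becomes [N, C*k].
if __name__ == "__main__":
    pool = KMaxPool(k=2)
    print(pool(torch.randn(4, 8, 26)).size())  # torch.Size([4, 16])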
@@ -0,0 +1,26 @@
+# python: 3.6
+# encoding: utf-8
+
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class MaxPool(nn.Module):
+    """1-d max-pooling module."""
+
+    def __init__(self, stride=None, padding=0, dilation=1):
+        super(MaxPool, self).__init__()
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+
+    def forward(self, x):
+        # [N, C, L] -> [N, C]
+        kernel_size = x.size(2)
+        x = F.max_pool1d(
+            input=x,
+            kernel_size=kernel_size,
+            stride=self.stride,
+            padding=self.padding,
+            dilation=self.dilation)
+        return x.squeeze(dim=-1)
@@ -0,0 +1,41 @@
+# Prototype
+
+## Word2Idx.py
+A mapping model between words and indexes.
+
+## embedding.py
+Embedding modules.
+
+Contains a simple encapsulation of torch.nn.Embedding.
+
+## encoder.py
+Encoder modules.
+
+Contains a simple encapsulation of torch.nn.LSTM.
+
+## aggregation.py
+Aggregation modules.
+
+Contains a self-attention model, following the paper "A Structured Self-attentive Sentence Embedding", https://arxiv.org/abs/1703.03130
+
+## predict.py
+Prediction modules.
+
+Contains a two-layer perceptron for classification.
+
+## example.py
+An example showing how to use the above modules to build a model.
+
+Contains a model for sentiment analysis on the Yelp dataset, and its training and testing procedures. See https://arxiv.org/abs/1703.03130 for more details.
+
+## prepare.py
+A case of using Word2Idx to build Yelp datasets.
+
+## dataloader.py
+A dataloader for the Yelp dataset.
+
+It is an iterable object, returning a zero-padded batch every iteration.
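
## Putting it together

A minimal sketch (editor's example, mirroring `example.py`; the sizes are illustrative, not the Yelp settings) of how the modules compose:

```python
import torch

import aggregation
import embedding
import encoder
import predict

emb = embedding.Lookuptable(10000, 100)
enc = encoder.Lstm(100, 300, 1, 0.5, True)   # bidirectional -> 600-dim outputs
att = aggregation.Selfattention(600, 350, 10)
mlp = predict.MLP(10 * 600, 2000, 5)

batch = torch.zeros(4, 26).long()            # [N, L] padded word indices
x = emb(batch)                               # -> [N, L, 100]
x = enc(x)                                   # -> [N, L, 600]
x, penalty = att(x)                          # -> [N, 6000] plus a penalty term
logits = mlp(x)                              # -> [N, 5]
```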
@@ -0,0 +1,63 @@
+import collections
+import pickle
+
+
+class Word2Idx(object):
+    """
+    Build a word index according to word frequency.
+
+    If "min_freq" is given, then only words with a frequency no less than min_freq will be kept.
+    If "max_num" is given, then at most the max_num most frequent words will be kept.
+    "words" should be a list [w_1, w_2, ..., w_i, ..., w_n] where each w_i is a string representing a word.
+    num is the size of the lookup table.
+    w2i is a lookup table assigning each word an index.
+    i2w is a vector which serves as an inverse mapping of w2i.
+    Note that index 0 is token "<PAD>" for padding,
+    and index 1 is token "<UNK>" for unregistered words.
+    e.g. i2w[w2i["word"]] == "word"
+    """
+
+    def __init__(self):
+        self.__w2i = dict()
+        self.__i2w = []
+        self.num = 0
+
+    def build(self, words, min_freq=0, max_num=None):
+        """build a model from words"""
+        counter = collections.Counter(words)
+        word_set = set(words)
+        if max_num is not None:
+            # reserve two slots for <PAD> and <UNK>
+            most_common = counter.most_common(min(len(word_set), max_num - 2))
+        else:
+            most_common = counter.most_common()
+        kept = [w for w, freq in most_common if freq >= min_freq]
+        # real words start from index 2, after <PAD> (0) and <UNK> (1),
+        # so that i2w[w2i[word]] == word holds
+        self.__w2i = dict((w, i + 2) for i, w in enumerate(kept))
+        self.__w2i["<PAD>"] = 0
+        self.__w2i["<UNK>"] = 1
+        self.__i2w = ["<PAD>", "<UNK>"] + kept
+        self.num = len(self.__i2w)
+
+    def w2i(self, word):
+        """word to index"""
+        if word in self.__w2i:
+            return self.__w2i[word]
+        return 1  # unregistered words map to <UNK>
+
+    def i2w(self, idx):
+        """index to word"""
+        if idx >= self.num:
+            raise Exception("out of range\n")
+        return self.__i2w[idx]
+
+    def save(self, addr):
+        """save the model to the file at address "addr" """
+        with open(addr, "wb") as f:
+            pickle.dump([self.__i2w, self.__w2i, self.num], f)
+
+    def load(self, addr):
+        """load a model from the file at address "addr" """
+        with open(addr, "rb") as f:
+            paras = pickle.load(f)
+        self.__i2w, self.__w2i, self.num = paras[0], paras[1], paras[2]
@@ -0,0 +1,40 @@
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+
+
+class Selfattention(nn.Module):
+    """
+    Self-attention module.
+
+    Args:
+        input_size : the size of the input vector
+        d_a        : the width of the weight matrix
+        r          : the number of encoded vectors
+    """
+
+    def __init__(self, input_size, d_a, r):
+        super(Selfattention, self).__init__()
+        self.W_s1 = nn.Parameter(torch.randn(d_a, input_size), requires_grad=True)
+        self.W_s2 = nn.Parameter(torch.randn(r, d_a), requires_grad=True)
+        self.softmax = nn.Softmax(dim=2)
+        self.tanh = nn.Tanh()
+
+    def penalization(self, A):
+        """
+        Compute the penalization term for the attention module.
+        """
+        if self.W_s1.is_cuda:
+            I = Variable(torch.eye(A.size(1)).cuda(), requires_grad=False)
+        else:
+            I = Variable(torch.eye(A.size(1)), requires_grad=False)
+        M = torch.matmul(A, torch.transpose(A, 1, 2)) - I
+        M = M.view(M.size(0), -1)
+        return torch.sum(M ** 2, dim=1)
+
+    def forward(self, x):
+        # A = softmax(W_s2 . tanh(W_s1 . H^T)), as in the paper
+        inter = self.tanh(torch.matmul(self.W_s1, torch.transpose(x, 1, 2)))
+        A = self.softmax(torch.matmul(self.W_s2, inter))
+        out = torch.matmul(A, x)
+        out = out.view(out.size(0), -1)
+        penalty = self.penalization(A)
+        return out, penalty
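
# Editor's sketch (not part of the diff): with the paper's Yelp settings
# (input_size=600, d_a=350, r=10), a [N, L, 600] batch yields a [N, 6000]
# sentence embedding plus one penalty value per sample.
if __name__ == "__main__":
    module = Selfattention(input_size=600, d_a=350, r=10)
    out, penalty = module(Variable(torch.randn(4, 26, 600)))
    print(out.size(), penalty.size())  # torch.Size([4, 6000]) torch.Size([4])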
@@ -0,0 +1,81 @@
+import pickle
+import random
+
+import torch
+from torch.autograd import Variable
+
+
+def float_wrapper(x, requires_grad=True, using_cuda=True):
+    """
+    transform a float-type list into a pytorch Variable
+    """
+    if using_cuda:
+        return Variable(torch.FloatTensor(x).cuda(), requires_grad=requires_grad)
+    else:
+        return Variable(torch.FloatTensor(x), requires_grad=requires_grad)
+
+
+def long_wrapper(x, requires_grad=True, using_cuda=True):
+    """
+    transform a long-type list into a pytorch Variable
+    """
+    if using_cuda:
+        return Variable(torch.LongTensor(x).cuda(), requires_grad=requires_grad)
+    else:
+        return Variable(torch.LongTensor(x), requires_grad=requires_grad)
+
+
+def pad(X, using_cuda):
+    """
+    zero-pad sequences to the same length, then stack them together
+    """
+    maxlen = max([x.size(0) for x in X])
+    Y = []
+    for x in X:
+        padlen = maxlen - x.size(0)
+        if padlen > 0:
+            if using_cuda:
+                paddings = Variable(torch.zeros(padlen).long()).cuda()
+            else:
+                paddings = Variable(torch.zeros(padlen).long())
+            x_ = torch.cat((x, paddings), 0)
+            Y.append(x_)
+        else:
+            Y.append(x)
+    return torch.stack(Y)
+
+
+class DataLoader(object):
+    """
+    load data with the form {"feature", "class"}
+
+    Args:
+        fdir       : data file address
+        batch_size : batch size
+        shuffle    : if True, shuffle the dataset every epoch
+        using_cuda : if True, return tensors on the GPU
+    """
+
+    def __init__(self, fdir, batch_size, shuffle=True, using_cuda=True):
+        with open(fdir, "rb") as f:
+            self.data = pickle.load(f)
+        self.batch_size = batch_size
+        self.num = len(self.data)
+        self.count = 0
+        self.iters = int(self.num / batch_size)
+        self.shuffle = shuffle
+        self.using_cuda = using_cuda
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.count == self.iters:
+            # one epoch finished: reset the counter and reshuffle
+            self.count = 0
+            if self.shuffle:
+                random.shuffle(self.data)
+            raise StopIteration()
+        else:
+            batch = self.data[self.count * self.batch_size: (self.count + 1) * self.batch_size]
+            self.count += 1
+            X = [long_wrapper(x["sent"], using_cuda=self.using_cuda, requires_grad=False) for x in batch]
+            X = pad(X, self.using_cuda)
+            y = long_wrapper([x["class"] for x in batch], using_cuda=self.using_cuda, requires_grad=False)
+            return {"feature": X, "class": y}
@@ -0,0 +1,23 @@
+import torch.nn as nn
+
+
+class Lookuptable(nn.Module):
+    """
+    A simple lookup table.
+
+    Args:
+        nums        : the size of the lookup table
+        dims        : the size of each vector
+        padding_idx : pads the tensor with zeros whenever it encounters this index
+        sparse      : if True, the gradient matrix will be a sparse tensor; in this case,
+                      only optim.SGD (cuda and cpu) and optim.Adagrad (cpu) can be used
+    """
+
+    def __init__(self, nums, dims, padding_idx=0, sparse=False):
+        super(Lookuptable, self).__init__()
+        self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse)
+
+    def forward(self, x):
+        return self.embed(x)
+
+
+if __name__ == "__main__":
+    model = Lookuptable(10, 20)
@@ -0,0 +1,22 @@
+import torch.nn as nn
+
+
+class Lstm(nn.Module):
+    """
+    LSTM module.
+
+    Args:
+        input_size    : input size
+        hidden_size   : hidden size
+        num_layers    : number of hidden layers
+        dropout       : dropout rate
+        bidirectional : if True, becomes a bidirectional RNN
+    """
+
+    def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional):
+        super(Lstm, self).__init__()
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=True,
+                            dropout=dropout, bidirectional=bidirectional)
+
+    def forward(self, x):
+        x, _ = self.lstm(x)
+        return x
@@ -0,0 +1,129 @@
+import time
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+import aggregation
+import dataloader
+import embedding
+import encoder
+import predict
+
+WORD_NUM = 357361
+WORD_SIZE = 100
+HIDDEN_SIZE = 300
+D_A = 350
+R = 10
+MLP_HIDDEN = 2000
+CLASSES_NUM = 5
+
+
+class Net(nn.Module):
+    """
+    A model for sentiment analysis using an LSTM and self-attention.
+    """
+
+    def __init__(self):
+        super(Net, self).__init__()
+        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
+        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
+        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
+        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)
+
+    def forward(self, x):
+        x = self.embedding(x)
+        x = self.encoder(x)
+        x, penalty = self.aggregation(x)
+        x = self.predict(x)
+        return x, penalty
+
+
+def train(model_dict=None, using_cuda=True, learning_rate=0.06,
+          momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
+    """
+    Training procedure.
+
+    Args:
+        If model_dict is given (a file address), training continues from the given model.
+        Otherwise, a new model is trained from scratch.
+        If using_cuda is True, training is conducted on the GPU.
+        learning_rate and momentum are for the SGD optimizer.
+        coef is the coefficient between the cross-entropy loss and the penalization term.
+        interval is the frequency of reporting.
+
+    The result is saved under the name "model_dict_" + current time, which can be used for further training.
+    """
+    if using_cuda:
+        net = Net().cuda()
+    else:
+        net = Net()
+
+    if model_dict is not None:
+        net.load_state_dict(torch.load(model_dict))
+
+    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
+    criterion = nn.CrossEntropyLoss()
+    dataset = dataloader.DataLoader("train_set.pkl", batch_size, using_cuda=using_cuda)
+
+    # statistics
+    loss_count = 0
+    prepare_time = 0
+    run_time = 0
+    count = 0
+
+    for epoch in range(epochs):
+        print("epoch: %d" % (epoch))
+        for i, batch in enumerate(dataset):
+            t1 = time.time()
+            X = batch["feature"]
+            y = batch["class"]
+            t2 = time.time()
+            y_pred, y_penl = net(X)
+            loss = criterion(y_pred, y) + torch.sum(y_penl) / batch_size * coef
+            optimizer.zero_grad()
+            loss.backward()
+            nn.utils.clip_grad_norm(net.parameters(), 0.5)
+            optimizer.step()
+            t3 = time.time()
+
+            loss_count += torch.sum(y_penl).data[0]
+            prepare_time += (t2 - t1)
+            run_time += (t3 - t2)
+            p, idx = torch.max(y_pred.data, dim=1)
+            count += torch.sum(torch.eq(idx.cpu(), y.data.cpu()))
+
+            if (i + 1) % interval == 0:
+                print("epoch: %d, iters: %d" % (epoch, i + 1))
+                print("loss count:" + str(loss_count / (interval * batch_size)))
+                print("accuracy:" + str(count / (interval * batch_size)))
+                print("penalty:" + str(torch.sum(y_penl).data[0] / batch_size))
+                print("prepare time:" + str(prepare_time))
+                print("run time:" + str(run_time))
+                prepare_time = 0
+                run_time = 0
+                loss_count = 0
+                count = 0
+    string = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
+    torch.save(net.state_dict(), "model_dict_%s.dict" % (string))
+
+
+def test(model_dict, using_cuda=True):
+    if using_cuda:
+        net = Net().cuda()
+    else:
+        net = Net()
+    net.load_state_dict(torch.load(model_dict))
+    dataset = dataloader.DataLoader("test_set.pkl", batch_size=1, using_cuda=using_cuda)
+    count = 0
+    for i, batch in enumerate(dataset):
+        X = batch["feature"]
+        y = batch["class"]
+        y_pred, _ = net(X)
+        p, idx = torch.max(y_pred.data, dim=1)
+        count += torch.sum(torch.eq(idx.cpu(), y.data.cpu()))
+    print("accuracy: %f" % (count / dataset.num))
+
+
+if __name__ == "__main__":
+    train(using_cuda=torch.cuda.is_available())
@@ -0,0 +1,25 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class MLP(nn.Module):
+    """
+    A two-layer perceptron for classification.
+
+    Output: an unnormalized probability distribution (logits).
+
+    Args:
+        input_size  : the size of the input
+        hidden_size : the size of the hidden layer
+        output_size : the size of the output
+    """
+
+    def __init__(self, input_size, hidden_size, output_size):
+        super(MLP, self).__init__()
+        self.L1 = nn.Linear(input_size, hidden_size)
+        self.L2 = nn.Linear(hidden_size, output_size)
+
+    def forward(self, x):
+        out = self.L2(F.relu(self.L1(x)))
+        return out
+
+
+if __name__ == "__main__":
+    MLP(20, 30, 20)
@@ -0,0 +1,50 @@
+import pickle
+
+import Word2Idx
+
+
+def get_sets(m, n):
+    """
+    get a train set containing m samples and a test set containing n samples
+    """
+    samples = pickle.load(open("tuples.pkl", "rb"))
+    if m + n > len(samples):
+        print("asking for too many tuples")
+        return
+    train_samples = samples[:m]
+    test_samples = samples[m:m + n]
+    return train_samples, test_samples
+
+
+def build_wordidx():
+    """
+    build the word index using Word2Idx
+    """
+    train, test = get_sets(500000, 2000)
+    words = []
+    for x in train:
+        words += x[0]
+    wordidx = Word2Idx.Word2Idx()
+    wordidx.build(words)
+    print(wordidx.num)
+    print(wordidx.i2w(0))
+    wordidx.save("wordidx.pkl")
+
+
+def build_sets():
+    """
+    build the train and test sets, transforming words to indices
+    """
+    train, test = get_sets(500000, 2000)
+    wordidx = Word2Idx.Word2Idx()
+    wordidx.load("wordidx.pkl")
+    train_set = []
+    for x in train:
+        sent = [wordidx.w2i(w) for w in x[0]]
+        train_set.append({"sent": sent, "class": x[1]})
+    test_set = []
+    for x in test:
+        sent = [wordidx.w2i(w) for w in x[0]]
+        test_set.append({"sent": sent, "class": x[1]})
+    pickle.dump(train_set, open("train_set.pkl", "wb"))
+    pickle.dump(test_set, open("test_set.pkl", "wb"))
+
+
+if __name__ == "__main__":
+    build_wordidx()
+    build_sets()
@@ -0,0 +1,14 @@
+class BaseSaver(object):
+    """base class for all savers"""
+
+    def __init__(self, save_path):
+        self.save_path = save_path
+
+    def save_bytes(self):
+        raise NotImplementedError
+
+    def save_str(self):
+        raise NotImplementedError
+
+    def compress(self):
+        raise NotImplementedError
@@ -0,0 +1,12 @@
+from saver.base_saver import BaseSaver
+
+
+class Logger(BaseSaver):
+    """Logging"""
+
+    def __init__(self, save_path):
+        super(Logger, self).__init__(save_path)
+
+    def log(self, string):
+        with open(self.save_path, "a") as f:
+            f.write(string)
@@ -0,0 +1,8 @@
+from saver.base_saver import BaseSaver
+
+
+class ModelSaver(BaseSaver):
+    """Save a model"""
+
+    def __init__(self, save_path):
+        super(ModelSaver, self).__init__(save_path)
@@ -1,23 +1,20 @@
-from loader.base_loader import BaseLoader
-from model.word_seg_model import WordSegModel
-
-from fastNLP.action import Tester
-from fastNLP.action.trainer import Trainer
+from fastNLP.action.tester import Tester
+from fastNLP.action.trainer import WordSegTrainer
+from fastNLP.loader.base_loader import BaseLoader
+from fastNLP.models.word_seg_model import WordSeg


-def test_charlm():
-    train_config = Trainer.TrainConfig(epochs=5, validate=False, save_when_better=False,
+def test_wordseg():
+    train_config = WordSegTrainer.TrainConfig(epochs=5, validate=False, save_when_better=False,
                                        log_per_step=10, log_validation=False, batch_size=254)
-    trainer = Trainer(train_config)
+    trainer = WordSegTrainer(train_config)

-    model = WordSegModel()
+    model = WordSeg(100, 2, 1000)

     train_data = BaseLoader("load_train", "./data_for_tests/cws_train").load_lines()

     trainer.train(model, train_data)

-    trainer.save_model(model)
-
     test_config = Tester.TestConfig(save_output=False, validate_in_training=False,
                                     save_dev_input=False, save_loss=False, batch_size=254)
     tester = Tester(test_config)
@@ -28,4 +25,4 @@ def test_charlm():
 if __name__ == "__main__":
-    test_charlm()
+    test_wordseg()