diff --git a/action/action.py b/action/action.py
index 557c7ef2..f47ede2c 100644
--- a/action/action.py
+++ b/action/action.py
@@ -1,3 +1,5 @@
+from saver.logger import Logger
+
 class Action(object):
     """
@@ -6,7 +8,7 @@ class Action(object):
 
     def __init__(self):
         super(Action, self).__init__()
-        self.logger = None
+        self.logger = Logger("logger_output.txt")
 
     def load_config(self, args):
         raise NotImplementedError
@@ -14,27 +16,31 @@ class Action(object):
     def load_dataset(self, args):
         raise NotImplementedError
 
-    def log(self, args):
-        print("call logger.log")
+    def log(self, string):
+        self.logger.log(string)
 
-    def batchify(self, X, Y=None):
+    def batchify(self, batch_size, X, Y=None):
         """
-        :param X:
-        :param Y:
+        :param batch_size: int
+        :param X: feature matrix of size [n_sample, m_feature]
+        :param Y: label vector of size [n_sample, 1] (optional)
        :return iteration:int, the number of step in each epoch
                 generator:generator, to generate batch inputs
         """
-        data = X
-        if Y is not None:
-            data = [X, Y]
-        return 2, self._batch_generate(data)
-
-    def _batch_generate(self, data):
-        step = 10
-        for i in range(2):
-            start = i * step
-            end = (i + 1) * step
-            yield data[0][start:end], data[1][start:end]
+        n_samples = X.shape[0]
+        num_iter = n_samples // batch_size  # integer number of steps per epoch
+        if Y is None:
+            generator = self._batch_generate(batch_size, num_iter, X)
+        else:
+            generator = self._batch_generate(batch_size, num_iter, X, Y)
+        return num_iter, generator
+
+    @staticmethod
+    def _batch_generate(batch_size, num_iter, *data):
+        for step in range(num_iter):
+            start = batch_size * step
+            end = batch_size * (step + 1)
+            yield tuple([x[start:end, :] for x in data])
 
     def make_log(self, *args):
         return "log"
diff --git a/action/tester.py b/action/tester.py
index 96c8a0ae..581b3b78 100644
--- a/action/tester.py
+++ b/action/tester.py
@@ -1,3 +1,5 @@
+from collections import namedtuple
+
 import numpy as np
 
 from action.action import Action
@@ -6,22 +8,39 @@ from action.action import Action
 class Tester(Action):
     """docstring for Tester"""
 
+    TestConfig = namedtuple("config", ["validate_in_training", "save_dev_input", "save_output",
+                                       "save_loss", "batch_size"])
+
     def __init__(self, test_args):
         """
         :param test_args: named tuple
         """
         super(Tester, self).__init__()
-        self.test_args = test_args
-        # self.args_dict = {name: value for name, value in self.test_args.__dict__.iteritems()}
-        self.mean_loss = None
+        self.validate_in_training = test_args.validate_in_training
+        self.save_dev_input = test_args.save_dev_input
+        self.valid_x = None
+        self.valid_y = None
+        self.save_output = test_args.save_output
         self.output = None
+        self.save_loss = test_args.save_loss
+        self.mean_loss = None
+        self.batch_size = test_args.batch_size
 
     def test(self, network, data):
-        # transform into network input and label
-        X, Y = network.prepare_input(data)
+        network.mode(test=True)  # turn on the testing mode
+
+        if not self.save_dev_input:
+            # transform into network input and label
+            valid_x, valid_y = network.prepare_input(data)
+            if self.validate_in_training:
+                self.valid_x = valid_x
+                self.valid_y = valid_y
+        else:
+            valid_x = self.valid_x
+            valid_y = self.valid_y
 
         # split into batches by self.batch_size
-        iterations, test_batch_generator = self.batchify(X, Y)
+        iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y)
 
         batch_output = list()
         loss_history = list()
@@ -33,16 +52,19 @@ class Tester(Action):
             # forward pass from tests input to predicted output
             prediction = network.data_forward(batch_x)
 
-            batch_output.append(prediction)
-
-            # get the loss
             loss = network.loss(batch_y, prediction)
-            loss_history.append(loss)
-            self.log(self.make_log(step, loss))
+            if self.save_output:
+                batch_output.append(prediction)
+            if self.save_loss:
+                loss_history.append(loss)
+                self.log(self.make_log(step, loss))
 
-        self.mean_loss = np.mean(np.array(loss_history))
-        self.output = self.make_output(batch_output)
+        if self.save_loss:
+            self.mean_loss = np.mean(np.array(loss_history))
+        if self.save_output:
+            self.output = self.make_output(batch_output)
 
     @property
     def loss(self):
@@ -55,3 +77,9 @@
     def make_output(self, batch_output):
         # construct full prediction with batch outputs
         return np.concatenate((batch_output[0], batch_output[1]), axis=0)
+
+    def load_config(self, args):
+        raise NotImplementedError
+
+    def load_dataset(self, args):
+        raise NotImplementedError
diff --git a/action/trainer.py b/action/trainer.py
index 15f28583..724bfc77 100644
--- a/action/trainer.py
+++ b/action/trainer.py
@@ -1,3 +1,5 @@
+from collections import namedtuple
+
 from .action import Action
 
 from .tester import Tester
@@ -6,32 +8,42 @@
 class Trainer(Action):
     """
     Trainer for common training logic of all models
     """
+    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", "log_validation"])
 
     def __init__(self, train_args):
         """
         :param train_args: namedtuple
         """
         super(Trainer, self).__init__()
-        self.train_args = train_args
-        # self.args_dict = {name: value for name, value in self.train_args.__dict__.iteritems()}
-        self.n_epochs = self.train_args.epochs
-        self.validate = self.train_args.validate
-        self.save_when_better = self.train_args.save_when_better
+        self.n_epochs = train_args.epochs
+        self.validate = train_args.validate
+        self.save_when_better = train_args.save_when_better
+        self.log_per_step = train_args.log_per_step
+        self.log_validation = train_args.log_validation
+
+    def train(self, network, train_data, dev_data):
+        """
+        :param network: the model controller
+        :param train_data: raw data for training
+        :param dev_data: raw data for validation
+        :return:
+        """
+        train_x, train_y = network.prepare_input(train_data.train_set, train_data.train_label)
 
-    def train(self, network, data, dev_data):
-        train_x, train_y = network.prepare_input(data.train_set, data.train_label)
-        valid_x, valid_y = network.prepare_input(dev_data.valid_set, dev_data.valid_label)
+        network.mode(test=False)  # turn on the train mode
 
         iterations, train_batch_generator = self.batchify(train_x, train_y)
 
-        loss_history = list()
-        network.mode(test=False)
-
-        test_args = "..."
+        test_args = Tester.TestConfig(save_output=True, validate_in_training=True,
+                                      save_dev_input=True, save_loss=True, batch_size=16)
         evaluator = Tester(test_args)
 
+        best_loss = 1e10
+        loss_history = list()
         for epoch in range(self.n_epochs):
+            network.define_optimizer()
             for step in range(iterations):
                 batch_x, batch_y = train_batch_generator.__next__()
@@ -39,14 +51,18 @@
                 loss = network.loss(batch_y, prediction)
                 network.grad_backward()
-                loss_history.append(loss)
-                self.log(self.make_log(epoch, step, loss))
+
+                if step % self.log_per_step == 0:
+                    loss_history.append(loss)
+                    self.log(self.make_log(epoch, step, loss))
 
             #################### evaluate over dev set ###################
             if self.validate:
-                evaluator.test(network, [valid_x, valid_y])
+                # give all controls to tester
+                evaluator.test(network, dev_data)
 
-                self.log(self.make_valid_log(epoch, evaluator.loss))
+                if self.log_validation:
+                    self.log(self.make_valid_log(epoch, evaluator.loss))
                 if evaluator.loss < best_loss:
                     best_loss = evaluator.loss
                     if self.save_when_better:
@@ -54,15 +70,20 @@ class Trainer(Action):
 
         # finish training
 
-    @staticmethod
-    def prepare_training(network, data):
-        return network.prepare_training(data)
-
     def make_log(self, *args):
-        print("logged")
+        return "make a log"
 
     def make_valid_log(self, *args):
-        print("logged")
+        return "make a valid log"
 
     def save_model(self, model):
-        print("model saved")
+        model.save()
+
+    def load_data(self, data_name):
+        print("load data")
+
+    def load_config(self, args):
+        raise NotImplementedError
+
+    def load_dataset(self, args):
+        raise NotImplementedError
diff --git a/loader/base_loader.py b/loader/base_loader.py
index bc1ad7b1..9579a1e5 100644
--- a/loader/base_loader.py
+++ b/loader/base_loader.py
@@ -13,3 +13,19 @@ class BaseLoader(object):
         with open(self.data_path, "r", encoding="utf-8") as f:
             text = f.read()
         return text
+
+
+class ToyLoader0(BaseLoader):
+    """
+        For charLM
+    """
+
+    def __init__(self, name, path):
+        super(ToyLoader0, self).__init__(name, path)
+
+    def load(self):
+        with open(self.data_path, 'r') as f:
+            corpus = f.read().lower()
+        import re
+        corpus = re.sub(r"<unk>", "unk", corpus)  # normalize the PTB "<unk>" token
+        return corpus.split()
diff --git a/model/char_language_model.py b/model/char_language_model.py
index f5b5e09b..ef8f6052 100644
--- a/model/char_language_model.py
+++ b/model/char_language_model.py
@@ -14,6 +14,8 @@ from model.base_model import BaseModel
 class CharLM(BaseModel):
     """
         Controller of the Character-level Neural Language Model
+        To do:
+            - where the data goes, call data savers.
     """
 
     def __init__(self):
@@ -28,12 +30,15 @@ class CharLM(BaseModel):
         self.lstm_batch_size = 20
         self.vocab_size = 100
         self.num_char = 150
+        self.max_word_len = 10
+        self.num_epoch = 10
+        self.old_PPL = 100000
+        self.best_PPL = 100000
         self.data = None  # named tuple to store all data set
         self.data_ready = False
         self.criterion = nn.CrossEntropyLoss()
         self.loss = None
-        self.optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.85)
         self.use_gpu = False
         # word_emb_dim == hidden_size / num of hidden units
         self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)),
@@ -44,10 +49,17 @@ class CharLM(BaseModel):
                              self.vocab_size,
                              self.num_char,
                              use_gpu=self.use_gpu)
+        for param in self.model.parameters():
+            nn.init.uniform(param.data, -0.05, 0.05)
+
+        self.learning_rate = 0.1
+        self.optimizer = None
 
     def prepare_input(self, raw_text):
         """
-        Do some preparation jobs. Transform raw data into input vectors.
+        :param raw_text: raw input data
+        :return: torch.Tensor, torch.Tensor
+                 feature matrix, label vector
         """
         if not self.data_ready:
             # To do: These need to be dropped out from here. (below)
@@ -82,10 +94,20 @@ class CharLM(BaseModel):
             DataTuple = namedtuple("DataTuple", ["feature", "label"])
             self.data = DataTuple(feature=input_vec, label=input_label)
 
-        return self.data.feature, self.data.label
+        feature_input = torch.from_numpy(self.data.feature)
+        label_input = torch.from_numpy(self.data.label)
+        num_seq = feature_input.size()[0] // self.lstm_seq_len
+        feature_input = feature_input[:num_seq * self.lstm_seq_len, :]
+        feature_input = feature_input.view(-1, self.lstm_seq_len, self.max_word_len + 2)
+        self.num_iter_per_epoch = feature_input.size()[0] // self.lstm_batch_size
+
+        return feature_input, label_input
 
     def mode(self, test=False):
-        raise NotImplementedError
+        if test:
+            self.model.eval()
+        else:
+            self.model.train()
 
     def data_forward(self, x):
         # detach hidden state of LSTM from last batch
@@ -103,6 +125,13 @@ class CharLM(BaseModel):
         self.loss = self.criterion(predict, to_var(truth))
         return self.loss
 
+    def define_optimizer(self):
+        # redefine optimizer for every new epoch
+        self.optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.85)
+
+    def save(self):
+        torch.save(self.model, "cache/model.pkl")
+
     @staticmethod
     def preprocess():
         word_dict, char_dict = create_word_char_dict("valid.txt", "train.txt", "tests.txt")
@@ -122,23 +151,6 @@ class CharLM(BaseModel):
         torch.save(objects, "cache/prep.pt")
         print("Preprocess done.")
 
-    def forward(self, x, hidden):
-        lstm_batch_size = x.size()[0]
-        lstm_seq_len = x.size()[1]
-        x = x.contiguous().view(-1, x.size()[2])
-        x = self.char_embed(x)
-        x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
-        x = self.conv_layers(x)
-        x = self.batch_norm(x)
-        x = self.highway1(x)
-        x = self.highway2(x)
-        x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1)
-        x, hidden = self.lstm(x, hidden)
-        x = self.dropout(x)
-        x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1)
-        x = self.linear(x)
-        return x, hidden
-
 
 """
     Global Functions
diff --git a/saver/logger.py b/saver/logger.py
index d6af6f6a..be38de40 100644
--- a/saver/logger.py
+++ b/saver/logger.py
@@ -8,4 +8,5 @@ class Logger(BaseSaver):
         super(Logger, self).__init__(save_path)
 
     def log(self, string):
-        raise NotImplementedError
+        with open(self.save_path, "a") as f:
+            f.write(string)
diff --git a/tests/test_charlm.py b/tests/test_charlm.py
new file mode 100644
index 00000000..92b67dc2
--- /dev/null
+++ b/tests/test_charlm.py
@@ -0,0 +1,30 @@
+from action.tester import Tester
+from action.trainer import Trainer
+from loader.base_loader import ToyLoader0
+from model.char_language_model import CharLM
+
+
+def test_charlm():
+    train_config = Trainer.TrainConfig(epochs=1, validate=True, save_when_better=True,
+                                       log_per_step=10, log_validation=True)
+    trainer = Trainer(train_config)
+
+    model = CharLM()
+    train_data = ToyLoader0("load_train", "path_to_train_file").load()
+    valid_data = ToyLoader0("load_valid", "path_to_valid_file").load()
+
+    trainer.train(model, train_data, valid_data)
+
+    trainer.save_model(model)
+
+    test_config = Tester.TestConfig(save_output=True, validate_in_training=True,
+                                    save_dev_input=True, save_loss=True, batch_size=16)
+    tester = Tester(test_config)
+
+    test_data = ToyLoader0("load_test", "path_to_test").load()
+
+    tester.test(model, test_data)
+
+
+if __name__ == "__main__":
+    test_charlm()
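
A minimal usage sketch of the reworked Action.batchify contract (not part of the patch above; the Action() instantiation, array shapes, and batch size are assumptions for illustration only):

import numpy as np

from action.action import Action

action = Action()            # its Logger appends to "logger_output.txt"
X = np.random.rand(50, 8)    # feature matrix, [n_sample, m_feature]
Y = np.random.rand(50, 1)    # label vector,   [n_sample, 1]

num_iter, batches = action.batchify(16, X, Y)   # num_iter == 50 // 16 == 3
for step in range(num_iter):
    batch_x, batch_y = next(batches)            # batch_x: [16, 8], batch_y: [16, 1]
# samples beyond num_iter * batch_size (here the last 2) are dropped by this scheme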