From 52b1337e8bcd1f6b538a42861016cf0aca68b2da Mon Sep 17 00:00:00 2001 From: FengZiYjun Date: Fri, 25 May 2018 17:40:28 +0800 Subject: [PATCH] READY TO GO: test_charlm tested --- action/action.py | 8 ++--- action/tester.py | 24 +++++++------ action/trainer.py | 16 +++++---- model/base_model.py | 4 +-- model/char_language_model.py | 67 ++++++++++++++++++++---------------- tests/test_charlm.py | 7 ++-- 6 files changed, 70 insertions(+), 56 deletions(-) diff --git a/action/action.py b/action/action.py index f47ede2c..9c3f32f6 100644 --- a/action/action.py +++ b/action/action.py @@ -27,8 +27,8 @@ class Action(object): :return iteration:int, the number of step in each epoch generator:generator, to generate batch inputs """ - n_samples = X.shape[0] - num_iter = n_samples / batch_size + n_samples = X.size()[0] + num_iter = n_samples // batch_size if Y is None: generator = self._batch_generate(batch_size, num_iter, X) else: @@ -39,8 +39,8 @@ class Action(object): def _batch_generate(batch_size, num_iter, *data): for step in range(num_iter): start = batch_size * step - end = (batch_size + 1) * step - yield tuple([x[start:end, :] for x in data]) + end = batch_size * (step + 1) + yield tuple([x[start:end] for x in data]) def make_log(self, *args): return "log" diff --git a/action/tester.py b/action/tester.py index 581b3b78..96ab1a4a 100644 --- a/action/tester.py +++ b/action/tester.py @@ -27,17 +27,18 @@ class Tester(Action): self.batch_size = test_args.batch_size def test(self, network, data): + print("testing") network.mode(test=True) # turn on the testing mode - - if not self.save_dev_input: - # transform into network input and label - valid_x, valid_y = network.prepare_input(data) - if self.validate_in_training: + if self.save_dev_input: + if self.valid_x is None: + valid_x, valid_y = network.prepare_input(data) self.valid_x = valid_x self.valid_y = valid_y + else: + valid_x = self.valid_x + valid_y = self.valid_y else: - valid_x = self.valid_x - valid_y = self.valid_y + valid_x, valid_y = network.prepare_input(data) # split into batches by self.batch_size iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y) @@ -53,10 +54,10 @@ class Tester(Action): # forward pass from tests input to predicted output prediction = network.data_forward(batch_x) - loss = network.loss(batch_y, prediction) + loss = network.get_loss(prediction, batch_y) if self.save_output: - batch_output.append(prediction) + batch_output.append(prediction.data) if self.save_loss: loss_history.append(loss) self.log(self.make_log(step, loss)) @@ -74,9 +75,10 @@ class Tester(Action): def result(self): return self.output - def make_output(self, batch_output): + @staticmethod + def make_output(batch_outputs): # construct full prediction with batch outputs - return np.concatenate((batch_output[0], batch_output[1]), axis=0) + return np.concatenate(batch_outputs, axis=0) def load_config(self, args): raise NotImplementedError diff --git a/action/trainer.py b/action/trainer.py index 724bfc77..2584552b 100644 --- a/action/trainer.py +++ b/action/trainer.py @@ -8,7 +8,8 @@ class Trainer(Action): """ Trainer for common training logic of all models """ - TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", "log_validation"]) + TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", + "log_per_step", "log_validation", "batch_size"]) def __init__(self, train_args): """ @@ -20,6 +21,7 @@ class Trainer(Action): self.save_when_better = 
train_args.save_when_better self.log_per_step = train_args.log_per_step self.log_validation = train_args.log_validation + self.batch_size = train_args.batch_size def train(self, network, train_data, dev_data): """ @@ -28,20 +30,19 @@ class Trainer(Action): :param dev_data: raw data for validation :return: """ - train_x, train_y = network.prepare_input(train_data.train_set, train_data.train_label) + train_x, train_y = network.prepare_input(train_data) - network.mode(test=False) # turn on the train mode - - iterations, train_batch_generator = self.batchify(train_x, train_y) + iterations, train_batch_generator = self.batchify(self.batch_size, train_x, train_y) test_args = Tester.TestConfig(save_output=True, validate_in_training=True, - save_dev_input=True, save_loss=True, batch_size=16) + save_dev_input=True, save_loss=True, batch_size=self.batch_size) evaluator = Tester(test_args) best_loss = 1e10 loss_history = list() for epoch in range(self.n_epochs): + network.mode(test=False) # turn on the train mode network.define_optimizer() for step in range(iterations): @@ -49,10 +50,11 @@ class Trainer(Action): prediction = network.data_forward(batch_x) - loss = network.loss(batch_y, prediction) + loss = network.get_loss(prediction, batch_y) network.grad_backward() if step % self.log_per_step == 0: + print("step ", step) loss_history.append(loss) self.log(self.make_log(epoch, step, loss)) diff --git a/model/base_model.py b/model/base_model.py index 92d4068a..facb82d9 100644 --- a/model/base_model.py +++ b/model/base_model.py @@ -24,7 +24,7 @@ class BaseModel(object): def grad_backward(self): raise NotImplementedError - def loss(self, pred, truth): + def get_loss(self, pred, truth): raise NotImplementedError @@ -50,7 +50,7 @@ class ToyModel(BaseModel): def grad_backward(self): print("loss gradient backward") - def loss(self, pred, truth): + def get_loss(self, pred, truth): self._loss = np.mean(np.square(pred - truth)) return self._loss diff --git a/model/char_language_model.py b/model/char_language_model.py index be21442d..760d694d 100644 --- a/model/char_language_model.py +++ b/model/char_language_model.py @@ -10,6 +10,8 @@ from torch.autograd import Variable from model.base_model import BaseModel +USE_GPU = True + class CharLM(BaseModel): @@ -20,16 +22,16 @@ class CharLM(BaseModel): """ DataTuple = namedtuple("DataTuple", ["feature", "label"]) - def __init__(self): + def __init__(self, lstm_batch_size, lstm_seq_len): super(CharLM, self).__init__() """ Settings: should come from config loader or pre-processing """ - self.word_embed_dim = 100 + self.word_embed_dim = 300 self.char_embedding_dim = 15 - self.cnn_batch_size = 40 - self.lstm_seq_len = 10 - self.lstm_batch_size = 4 + self.cnn_batch_size = lstm_batch_size * lstm_seq_len + self.lstm_seq_len = lstm_seq_len + self.lstm_batch_size = lstm_batch_size self.num_epoch = 10 self.old_PPL = 100000 self.best_PPL = 100000 @@ -45,8 +47,9 @@ class CharLM(BaseModel): self.data = None # named tuple to store all data set self.data_ready = False self.criterion = nn.CrossEntropyLoss() - self.loss = None - self.use_gpu = False + self._loss = None + self.use_gpu = USE_GPU + # word_emb_dim == hidden_size / num of hidden units self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)), to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim))) @@ -64,7 +67,7 @@ class CharLM(BaseModel): def prepare_input(self, raw_text): """ - :param raw_text: raw input data + :param raw_text: raw input text consisting of words :return: torch.Tensor, 
torch.Tensor feature matrix, label vector This function is only called once in Trainer.train, but may called multiple times in Tester.test @@ -78,17 +81,12 @@ class CharLM(BaseModel): max_word_len = self.max_word_len print("word/char dictionary built. Start making inputs.") - input_vec = np.array(text2vec(raw_text, char_dict, max_word_len)) + words = raw_text + input_vec = np.array(text2vec(words, char_dict, max_word_len)) # Labels are next-word index in word_dict with the same length as inputs - input_label = np.array([word_dict[w] for w in raw_text[1:]] + [word_dict[raw_text[-1]]]) - - data = self.DataTuple(feature=input_vec, label=input_label) - feature_input = torch.from_numpy(data.feature) - label_input = torch.from_numpy(data.label) - num_seq = feature_input.size()[0] // self.lstm_seq_len - feature_input = feature_input[:num_seq * self.lstm_seq_len, :] - feature_input = feature_input.view(-1, self.lstm_seq_len, self.max_word_len + 2) - + input_label = np.array([word_dict[w] for w in words[1:]] + [word_dict[words[-1]]]) + feature_input = torch.from_numpy(input_vec) + label_input = torch.from_numpy(input_label) return feature_input, label_input def mode(self, test=False): @@ -98,6 +96,15 @@ class CharLM(BaseModel): self.model.train() def data_forward(self, x): + """ + :param x: Tensor of size [lstm_batch_size, lstm_seq_len, max_word_len+2] + :return: Tensor of size [num_words, ?] + """ + # additional processing of inputs after batching + num_seq = x.size()[0] // self.lstm_seq_len + x = x[:num_seq * self.lstm_seq_len, :] + x = x.view(-1, self.lstm_seq_len, self.max_word_len + 2) + # detach hidden state of LSTM from last batch hidden = [state.detach() for state in self.hidden] output, self.hidden = self.model(to_var(x), hidden) @@ -105,13 +112,13 @@ class CharLM(BaseModel): def grad_backward(self): self.model.zero_grad() - self.loss.backward() + self._loss.backward() torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2) self.optimizer.step() - def loss(self, predict, truth): - self.loss = self.criterion(predict, to_var(truth)) - return self.loss + def get_loss(self, predict, truth): + self._loss = self.criterion(predict, to_var(truth)) + return self._loss.data # No pytorch data structure exposed outsides def define_optimizer(self): # redefine optimizer for every new epoch @@ -123,12 +130,13 @@ class CharLM(BaseModel): def preprocess(self, all_text_files): word_dict, char_dict = create_word_char_dict(all_text_files) - self.num_char = len(char_dict) + num_char = len(char_dict) self.vocab_size = len(word_dict) - char_dict["BOW"] = self.num_char + 1 - char_dict["EOW"] = self.num_char + 2 + char_dict["BOW"] = num_char + 1 + char_dict["EOW"] = num_char + 2 char_dict["PAD"] = 0 - # dict of (int, string) + self.num_char = num_char + 3 + # char_dict is a dict of (int, string), int counting from 0 to 47 reverse_word_dict = {value: key for key, value in word_dict.items()} self.max_word_len = max([len(word) for word in word_dict]) objects = { @@ -194,7 +202,7 @@ def create_word_char_dict(*file_name): def to_var(x): - if torch.cuda.is_available(): + if torch.cuda.is_available() and USE_GPU: x = x.cuda() return Variable(x) @@ -246,7 +254,8 @@ class charLM(nn.Module): self.convolutions = [] # list of tuples: (the number of filter, width) - self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)] + # self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)] + self.filter_num_width = [(25, 1), (50, 2), (75, 3)] for out_channel, 
filter_width in self.filter_num_width: self.convolutions.append( @@ -304,7 +313,7 @@ class charLM(nn.Module): # [num_seq*seq_len, max_word_len+2, char_emb_dim] x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3) - # [num_seq*seq_len, 1, max_word_len+2, char_emb_dim] + # [num_seq*seq_len, 1, char_emb_dim, max_word_len+2] x = self.conv_layers(x) # [num_seq*seq_len, total_num_filters] diff --git a/tests/test_charlm.py b/tests/test_charlm.py index cbad3c8f..eecee428 100644 --- a/tests/test_charlm.py +++ b/tests/test_charlm.py @@ -6,10 +6,11 @@ from model.char_language_model import CharLM def test_charlm(): train_config = Trainer.TrainConfig(epochs=1, validate=True, save_when_better=True, - log_per_step=10, log_validation=True) + log_per_step=10, log_validation=True, batch_size=160) trainer = Trainer(train_config) - model = CharLM() + model = CharLM(lstm_batch_size=16, lstm_seq_len=10) + train_data = ToyLoader0("load_train", "./data_for_tests/charlm.txt").load() valid_data = ToyLoader0("load_valid", "./data_for_tests/charlm.txt").load() @@ -18,7 +19,7 @@ def test_charlm(): trainer.save_model(model) test_config = Tester.TestConfig(save_output=True, validate_in_training=True, - save_dev_input=True, save_loss=True, batch_size=16) + save_dev_input=True, save_loss=True, batch_size=160) tester = Tester(test_config) test_data = ToyLoader0("load_test", "./data_for_tests/charlm.txt").load()
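
The slicing fix in action/action.py is easiest to see in isolation: the old generator used num_iter = n_samples / batch_size (a float under Python 3) and end = (batch_size + 1) * step, which yields an empty first batch and wrongly sized slices afterwards; the patched code uses floor division and end = batch_size * (step + 1). A minimal sketch of the corrected logic, using plain Python lists in place of tensors (the standalone batchify below is illustrative only, not the Action.batchify method itself):

    def batchify(batch_size, x, y=None):
        """Standalone sketch of the patched slicing logic (not Action.batchify itself)."""
        n_samples = len(x)
        num_iter = n_samples // batch_size      # floor division, as in the patched code
        def generate():
            for step in range(num_iter):
                start = batch_size * step       # step 0 covers items [0, batch_size)
                end = batch_size * (step + 1)   # fixed: was (batch_size + 1) * step
                if y is None:
                    yield (x[start:end],)
                else:
                    yield x[start:end], y[start:end]
        return num_iter, generate()

    # 10 samples with batch_size 4: two full batches, the 2-sample tail is dropped
    num_iter, batches = batchify(4, list(range(10)), list(range(10)))
    assert num_iter == 2
    assert next(batches) == ([0, 1, 2, 3], [0, 1, 2, 3])
    assert next(batches) == ([4, 5, 6, 7], [4, 5, 6, 7])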
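
In model/char_language_model.py the sequence reshaping moves out of prepare_input and into data_forward: prepare_input now returns a flat [num_words, max_word_len + 2] character-index matrix plus a label vector, and data_forward trims and reshapes each batch to [num_seq, lstm_seq_len, max_word_len + 2] before the CNN/LSTM forward pass. A small sketch of just that reshaping step, with made-up sizes (lstm_seq_len = 10, max_word_len = 8; the real values come from the config and the word dictionary):

    import torch

    lstm_seq_len, max_word_len = 10, 8          # illustrative sizes only

    # what prepare_input now hands over: one row of character indices per word
    feature = torch.zeros(105, max_word_len + 2, dtype=torch.long)

    # what data_forward does with a batch before calling the network
    num_seq = feature.size()[0] // lstm_seq_len      # 105 // 10 = 10 full sequences
    x = feature[:num_seq * lstm_seq_len, :]          # drop the 5 leftover words
    x = x.view(-1, lstm_seq_len, max_word_len + 2)   # [num_seq, lstm_seq_len, max_word_len + 2]
    assert x.size() == (10, lstm_seq_len, max_word_len + 2)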
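
The rename from loss to get_loss also changes what crosses the model boundary: the graph-attached loss is cached in self._loss for grad_backward, and the Trainer/Tester only receive the detached .data value (the "# No pytorch data structure exposed outsides" comment in the patch). A stripped-down sketch of that contract, using a toy linear model instead of CharLM (clip_grad_norm_ is the current name of the clipping utility; the patch uses the older clip_grad_norm):

    import torch
    import torch.nn as nn

    class TinyLM:
        """Toy stand-in for CharLM: keeps the differentiable loss private, exposes only a value."""
        def __init__(self):
            self.model = nn.Linear(4, 2)
            self.criterion = nn.CrossEntropyLoss()
            self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
            self._loss = None

        def data_forward(self, x):
            return self.model(x)

        def get_loss(self, predict, truth):
            self._loss = self.criterion(predict, truth)   # stays attached to the graph
            return self._loss.data                        # plain tensor, for logging/comparison

        def grad_backward(self):
            self.model.zero_grad()
            self._loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5, norm_type=2)
            self.optimizer.step()

    net = TinyLM()
    pred = net.data_forward(torch.randn(8, 4))
    loss_value = net.get_loss(pred, torch.randint(0, 2, (8,)))   # detached value only
    net.grad_backward()                                          # uses the cached self._loss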