@@ -27,8 +27,8 @@ class Action(object):
         :return iteration: int, the number of steps in each epoch
                 generator: generator, to generate batch inputs
         """
-        n_samples = X.shape[0]
-        num_iter = n_samples / batch_size
+        n_samples = X.size()[0]
+        num_iter = n_samples // batch_size
         if Y is None:
             generator = self._batch_generate(batch_size, num_iter, X)
         else:
@@ -39,8 +39,8 @@ class Action(object):
     def _batch_generate(batch_size, num_iter, *data):
         for step in range(num_iter):
             start = batch_size * step
-            end = (batch_size + 1) * step
-            yield tuple([x[start:end, :] for x in data])
+            end = batch_size * (step + 1)
+            yield tuple([x[start:end] for x in data])

     def make_log(self, *args):
         return "log"
@@ -27,17 +27,18 @@ class Tester(Action):
         self.batch_size = test_args.batch_size

     def test(self, network, data):
+        print("testing")
         network.mode(test=True)  # turn on the testing mode
-        if not self.save_dev_input:
-            # transform into network input and label
-            valid_x, valid_y = network.prepare_input(data)
-            self.valid_x = valid_x
-            self.valid_y = valid_y
+        if self.validate_in_training:
+            if self.save_dev_input:
+                if self.valid_x is None:
+                    valid_x, valid_y = network.prepare_input(data)
+                    self.valid_x = valid_x
+                    self.valid_y = valid_y
+                else:
+                    valid_x = self.valid_x
+                    valid_y = self.valid_y
         else:
-            valid_x = self.valid_x
-            valid_y = self.valid_y
+            valid_x, valid_y = network.prepare_input(data)

         # split into batches by self.batch_size
         iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y)
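# Reviewer note: a condensed sketch of the caching behaviour above, with stub
# classes (illustrative names, not the project's API). prepare_input now runs
# once per validation set; later calls during training reuse the cached tensors.
class StubNetwork:
    def prepare_input(self, data):
        print("prepare_input called")
        return data, data

class StubTester:
    valid_x = None
    valid_y = None

    def fetch_valid(self, network, data):
        if self.valid_x is None:           # first call: compute and cache
            self.valid_x, self.valid_y = network.prepare_input(data)
        return self.valid_x, self.valid_y  # later calls: cache hit

t, n = StubTester(), StubNetwork()
t.fetch_valid(n, [1, 2, 3])  # prints "prepare_input called"
t.fetch_valid(n, [1, 2, 3])  # silent -- cached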
@@ -53,10 +54,10 @@ class Tester(Action):
             # forward pass from test input to predicted output
             prediction = network.data_forward(batch_x)
-            loss = network.loss(batch_y, prediction)
+            loss = network.get_loss(prediction, batch_y)

             if self.save_output:
-                batch_output.append(prediction)
+                batch_output.append(prediction.data)
             if self.save_loss:
                 loss_history.append(loss)
                 self.log(self.make_log(step, loss))
@@ -74,9 +75,10 @@ class Tester(Action):
     def result(self):
         return self.output

-    def make_output(self, batch_output):
+    @staticmethod
+    def make_output(batch_outputs):
         # construct full prediction with batch outputs
-        return np.concatenate((batch_output[0], batch_output[1]), axis=0)
+        return np.concatenate(batch_outputs, axis=0)

     def load_config(self, args):
         raise NotImplementedError
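# Reviewer note: quick check that the rewritten make_output handles any number
# of batches, including a smaller final one -- the old version silently ignored
# everything past batch_outputs[1].
import numpy as np

batch_outputs = [np.zeros((16, 5)), np.zeros((16, 5)), np.zeros((8, 5))]
print(np.concatenate(batch_outputs, axis=0).shape)  # (40, 5)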
@@ -8,7 +8,8 @@ class Trainer(Action):
     """
     Trainer for common training logic of all models
     """
-    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", "log_validation"])
+    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better",
+                                        "log_per_step", "log_validation", "batch_size"])

     def __init__(self, train_args):
         """
@@ -20,6 +21,7 @@ class Trainer(Action):
         self.save_when_better = train_args.save_when_better
         self.log_per_step = train_args.log_per_step
         self.log_validation = train_args.log_validation
+        self.batch_size = train_args.batch_size

     def train(self, network, train_data, dev_data):
         """
@@ -28,20 +30,19 @@ class Trainer(Action):
         :param dev_data: raw data for validation
         :return:
         """
-        train_x, train_y = network.prepare_input(train_data.train_set, train_data.train_label)
+        train_x, train_y = network.prepare_input(train_data)
+        network.mode(test=False)  # turn on the train mode

-        iterations, train_batch_generator = self.batchify(train_x, train_y)
+        iterations, train_batch_generator = self.batchify(self.batch_size, train_x, train_y)

         test_args = Tester.TestConfig(save_output=True, validate_in_training=True,
-                                      save_dev_input=True, save_loss=True, batch_size=16)
+                                      save_dev_input=True, save_loss=True, batch_size=self.batch_size)
         evaluator = Tester(test_args)

         best_loss = 1e10
         loss_history = list()

         for epoch in range(self.n_epochs):
-            network.mode(test=False)  # turn on the train mode
             network.define_optimizer()
             for step in range(iterations):
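# Reviewer note: one thing to flag for the author -- train_batch_generator is
# built once before the epoch loop (before and after this patch), and a Python
# generator is exhausted after a single pass, so epochs after the first would
# see no batches unless batchify is re-invoked per epoch. Two-line demo:
def gen():
    yield from range(3)

g = gen()
print(list(g))  # [0, 1, 2]
print(list(g))  # []  -- a second pass is empty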
@@ -49,10 +50,11 @@ class Trainer(Action):
                 prediction = network.data_forward(batch_x)
-                loss = network.loss(batch_y, prediction)
+                loss = network.get_loss(prediction, batch_y)
                 network.grad_backward()

                 if step % self.log_per_step == 0:
+                    print("step ", step)
                     loss_history.append(loss)
                     self.log(self.make_log(epoch, step, loss))
@@ -24,7 +24,7 @@ class BaseModel(object):
     def grad_backward(self):
         raise NotImplementedError

-    def loss(self, pred, truth):
+    def get_loss(self, pred, truth):
         raise NotImplementedError
@@ -50,7 +50,7 @@ class ToyModel(BaseModel):
     def grad_backward(self):
         print("loss gradient backward")

-    def loss(self, pred, truth):
+    def get_loss(self, pred, truth):
         self._loss = np.mean(np.square(pred - truth))
         return self._loss
@@ -10,6 +10,8 @@ from torch.autograd import Variable
 from model.base_model import BaseModel

+USE_GPU = True
+

 class CharLM(BaseModel):
@@ -20,16 +22,16 @@ class CharLM(BaseModel):
     """
     DataTuple = namedtuple("DataTuple", ["feature", "label"])

-    def __init__(self):
+    def __init__(self, lstm_batch_size, lstm_seq_len):
         super(CharLM, self).__init__()
         """
         Settings: should come from config loader or pre-processing
         """
-        self.word_embed_dim = 100
+        self.word_embed_dim = 300
         self.char_embedding_dim = 15
-        self.cnn_batch_size = 40
-        self.lstm_seq_len = 10
-        self.lstm_batch_size = 4
+        self.cnn_batch_size = lstm_batch_size * lstm_seq_len
+        self.lstm_seq_len = lstm_seq_len
+        self.lstm_batch_size = lstm_batch_size
         self.num_epoch = 10
         self.old_PPL = 100000
         self.best_PPL = 100000
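# Reviewer note: sanity arithmetic for the new derived setting. With the values
# used in the test below (lstm_batch_size=16, lstm_seq_len=10), the CNN batch
# covers exactly one trainer/tester batch of 160 words:
lstm_batch_size, lstm_seq_len = 16, 10
print(lstm_batch_size * lstm_seq_len)  # 160 == batch_size in Train/TestConfig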
@@ -45,8 +47,9 @@ class CharLM(BaseModel):
         self.data = None  # named tuple to store all data set
         self.data_ready = False
         self.criterion = nn.CrossEntropyLoss()
-        self.loss = None
-        self.use_gpu = False
+        self._loss = None
+        self.use_gpu = USE_GPU

         # word_emb_dim == hidden_size / num of hidden units
         self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)),
                        to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)))
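# Reviewer note: the _loss rename matters beyond style. In the old code the
# attribute assignment self.loss = None in __init__ shadowed the loss() method
# on the instance, so network.loss(...) would raise a TypeError. Tiny repro:
class Old:
    def __init__(self):
        self.loss = None

    def loss(self, pred, truth):   # unreachable: the instance attribute wins
        return 0

try:
    Old().loss(1, 2)
except TypeError as e:
    print(e)  # 'NoneType' object is not callable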
@@ -64,7 +67,7 @@ class CharLM(BaseModel):
     def prepare_input(self, raw_text):
         """
-        :param raw_text: raw input data
+        :param raw_text: raw input text consisting of words
         :return: torch.Tensor, torch.Tensor
                  feature matrix, label vector
         This function is only called once in Trainer.train, but may be called multiple times in Tester.test
         """
@@ -78,17 +81,12 @@ class CharLM(BaseModel):
         max_word_len = self.max_word_len
         print("word/char dictionary built. Start making inputs.")

-        input_vec = np.array(text2vec(raw_text, char_dict, max_word_len))
+        words = raw_text
+        input_vec = np.array(text2vec(words, char_dict, max_word_len))
         # Labels are next-word indices in word_dict, with the same length as the inputs
-        input_label = np.array([word_dict[w] for w in raw_text[1:]] + [word_dict[raw_text[-1]]])
-        data = self.DataTuple(feature=input_vec, label=input_label)
-        feature_input = torch.from_numpy(data.feature)
-        label_input = torch.from_numpy(data.label)
-        num_seq = feature_input.size()[0] // self.lstm_seq_len
-        feature_input = feature_input[:num_seq * self.lstm_seq_len, :]
-        feature_input = feature_input.view(-1, self.lstm_seq_len, self.max_word_len + 2)
+        input_label = np.array([word_dict[w] for w in words[1:]] + [word_dict[words[-1]]])
+        feature_input = torch.from_numpy(input_vec)
+        label_input = torch.from_numpy(input_label)
         return feature_input, label_input

     def mode(self, test=False):
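# Reviewer note: tiny illustration of the label construction above -- each
# position is labelled with the index of the *next* word, and the final
# position reuses the last word as its own label so lengths match.
word_dict = {"the": 0, "cat": 1, "sat": 2}
words = ["the", "cat", "sat"]
print([word_dict[w] for w in words[1:]] + [word_dict[words[-1]]])  # [1, 2, 2]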
@@ -98,6 +96,15 @@ class CharLM(BaseModel):
             self.model.train()

     def data_forward(self, x):
+        """
+        :param x: Tensor of size [batch_size, max_word_len+2]
+        :return: Tensor of size [num_words, ?]
+        """
+        # additional processing of inputs after batching
+        num_seq = x.size()[0] // self.lstm_seq_len
+        x = x[:num_seq * self.lstm_seq_len, :]
+        x = x.view(-1, self.lstm_seq_len, self.max_word_len + 2)
+
         # detach hidden state of LSTM from last batch
         hidden = [state.detach() for state in self.hidden]
         output, self.hidden = self.model(to_var(x), hidden)
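# Reviewer note: the reshape moved into data_forward, replayed on dummy data.
# Sizes are assumed for illustration (max_word_len=10, lstm_seq_len=10): a flat
# batch of 165 word tensors becomes 16 sequences of length 10, dropping the
# trailing partial sequence.
import torch

lstm_seq_len, max_word_len = 10, 10
x = torch.zeros(165, max_word_len + 2)
num_seq = x.size()[0] // lstm_seq_len           # 16
x = x[:num_seq * lstm_seq_len, :]               # drop the last 5 rows
x = x.view(-1, lstm_seq_len, max_word_len + 2)
print(x.size())  # torch.Size([16, 10, 12])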
@@ -105,13 +112,13 @@ class CharLM(BaseModel):
     def grad_backward(self):
         self.model.zero_grad()
-        self.loss.backward()
+        self._loss.backward()
         torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2)
         self.optimizer.step()

-    def loss(self, predict, truth):
-        self.loss = self.criterion(predict, to_var(truth))
-        return self.loss
+    def get_loss(self, predict, truth):
+        self._loss = self.criterion(predict, to_var(truth))
+        return self._loss.data  # do not expose PyTorch data structures outside

     def define_optimizer(self):
         # redefine optimizer for every new epoch
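# Reviewer note: sketch of the contract introduced by get_loss, under the
# pre-0.4 Variable API this file uses -- the graph-attached loss stays on the
# model for grad_backward(), while callers only receive the detached .data value.
import torch
from torch.autograd import Variable

pred = Variable(torch.randn(4, 3), requires_grad=True)
truth = Variable(torch.zeros(4, 3))
_loss = torch.nn.MSELoss()(pred, truth)
reported = _loss.data    # plain tensor: safe to log or accumulate, no graph kept
_loss.backward()         # still possible through the retained reference
print(pred.grad.size())  # torch.Size([4, 3])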
@@ -123,12 +130,13 @@ class CharLM(BaseModel):
     def preprocess(self, all_text_files):
         word_dict, char_dict = create_word_char_dict(all_text_files)
-        self.num_char = len(char_dict)
+        num_char = len(char_dict)
         self.vocab_size = len(word_dict)
-        char_dict["BOW"] = self.num_char + 1
-        char_dict["EOW"] = self.num_char + 2
+        char_dict["BOW"] = num_char + 1
+        char_dict["EOW"] = num_char + 2
         char_dict["PAD"] = 0
-        # dict of (int, string)
+        self.num_char = num_char + 3
+        # char_dict is a dict of (string, int), with indices counting from 0 to 47
         reverse_word_dict = {value: key for key, value in word_dict.items()}
         self.max_word_len = max([len(word) for word in word_dict])
         objects = {
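# Reviewer note: quick check of the new num_char bookkeeping, assuming the raw
# char_dict indexes characters from 0. BOW/EOW land at n+1 and n+2, so the
# embedding table needs n+3 rows (ids 0..n+2); self.num_char = num_char + 3
# now reflects that, where the old code under-counted by 3.
n = 45                                # e.g. 45 raw characters
bow, eow, pad = n + 1, n + 2, 0
num_char = n + 3                      # 48 ids: 0..47, matching the comment above
print(max(bow, eow, pad) < num_char)  # True -- all special ids fit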
@@ -194,7 +202,7 @@ def create_word_char_dict(*file_name):

 def to_var(x):
-    if torch.cuda.is_available():
+    if torch.cuda.is_available() and USE_GPU:
         x = x.cuda()
     return Variable(x)
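# Reviewer note: with the module-level USE_GPU flag, to_var can now be pinned
# to the CPU even on a CUDA machine, which helps debugging. Minimal sketch:
import torch
from torch.autograd import Variable

USE_GPU = False  # set True to allow .cuda() when available

def to_var(x):
    if torch.cuda.is_available() and USE_GPU:
        x = x.cuda()
    return Variable(x)

print(to_var(torch.zeros(2, 2)).is_cuda)  # False while USE_GPU is False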
@@ -246,7 +254,8 @@ class charLM(nn.Module):
         self.convolutions = []

         # list of tuples: (the number of filters, filter width)
-        self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]
+        # self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]
+        self.filter_num_width = [(25, 1), (50, 2), (75, 3)]

         for out_channel, filter_width in self.filter_num_width:
             self.convolutions.append(
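# Reviewer note: trimming the filter list is presumably for faster tests, but
# it also shrinks the CNN output width, so every layer sized from
# total_num_filters changes with it:
old = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]
new = [(25, 1), (50, 2), (75, 3)]
print(sum(n for n, _ in old), sum(n for n, _ in new))  # 525 150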
@@ -304,7 +313,7 @@ class charLM(nn.Module):
         # [num_seq*seq_len, max_word_len+2, char_emb_dim]

         x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
-        # [num_seq*seq_len, 1, max_word_len+2, char_emb_dim]
+        # [num_seq*seq_len, 1, char_emb_dim, max_word_len+2]

         x = self.conv_layers(x)
         # [num_seq*seq_len, total_num_filters]
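# Reviewer note: verifying the corrected shape comment with dummy sizes
# (char_emb_dim=15, max_word_len=10 assumed). transpose(2, 3) swaps the last
# two axes, giving the [N, 1, char_emb_dim, max_word_len+2] layout Conv2d expects.
import torch

x = torch.zeros(160, 12, 15)  # [num_seq*seq_len, max_word_len+2, char_emb_dim]
x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
print(x.size())               # torch.Size([160, 1, 15, 12])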
@@ -6,10 +6,11 @@ from model.char_language_model import CharLM

 def test_charlm():
     train_config = Trainer.TrainConfig(epochs=1, validate=True, save_when_better=True,
-                                       log_per_step=10, log_validation=True)
+                                       log_per_step=10, log_validation=True, batch_size=160)
     trainer = Trainer(train_config)

-    model = CharLM()
+    model = CharLM(lstm_batch_size=16, lstm_seq_len=10)

     train_data = ToyLoader0("load_train", "./data_for_tests/charlm.txt").load()
     valid_data = ToyLoader0("load_valid", "./data_for_tests/charlm.txt").load()
@@ -18,7 +19,7 @@ def test_charlm():
     trainer.save_model(model)

     test_config = Tester.TestConfig(save_output=True, validate_in_training=True,
-                                    save_dev_input=True, save_loss=True, batch_size=16)
+                                    save_dev_input=True, save_loss=True, batch_size=160)
     tester = Tester(test_config)
     test_data = ToyLoader0("load_test", "./data_for_tests/charlm.txt").load()