@@ -1,3 +1,5 @@ | |||||
from saver.logger import Logger | |||||
class Action(object): | class Action(object): | ||||
""" | """ | ||||
@@ -6,7 +8,7 @@ class Action(object): | |||||
def __init__(self):
    """Create the action and attach a Logger bound to "logger_output.txt"."""
    super(Action, self).__init__()
    # Messages passed to ``self.log`` are forwarded to this logger.
    self.logger = Logger("logger_output.txt")
def load_config(self, args):
    """
    Load the configuration of this action.

    Not implemented at this level.

    :param args: unused by this implementation
    :raises NotImplementedError: always
    """
    raise NotImplementedError
@@ -14,27 +16,31 @@ class Action(object): | |||||
def load_dataset(self, args):
    """
    Load the data set of this action.

    Not implemented at this level.

    :param args: unused by this implementation
    :raises NotImplementedError: always
    """
    raise NotImplementedError
def log(self, string):
    """
    Forward one message to this action's logger.

    :param string: message handed unchanged to ``self.logger.log``
    """
    self.logger.log(string)
def batchify(self, batch_size, X, Y=None):
    """
    Split the samples into consecutive, equally sized mini-batches.

    Only full batches are produced: when the number of samples is not a
    multiple of ``batch_size`` the trailing remainder is left out.

    :param batch_size: int, number of samples per batch
    :param X: feature matrix of size [n_sample, m_feature]
    :param Y: label vector of size [n_sample, 1] (optional)
    :return iteration:int, the number of step in each epoch
            generator:generator, to generate batch inputs; every item is a
            tuple ``(batch_x,)`` or ``(batch_x, batch_y)``
    """
    n_samples = X.shape[0]
    # Floor division keeps the step count an int, which ``range`` requires.
    num_iter = n_samples // batch_size
    data = (X,) if Y is None else (X, Y)
    return num_iter, self._batch_generate(batch_size, num_iter, *data)


@staticmethod
def _batch_generate(batch_size, num_iter, *data):
    """
    Yield ``num_iter`` consecutive batches of ``batch_size`` samples.

    :param batch_size: int, number of samples per batch
    :param num_iter: int, number of batches to yield
    :param data: arrays sliced in lockstep along their first axis
    :return: generator of tuples holding one slice per array in ``data``
    """
    for step in range(num_iter):
        start = batch_size * step
        end = start + batch_size
        # Slice the first axis only, so 1-D label vectors work as well.
        yield tuple(x[start:end] for x in data)
def make_log(self, *args):
    """
    Build the message for one log entry.

    This implementation ignores ``args`` and returns the constant "log".

    :param args: values to report (unused here)
    :return: str
    """
    return "log"
@@ -1,3 +1,5 @@ | |||||
from collections import namedtuple | |||||
import numpy as np | import numpy as np | ||||
from action.action import Action | from action.action import Action | ||||
@@ -6,22 +8,39 @@ from action.action import Action | |||||
class Tester(Action): | class Tester(Action): | ||||
"""docstring for Tester""" | """docstring for Tester""" | ||||
# Settings read by Tester.__init__; prefer keyword construction because the
# field order is easy to get wrong positionally.
TestConfig = namedtuple("config", ["validate_in_training", "save_dev_input", "save_output",
                                   "save_loss", "batch_size"])
def __init__(self, test_args):
    """
    :param test_args: named tuple providing ``validate_in_training``,
        ``save_dev_input``, ``save_output``, ``save_loss`` and ``batch_size``
    """
    super(Tester, self).__init__()
    self.validate_in_training = test_args.validate_in_training
    self.save_dev_input = test_args.save_dev_input
    # Prepared validation input; empty until a test run stores it.
    self.valid_x = None
    self.valid_y = None

    self.save_output = test_args.save_output
    self.output = None
    self.save_loss = test_args.save_loss
    self.mean_loss = None
    self.batch_size = test_args.batch_size
def test(self, network, data): | def test(self, network, data): | ||||
# transform into network input and label | |||||
X, Y = network.prepare_input(data) | |||||
network.mode(test=True) # turn on the testing mode | |||||
if not self.save_dev_input: | |||||
# transform into network input and label | |||||
valid_x, valid_y = network.prepare_input(data) | |||||
if self.validate_in_training: | |||||
self.valid_x = valid_x | |||||
self.valid_y = valid_y | |||||
else: | |||||
valid_x = self.valid_x | |||||
valid_y = self.valid_y | |||||
# split into batches by self.batch_size | # split into batches by self.batch_size | ||||
iterations, test_batch_generator = self.batchify(X, Y) | |||||
iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y) | |||||
batch_output = list() | batch_output = list() | ||||
loss_history = list() | loss_history = list() | ||||
@@ -33,16 +52,19 @@ class Tester(Action): | |||||
# forward pass from tests input to predicted output | # forward pass from tests input to predicted output | ||||
prediction = network.data_forward(batch_x) | prediction = network.data_forward(batch_x) | ||||
batch_output.append(prediction) | |||||
# get the loss | |||||
loss = network.loss(batch_y, prediction) | loss = network.loss(batch_y, prediction) | ||||
loss_history.append(loss) | |||||
self.log(self.make_log(step, loss)) | |||||
if self.save_output: | |||||
batch_output.append(prediction) | |||||
if self.save_loss: | |||||
loss_history.append(loss) | |||||
self.log(self.make_log(step, loss)) | |||||
self.mean_loss = np.mean(np.array(loss_history)) | |||||
self.output = self.make_output(batch_output) | |||||
if self.save_loss: | |||||
self.mean_loss = np.mean(np.array(loss_history)) | |||||
if self.save_output: | |||||
self.output = self.make_output(batch_output) | |||||
@property | @property | ||||
def loss(self): | def loss(self): | ||||
@@ -55,3 +77,9 @@ class Tester(Action): | |||||
def make_output(self, batch_output):
    """
    Assemble the full prediction from the per-batch outputs.

    All batches are joined along the first axis, in order.

    :param batch_output: non-empty sequence of arrays that agree on every
        axis except the first
    :return: the concatenated array
    :raises ValueError: if ``batch_output`` is empty
    """
    # construct full prediction with batch outputs
    return np.concatenate(batch_output, axis=0)
def load_config(self, args):
    """
    Load the configuration of the tester.

    Not implemented yet.

    :param args: unused by this implementation
    :raises NotImplementedError: always
    """
    raise NotImplementedError
def load_dataset(self, args):
    """
    Load the data set of the tester.

    Not implemented yet.

    :param args: unused by this implementation
    :raises NotImplementedError: always
    """
    raise NotImplementedError
@@ -1,3 +1,5 @@ | |||||
from collections import namedtuple | |||||
from .action import Action | from .action import Action | ||||
from .tester import Tester | from .tester import Tester | ||||
@@ -6,32 +8,42 @@ class Trainer(Action): | |||||
""" | """ | ||||
Trainer for common training logic of all models | Trainer for common training logic of all models | ||||
""" | """ | ||||
# Settings read by Trainer.__init__; prefer keyword construction because the
# field order is easy to get wrong positionally.
TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", "log_validation"])
def __init__(self, train_args):
    """
    :param train_args: namedtuple providing ``epochs``, ``validate``,
        ``save_when_better``, ``log_per_step`` and ``log_validation``
    """
    super(Trainer, self).__init__()
    self.n_epochs = train_args.epochs
    self.validate = train_args.validate
    self.save_when_better = train_args.save_when_better
    self.log_per_step = train_args.log_per_step
    self.log_validation = train_args.log_validation
def train(self, network, train_data, dev_data): | |||||
""" | |||||
:param network: the model controller | |||||
:param train_data: raw data for training | |||||
:param dev_data: raw data for validation | |||||
:return: | |||||
""" | |||||
train_x, train_y = network.prepare_input(train_data.train_set, train_data.train_label) | |||||
def train(self, network, data, dev_data): | |||||
train_x, train_y = network.prepare_input(data.train_set, data.train_label) | |||||
valid_x, valid_y = network.prepare_input(dev_data.valid_set, dev_data.valid_label) | |||||
network.mode(test=False) # turn on the train mode | |||||
iterations, train_batch_generator = self.batchify(train_x, train_y) | iterations, train_batch_generator = self.batchify(train_x, train_y) | ||||
loss_history = list() | |||||
network.mode(test=False) | |||||
test_args = "..." | |||||
test_args = Tester.TestConfig(save_output=True, validate_in_training=True, | |||||
save_dev_input=True, save_loss=True, batch_size=16) | |||||
evaluator = Tester(test_args) | evaluator = Tester(test_args) | ||||
best_loss = 1e10 | best_loss = 1e10 | ||||
loss_history = list() | |||||
for epoch in range(self.n_epochs): | for epoch in range(self.n_epochs): | ||||
network.define_optimizer() | |||||
for step in range(iterations): | for step in range(iterations): | ||||
batch_x, batch_y = train_batch_generator.__next__() | batch_x, batch_y = train_batch_generator.__next__() | ||||
@@ -39,14 +51,18 @@ class Trainer(Action): | |||||
loss = network.loss(batch_y, prediction) | loss = network.loss(batch_y, prediction) | ||||
network.grad_backward() | network.grad_backward() | ||||
loss_history.append(loss) | |||||
self.log(self.make_log(epoch, step, loss)) | |||||
if step % self.log_per_step == 0: | |||||
loss_history.append(loss) | |||||
self.log(self.make_log(epoch, step, loss)) | |||||
#################### evaluate over dev set ################### | #################### evaluate over dev set ################### | ||||
if self.validate: | if self.validate: | ||||
evaluator.test(network, [valid_x, valid_y]) | |||||
# give all controls to tester | |||||
evaluator.test(network, dev_data) | |||||
self.log(self.make_valid_log(epoch, evaluator.loss)) | |||||
if self.log_validation: | |||||
self.log(self.make_valid_log(epoch, evaluator.loss)) | |||||
if evaluator.loss < best_loss: | if evaluator.loss < best_loss: | ||||
best_loss = evaluator.loss | best_loss = evaluator.loss | ||||
if self.save_when_better: | if self.save_when_better: | ||||
@@ -54,15 +70,20 @@ class Trainer(Action): | |||||
# finish training | # finish training | ||||
@staticmethod
def prepare_training(network, data):
    """
    Delegate the training preparation to the network.

    :param network: object exposing ``prepare_training(data)``
    :param data: passed through unchanged
    :return: whatever ``network.prepare_training(data)`` returns
    """
    return network.prepare_training(data)
def make_log(self, *args):
    """
    Build the message for one training log entry.

    Placeholder: ignores ``args`` and returns a constant string.

    :param args: values to report (unused here)
    :return: str
    """
    return "make a log"
def make_valid_log(self, *args):
    """
    Build the message for one validation log entry.

    Placeholder: ignores ``args`` and returns a constant string.

    :param args: values to report (unused here)
    :return: str
    """
    return "make a valid log"
def save_model(self, model):
    """
    Persist the model by calling its own ``save`` method.

    :param model: object exposing ``save()``
    """
    model.save()
def load_data(self, data_name): | |||||
print("load data") | |||||
def load_config(self, args):
    """
    Load the configuration of the trainer.

    Not implemented yet.

    :param args: unused by this implementation
    :raises NotImplementedError: always
    """
    raise NotImplementedError
def load_dataset(self, args):
    """
    Load the data set of the trainer.

    Not implemented yet.

    :param args: unused by this implementation
    :raises NotImplementedError: always
    """
    raise NotImplementedError
@@ -13,3 +13,19 @@ class BaseLoader(object): | |||||
with open(self.data_path, "r", encoding="utf-8") as f: | with open(self.data_path, "r", encoding="utf-8") as f: | ||||
text = f.read() | text = f.read() | ||||
return text | return text | ||||
class ToyLoader0(BaseLoader):
    """
    For charLM
    """

    def __init__(self, name, path):
        super(ToyLoader0, self).__init__(name, path)

    def load(self):
        """
        Read the corpus file and return its lower-cased tokens.

        The literal marker "<unk>" is rewritten to "unk" before the text is
        split on whitespace.

        :return: list of str
        """
        # Read as UTF-8 explicitly, like the base loader does, instead of
        # depending on the platform default encoding.
        with open(self.data_path, "r", encoding="utf-8") as f:
            corpus = f.read().lower()
        return corpus.replace("<unk>", "unk").split()
@@ -14,6 +14,8 @@ from model.base_model import BaseModel | |||||
class CharLM(BaseModel): | class CharLM(BaseModel): | ||||
""" | """ | ||||
Controller of the Character-level Neural Language Model | Controller of the Character-level Neural Language Model | ||||
To do: | |||||
- where the data goes, call data savers. | |||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
@@ -28,12 +30,15 @@ class CharLM(BaseModel): | |||||
self.lstm_batch_size = 20 | self.lstm_batch_size = 20 | ||||
self.vocab_size = 100 | self.vocab_size = 100 | ||||
self.num_char = 150 | self.num_char = 150 | ||||
self.max_word_len = 10 | |||||
self.num_epoch = 10 | |||||
self.old_PPL = 100000 | |||||
self.best_PPL = 100000 | |||||
self.data = None # named tuple to store all data set | self.data = None # named tuple to store all data set | ||||
self.data_ready = False | self.data_ready = False | ||||
self.criterion = nn.CrossEntropyLoss() | self.criterion = nn.CrossEntropyLoss() | ||||
self.loss = None | self.loss = None | ||||
self.optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.85) | |||||
self.use_gpu = False | self.use_gpu = False | ||||
# word_emb_dim == hidden_size / num of hidden units | # word_emb_dim == hidden_size / num of hidden units | ||||
self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)), | self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)), | ||||
@@ -44,10 +49,17 @@ class CharLM(BaseModel): | |||||
self.vocab_size, | self.vocab_size, | ||||
self.num_char, | self.num_char, | ||||
use_gpu=self.use_gpu) | use_gpu=self.use_gpu) | ||||
for param in self.model.parameters(): | |||||
nn.init.uniform(param.data, -0.05, 0.05) | |||||
self.learning_rate = 0.1 | |||||
self.optimizer = None | |||||
def prepare_input(self, raw_text): | def prepare_input(self, raw_text): | ||||
""" | """ | ||||
Do some preparation jobs. Transform raw data into input vectors. | |||||
:param raw_text: raw input data | |||||
:return: torch.Tensor, torch.Tensor | |||||
feature matrix, label vector | |||||
""" | """ | ||||
if not self.data_ready: | if not self.data_ready: | ||||
# To do: These need to be dropped out from here. (below) | # To do: These need to be dropped out from here. (below) | ||||
@@ -82,10 +94,20 @@ class CharLM(BaseModel): | |||||
DataTuple = namedtuple("DataTuple", ["feature", "label"]) | DataTuple = namedtuple("DataTuple", ["feature", "label"]) | ||||
self.data = DataTuple(feature=input_vec, label=input_label) | self.data = DataTuple(feature=input_vec, label=input_label) | ||||
return self.data.feature, self.data.label | |||||
feature_input = torch.from_numpy(self.data.feature) | |||||
label_input = torch.from_numpy(self.data.label) | |||||
num_seq = feature_input.size()[0] // self.lstm_seq_len | |||||
feature_input = feature_input[:num_seq * self.lstm_seq_len, :] | |||||
feature_input = feature_input.view(-1, self.lstm_seq_len, self.max_word_len + 2) | |||||
self.num_iter_per_epoch = feature_input.size()[0] // self.lstm_batch_size | |||||
return feature_input, label_input | |||||
def mode(self, test=False):
    """
    Switch the wrapped model between evaluation and training mode.

    :param test: bool, True calls ``self.model.eval()``, False calls
        ``self.model.train()``
    """
    if test:
        self.model.eval()
    else:
        self.model.train()
def data_forward(self, x): | def data_forward(self, x): | ||||
# detach hidden state of LSTM from last batch | # detach hidden state of LSTM from last batch | ||||
@@ -103,6 +125,13 @@ class CharLM(BaseModel): | |||||
self.loss = self.criterion(predict, to_var(truth)) | self.loss = self.criterion(predict, to_var(truth)) | ||||
return self.loss | return self.loss | ||||
def define_optimizer(self, momentum=0.85):
    """
    Create a fresh SGD optimizer over the model parameters.

    The learning rate is read from ``self.learning_rate`` at call time and
    the result is stored in ``self.optimizer``.

    :param momentum: float, SGD momentum (defaults to the previous fixed value)
    """
    # redefine optimizer for every new epoch
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=momentum)
def save(self, path="cache/model.pkl"):
    """
    Serialize the whole model object with ``torch.save``.

    :param path: str, destination file (defaults to the previous fixed location)
    """
    import os  # local import: the file-level import block is not visible here

    # Create the target directory first so a missing folder does not make
    # the save fail.
    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)
    torch.save(self.model, path)
@staticmethod | @staticmethod | ||||
def preprocess(): | def preprocess(): | ||||
word_dict, char_dict = create_word_char_dict("valid.txt", "train.txt", "tests.txt") | word_dict, char_dict = create_word_char_dict("valid.txt", "train.txt", "tests.txt") | ||||
@@ -122,23 +151,6 @@ class CharLM(BaseModel): | |||||
torch.save(objects, "cache/prep.pt") | torch.save(objects, "cache/prep.pt") | ||||
print("Preprocess done.") | print("Preprocess done.") | ||||
def forward(self, x, hidden): | |||||
lstm_batch_size = x.size()[0] | |||||
lstm_seq_len = x.size()[1] | |||||
x = x.contiguous().view(-1, x.size()[2]) | |||||
x = self.char_embed(x) | |||||
x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3) | |||||
x = self.conv_layers(x) | |||||
x = self.batch_norm(x) | |||||
x = self.highway1(x) | |||||
x = self.highway2(x) | |||||
x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1) | |||||
x, hidden = self.lstm(x, hidden) | |||||
x = self.dropout(x) | |||||
x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1) | |||||
x = self.linear(x) | |||||
return x, hidden | |||||
""" | """ | ||||
Global Functions | Global Functions | ||||
@@ -8,4 +8,5 @@ class Logger(BaseSaver): | |||||
super(Logger, self).__init__(save_path) | super(Logger, self).__init__(save_path) | ||||
def log(self, string):
    """
    Append one message to the file at ``self.save_path``.

    Each message ends up on its own line: a newline is added unless the
    message already ends with one.

    :param string: str, the message to record
    """
    # Without a terminator consecutive entries would run together.
    entry = string if string.endswith("\n") else string + "\n"
    with open(self.save_path, "a", encoding="utf-8") as f:
        f.write(entry)
@@ -0,0 +1,30 @@ | |||||
from action.tester import Tester | |||||
from action.trainer import Trainer | |||||
from loader.base_loader import ToyLoader0 | |||||
from model.char_language_model import CharLM | |||||
def test_charlm():
    """
    Smoke run of the character-level language model pipeline.

    Trains a CharLM for one epoch with validation, saves it, then runs a
    Tester over the test split. The data paths are placeholders.
    """
    train_config = Trainer.TrainConfig(epochs=1, validate=True, save_when_better=True,
                                       log_per_step=10, log_validation=True)
    trainer = Trainer(train_config)

    model = CharLM()

    # NOTE(review): ToyLoader0.load() returns a token list, while some visible
    # Trainer.train lines read attributes such as ``train_set`` from their data
    # argument - confirm the expected data type before relying on this run.
    train_data = ToyLoader0("load_train", "path_to_train_file").load()
    valid_data = ToyLoader0("load_valid", "path_to_valid_file").load()

    trainer.train(model, train_data, valid_data)

    trainer.save_model(model)

    test_config = Tester.TestConfig(save_output=True, validate_in_training=True,
                                    save_dev_input=True, save_loss=True, batch_size=16)
    tester = Tester(test_config)

    test_data = ToyLoader0("load_test", "path_to_test").load()

    tester.test(model, test_data)


if __name__ == "__main__":
    test_charlm()