
READY TO GO: test_charlm tested

tags/v0.1.0
FengZiYjun, 6 years ago
commit 52b1337e8b

6 changed files with 70 additions and 56 deletions
  1. action/action.py (+4, -4)
  2. action/tester.py (+13, -11)
  3. action/trainer.py (+9, -7)
  4. model/base_model.py (+2, -2)
  5. model/char_language_model.py (+38, -29)
  6. tests/test_charlm.py (+4, -3)

action/action.py (+4, -4)

@@ -27,8 +27,8 @@ class Action(object):
         :return iteration:int, the number of step in each epoch
                 generator:generator, to generate batch inputs
         """
-        n_samples = X.shape[0]
-        num_iter = n_samples / batch_size
+        n_samples = X.size()[0]
+        num_iter = n_samples // batch_size
         if Y is None:
             generator = self._batch_generate(batch_size, num_iter, X)
         else:
@@ -39,8 +39,8 @@ class Action(object):
     def _batch_generate(batch_size, num_iter, *data):
         for step in range(num_iter):
             start = batch_size * step
-            end = (batch_size + 1) * step
-            yield tuple([x[start:end, :] for x in data])
+            end = batch_size * (step + 1)
+            yield tuple([x[start:end] for x in data])

     def make_log(self, *args):
         return "log"

action/tester.py (+13, -11)

@@ -27,17 +27,18 @@ class Tester(Action):
         self.batch_size = test_args.batch_size

     def test(self, network, data):
+        print("testing")
         network.mode(test=True)  # turn on the testing mode

-        if not self.save_dev_input:
-            # transform into network input and label
-            valid_x, valid_y = network.prepare_input(data)
-            self.valid_x = valid_x
-            self.valid_y = valid_y
-        else:
-            valid_x = self.valid_x
-            valid_y = self.valid_y
+        if self.validate_in_training:
+            if self.save_dev_input:
+                if self.valid_x is None:
+                    valid_x, valid_y = network.prepare_input(data)
+                    self.valid_x = valid_x
+                    self.valid_y = valid_y
+                else:
+                    valid_x = self.valid_x
+                    valid_y = self.valid_y
+        else:
+            valid_x, valid_y = network.prepare_input(data)

         # split into batches by self.batch_size
         iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y)
@@ -53,10 +54,10 @@ class Tester(Action):
            # forward pass from tests input to predicted output
            prediction = network.data_forward(batch_x)

-           loss = network.loss(batch_y, prediction)
+           loss = network.get_loss(prediction, batch_y)

            if self.save_output:
-               batch_output.append(prediction)
+               batch_output.append(prediction.data)
            if self.save_loss:
                loss_history.append(loss)
                self.log(self.make_log(step, loss))
@@ -74,9 +75,10 @@ class Tester(Action):
     def result(self):
         return self.output

-    def make_output(self, batch_output):
+    @staticmethod
+    def make_output(batch_outputs):
         # construct full prediction with batch outputs
-        return np.concatenate((batch_output[0], batch_output[1]), axis=0)
+        return np.concatenate(batch_outputs, axis=0)

     def load_config(self, args):
         raise NotImplementedError
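
Note: make_output now concatenates every collected batch instead of only the first two, and predictions are appended as prediction.data so no autograd graph is kept. A small sketch of the concatenation with made-up batch shapes (illustration only):

    import numpy as np

    # three prediction batches of shape [batch_size, vocab_size]; values are placeholders
    batch_outputs = [np.zeros((16, 100)), np.ones((16, 100)), np.full((8, 100), 2.0)]

    # old behaviour, np.concatenate((batch_outputs[0], batch_outputs[1]), axis=0), drops the tail
    full_prediction = np.concatenate(batch_outputs, axis=0)
    print(full_prediction.shape)   # (40, 100): all batches, including the final partial one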


action/trainer.py (+9, -7)

@@ -8,7 +8,8 @@ class Trainer(Action):
     """
     Trainer for common training logic of all models
     """
-    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better", "log_per_step", "log_validation"])
+    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better",
+                                        "log_per_step", "log_validation", "batch_size"])

     def __init__(self, train_args):
         """
@@ -20,6 +21,7 @@ class Trainer(Action):
         self.save_when_better = train_args.save_when_better
         self.log_per_step = train_args.log_per_step
         self.log_validation = train_args.log_validation
+        self.batch_size = train_args.batch_size

     def train(self, network, train_data, dev_data):
         """
@@ -28,20 +30,19 @@ class Trainer(Action):
         :param dev_data: raw data for validation
         :return:
         """
-        train_x, train_y = network.prepare_input(train_data.train_set, train_data.train_label)
+        train_x, train_y = network.prepare_input(train_data)

-        network.mode(test=False)  # turn on the train mode
-
-        iterations, train_batch_generator = self.batchify(train_x, train_y)
+        iterations, train_batch_generator = self.batchify(self.batch_size, train_x, train_y)

         test_args = Tester.TestConfig(save_output=True, validate_in_training=True,
-                                      save_dev_input=True, save_loss=True, batch_size=16)
+                                      save_dev_input=True, save_loss=True, batch_size=self.batch_size)
         evaluator = Tester(test_args)

         best_loss = 1e10
         loss_history = list()

         for epoch in range(self.n_epochs):
+            network.mode(test=False)  # turn on the train mode

             network.define_optimizer()
             for step in range(iterations):
@@ -49,10 +50,11 @@ class Trainer(Action):

                 prediction = network.data_forward(batch_x)

-                loss = network.loss(batch_y, prediction)
+                loss = network.get_loss(prediction, batch_y)
                 network.grad_backward()

                 if step % self.log_per_step == 0:
+                    print("step ", step)
                     loss_history.append(loss)
                     self.log(self.make_log(epoch, step, loss))
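
Note: batch_size is now carried in TrainConfig, passed to batchify, and reused for the evaluator's TestConfig instead of the hard-coded 16. A quick sketch of how the namedtuple config is built and read (field names from this diff, values illustrative):

    from collections import namedtuple

    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better",
                                        "log_per_step", "log_validation", "batch_size"])

    train_args = TrainConfig(epochs=1, validate=True, save_when_better=True,
                             log_per_step=10, log_validation=True, batch_size=160)

    # attribute access is what Trainer.__init__ relies on
    print(train_args.batch_size)   # 160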




model/base_model.py (+2, -2)

@@ -24,7 +24,7 @@ class BaseModel(object):
     def grad_backward(self):
         raise NotImplementedError

-    def loss(self, pred, truth):
+    def get_loss(self, pred, truth):
         raise NotImplementedError

@@ -50,7 +50,7 @@ class ToyModel(BaseModel):
     def grad_backward(self):
         print("loss gradient backward")

-    def loss(self, pred, truth):
+    def get_loss(self, pred, truth):
         self._loss = np.mean(np.square(pred - truth))
         return self._loss
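
Note: the rename from loss to get_loss matters because CharLM also stores the loss value on the instance; when both the method and the attribute are named loss, the first call replaces the bound method with a tensor. A toy reproduction of the clash (not the repository code):

    class Clashing:
        def loss(self, pred, truth):
            # assigning to self.loss shadows the method after the first call
            self.loss = (pred - truth) ** 2
            return self.loss

    m = Clashing()
    print(m.loss(3, 1))   # 4
    try:
        m.loss(3, 1)      # self.loss is now an int, no longer callable
    except TypeError as err:
        print("second call fails:", err)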




model/char_language_model.py (+38, -29)

@@ -10,6 +10,8 @@ from torch.autograd import Variable

 from model.base_model import BaseModel

+USE_GPU = True
+

 class CharLM(BaseModel):

@@ -20,16 +22,16 @@ class CharLM(BaseModel):
     """
     DataTuple = namedtuple("DataTuple", ["feature", "label"])

-    def __init__(self):
+    def __init__(self, lstm_batch_size, lstm_seq_len):
         super(CharLM, self).__init__()
         """
         Settings: should come from config loader or pre-processing
         """
-        self.word_embed_dim = 100
+        self.word_embed_dim = 300
         self.char_embedding_dim = 15
-        self.cnn_batch_size = 40
-        self.lstm_seq_len = 10
-        self.lstm_batch_size = 4
+        self.cnn_batch_size = lstm_batch_size * lstm_seq_len
+        self.lstm_seq_len = lstm_seq_len
+        self.lstm_batch_size = lstm_batch_size
         self.num_epoch = 10
         self.old_PPL = 100000
         self.best_PPL = 100000
@@ -45,8 +47,9 @@ class CharLM(BaseModel):
         self.data = None  # named tuple to store all data set
         self.data_ready = False
         self.criterion = nn.CrossEntropyLoss()
-        self.loss = None
-        self.use_gpu = False
+        self._loss = None
+        self.use_gpu = USE_GPU
+
         # word_emb_dim == hidden_size / num of hidden units
         self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)),
                        to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)))
@@ -64,7 +67,7 @@ class CharLM(BaseModel):

     def prepare_input(self, raw_text):
         """
-        :param raw_text: raw input data
+        :param raw_text: raw input text consisting of words
         :return: torch.Tensor, torch.Tensor
                 feature matrix, label vector
         This function is only called once in Trainer.train, but may called multiple times in Tester.test
@@ -78,17 +81,12 @@ class CharLM(BaseModel):
         max_word_len = self.max_word_len
         print("word/char dictionary built. Start making inputs.")

-        input_vec = np.array(text2vec(raw_text, char_dict, max_word_len))
+        words = raw_text
+        input_vec = np.array(text2vec(words, char_dict, max_word_len))
         # Labels are next-word index in word_dict with the same length as inputs
-        input_label = np.array([word_dict[w] for w in raw_text[1:]] + [word_dict[raw_text[-1]]])
-
-        data = self.DataTuple(feature=input_vec, label=input_label)
-        feature_input = torch.from_numpy(data.feature)
-        label_input = torch.from_numpy(data.label)
-        num_seq = feature_input.size()[0] // self.lstm_seq_len
-        feature_input = feature_input[:num_seq * self.lstm_seq_len, :]
-        feature_input = feature_input.view(-1, self.lstm_seq_len, self.max_word_len + 2)
-
+        input_label = np.array([word_dict[w] for w in words[1:]] + [word_dict[words[-1]]])
+        feature_input = torch.from_numpy(input_vec)
+        label_input = torch.from_numpy(input_label)
         return feature_input, label_input

     def mode(self, test=False):
@@ -98,6 +96,15 @@ class CharLM(BaseModel):
             self.model.train()

     def data_forward(self, x):
+        """
+        :param x: Tensor of size [lstm_batch_size, lstm_seq_len, max_word_len+2]
+        :return: Tensor of size [num_words, ?]
+        """
+        # additional processing of inputs after batching
+        num_seq = x.size()[0] // self.lstm_seq_len
+        x = x[:num_seq * self.lstm_seq_len, :]
+        x = x.view(-1, self.lstm_seq_len, self.max_word_len + 2)
+
         # detach hidden state of LSTM from last batch
         hidden = [state.detach() for state in self.hidden]
         output, self.hidden = self.model(to_var(x), hidden)
@@ -105,13 +112,13 @@ class CharLM(BaseModel):

     def grad_backward(self):
         self.model.zero_grad()
-        self.loss.backward()
+        self._loss.backward()
         torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2)
         self.optimizer.step()

-    def loss(self, predict, truth):
-        self.loss = self.criterion(predict, to_var(truth))
-        return self.loss
+    def get_loss(self, predict, truth):
+        self._loss = self.criterion(predict, to_var(truth))
+        return self._loss.data  # No pytorch data structure exposed outsides

     def define_optimizer(self):
         # redefine optimizer for every new epoch
@@ -123,12 +130,13 @@ class CharLM(BaseModel):

     def preprocess(self, all_text_files):
         word_dict, char_dict = create_word_char_dict(all_text_files)
-        self.num_char = len(char_dict)
+        num_char = len(char_dict)
         self.vocab_size = len(word_dict)
-        char_dict["BOW"] = self.num_char + 1
-        char_dict["EOW"] = self.num_char + 2
+        char_dict["BOW"] = num_char + 1
+        char_dict["EOW"] = num_char + 2
         char_dict["PAD"] = 0
-        # dict of (int, string)
+        self.num_char = num_char + 3
+        # char_dict is a dict of (int, string), int counting from 0 to 47
         reverse_word_dict = {value: key for key, value in word_dict.items()}
         self.max_word_len = max([len(word) for word in word_dict])
         objects = {
@@ -194,7 +202,7 @@ def create_word_char_dict(*file_name):

 def to_var(x):
-    if torch.cuda.is_available():
+    if torch.cuda.is_available() and USE_GPU:
         x = x.cuda()
     return Variable(x)

@@ -246,7 +254,8 @@ class charLM(nn.Module):
         self.convolutions = []

         # list of tuples: (the number of filter, width)
-        self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]
+        # self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]
+        self.filter_num_width = [(25, 1), (50, 2), (75, 3)]

         for out_channel, filter_width in self.filter_num_width:
             self.convolutions.append(
@@ -304,7 +313,7 @@ class charLM(nn.Module):
         # [num_seq*seq_len, max_word_len+2, char_emb_dim]

         x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
-        # [num_seq*seq_len, 1, max_word_len+2, char_emb_dim]
+        # [num_seq*seq_len, 1, char_emb_dim, max_word_len+2]

         x = self.conv_layers(x)
         # [num_seq*seq_len, total_num_filters]
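
Note: prepare_input now returns a flat character-index matrix (one row per word), and the truncation and reshape into [num_seq, lstm_seq_len, max_word_len + 2] happen per batch inside data_forward. A standalone sketch of that reshape with assumed sizes (lstm_seq_len=10, max_word_len=12):

    import torch

    lstm_seq_len = 10
    max_word_len = 12
    x = torch.zeros(163, max_word_len + 2, dtype=torch.long)   # 163 words in this batch

    num_seq = x.size()[0] // lstm_seq_len          # 16 full sequences
    x = x[:num_seq * lstm_seq_len, :]              # drop the 3 trailing words
    x = x.view(-1, lstm_seq_len, max_word_len + 2)
    print(x.shape)                                 # torch.Size([16, 10, 14])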


tests/test_charlm.py (+4, -3)

@@ -6,10 +6,11 @@ from model.char_language_model import CharLM

 def test_charlm():
     train_config = Trainer.TrainConfig(epochs=1, validate=True, save_when_better=True,
-                                       log_per_step=10, log_validation=True)
+                                       log_per_step=10, log_validation=True, batch_size=160)
     trainer = Trainer(train_config)

-    model = CharLM()
+    model = CharLM(lstm_batch_size=16, lstm_seq_len=10)

     train_data = ToyLoader0("load_train", "./data_for_tests/charlm.txt").load()
     valid_data = ToyLoader0("load_valid", "./data_for_tests/charlm.txt").load()
@@ -18,7 +19,7 @@ def test_charlm():
     trainer.save_model(model)

     test_config = Tester.TestConfig(save_output=True, validate_in_training=True,
-                                    save_dev_input=True, save_loss=True, batch_size=16)
+                                    save_dev_input=True, save_loss=True, batch_size=160)
     tester = Tester(test_config)

     test_data = ToyLoader0("load_test", "./data_for_tests/charlm.txt").load()
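
Note: the new sizes appear chosen so that one trainer/tester batch of 160 words reshapes cleanly into LSTM sequences: 160 equals lstm_batch_size * lstm_seq_len, which is also cnn_batch_size in CharLM.__init__. A one-line sanity check under that assumption:

    lstm_batch_size, lstm_seq_len = 16, 10
    trainer_batch_size = 160

    # cnn_batch_size = lstm_batch_size * lstm_seq_len in CharLM.__init__
    assert trainer_batch_size == lstm_batch_size * lstm_seq_len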

