From b93cf0869122058dece64d732ba8128f1deca460 Mon Sep 17 00:00:00 2001
From: HENRY L
Date: Mon, 2 Jul 2018 01:40:17 +0800
Subject: [PATCH 1/3] initial commit

---
 fastNLP/modules/prototype/Word2Idx.py    |  62 +++++++++++++
 fastNLP/modules/prototype/aggregation.py |  41 +++++++++
 fastNLP/modules/prototype/dataloader.py  |  82 +++++++++++++++++
 fastNLP/modules/prototype/embedding.py   |  23 +++++
 fastNLP/modules/prototype/encoder.py     |  25 ++++++
 fastNLP/modules/prototype/example.py     | 108 +++++++++++++++++++++++
 fastNLP/modules/prototype/predict.py     |  25 ++++++
 7 files changed, 366 insertions(+)
 create mode 100644 fastNLP/modules/prototype/Word2Idx.py
 create mode 100644 fastNLP/modules/prototype/aggregation.py
 create mode 100644 fastNLP/modules/prototype/dataloader.py
 create mode 100644 fastNLP/modules/prototype/embedding.py
 create mode 100644 fastNLP/modules/prototype/encoder.py
 create mode 100644 fastNLP/modules/prototype/example.py
 create mode 100644 fastNLP/modules/prototype/predict.py

diff --git a/fastNLP/modules/prototype/Word2Idx.py b/fastNLP/modules/prototype/Word2Idx.py
new file mode 100644
index 00000000..544126be
--- /dev/null
+++ b/fastNLP/modules/prototype/Word2Idx.py
@@ -0,0 +1,62 @@
+import collections
+import pickle
+
+class Word2Idx():
+    """
+    Build a word index according to word frequency.
+    If "min_freq" is given, then only words with a frequency not less than min_freq will be kept.
+    If "max_num" is given, then at most the most frequent $max_num words will be kept.
+    "words" should be a list [ w_1,w_2,...,w_i,...,w_n ] where each w_i is a string representing a word.
+
+    num is the size of the lookup table.
+    w2i is a lookup table assigning each word an index.
+    Note that index 0 will be returned for any unregistered words.
+    i2w is a vector which serves as an inverse mapping of w2i.
+    Token "" will be returned for index 0
+    e.g. i2w[w2i["word"]] == "word"
+    """
+    def __init__(self):
+        self.__w2i = dict()
+        self.__i2w = []
+        self.num = 0
+
+    def build(self, words, min_freq=0, max_num=None):
+        """build a model from words"""
+        counter = collections.Counter(words)
+        word_set = set(words)
+        if max_num is not None:
+            most_common = counter.most_common(min(len(word_set), max_num - 1))
+        else:
+            most_common = counter.most_common()
+        self.__w2i = dict((w[0],i + 1) for i,w in enumerate(most_common) if w[1] >= min_freq)
+        self.__w2i[""] = 0
+        self.__i2w = [""] + [ w[0] for w in most_common if w[1] >= min_freq ]
+        self.num = len(self.__i2w)
+
+    def w2i(self,word):
+        """word to index"""
+        if word in self.__w2i:
+            return self.__w2i[word]
+        return 0
+
+    def i2w(self,idx):
+        """index to word"""
+        if idx >= self.num:
+            raise Exception("out of range\n")
+        return self.__i2w[idx]
+
+    def save(self,addr):
+        """save the model to a file with address "addr" """
+        f = open(addr,"wb")
+        pickle.dump([self.__i2w, self.__w2i, self.num], f)
+        f.close()
+
+    def load(self,addr):
+        """load a model from a file with address "addr" """
+        f = open(addr,"rb")
+        paras = pickle.load(f)
+        self.__i2w, self.__w2i, self.num = paras[0], paras[1], paras[2]
+        f.close()
+
+
+
diff --git a/fastNLP/modules/prototype/aggregation.py b/fastNLP/modules/prototype/aggregation.py
new file mode 100644
index 00000000..e87862b8
--- /dev/null
+++ b/fastNLP/modules/prototype/aggregation.py
@@ -0,0 +1,41 @@
+import torch
+import torch.nn as nn
+
+class Selfattention(nn.Module):
+    """
+    Self Attention Module.
+
+    Args:
+    input_size : the size of the input vector
+    d_a : the width of the weight matrix
+    r : the number of encoded vectors
+    """
+    def __init__(self, input_size, d_a, r):
+        super(Selfattention, self).__init__()
+        self.W_s1 = nn.Parameter(torch.randn(d_a, input_size), requires_grad=True)
+        self.W_s2 = nn.Parameter(torch.randn(r, d_a), requires_grad=True)
+        self.softmax = nn.Softmax(dim=2)
+        self.tanh = nn.Tanh()
+
+    def penalization(self, A):
+        """
+        compute the penalization term for the attention module
+        """
+        if self.W_s1.is_cuda:
+            I = Variable(torch.eye(A.size(1)).cuda(), requires_grad=False)
+        else:
+            I = Variable(torch.eye(A.size(1)), requires_grad=False)
+        M = torch.matmul(A, torch.transpose(A, 1, 2)) - I
+        M = M.view(M.size(0), -1)
+        return torch.sum(M ** 2, dim=1)
+
+    def forward(self, x):
+        inter = self.tanh(torch.matmul(self.W_s1, torch.transpose(x, 1, 2)))
+        A = self.softmax(torch.matmul(self.W_s2, inter))
+        out = torch.matmul(A, H)
+        out = out.view(out.size(0), -1)
+        penalty = self.penalization(A)
+        return out, penalty
+
+if __name__ == "__main__":
+    model = Selfattention(100, 10, 20)
diff --git a/fastNLP/modules/prototype/dataloader.py b/fastNLP/modules/prototype/dataloader.py
new file mode 100644
index 00000000..a7eafdc2
--- /dev/null
+++ b/fastNLP/modules/prototype/dataloader.py
@@ -0,0 +1,82 @@
+import random
+import pickle
+import torch
+import numpy as np
+from torch.autograd import Variable
+
+def float_wrapper(x, requires_grad=True, using_cuda=True):
+    """
+    transform a float-type list into a pytorch Variable
+    """
+    if using_cuda==True:
+        return Variable(torch.FloatTensor(x).cuda(), requires_grad=requires_grad)
+    else:
+        return Variable(torch.FloatTensor(x), requires_grad=requires_grad)
+
+def long_wrapper(x, requires_grad=True, using_cuda=True):
+    """
+    transform a long-type list into a pytorch Variable
+    """
+    if using_cuda==True:
+        return Variable(torch.LongTensor(x).cuda(), requires_grad=requires_grad)
+    else:
+        return Variable(torch.LongTensor(x), requires_grad=requires_grad)
+
+def pad(X, using_cuda):
+    """
+    zero-pad sequences to the same length, then stack them together
+    """
+    maxlen = max([x.size(0) for x in X])
+    Y = []
+    for x in X:
+        padlen = maxlen - x.size(0)
+        if padlen > 0:
+            if using_cuda:
+                paddings = torch.zeros(padlen).cuda()
+            else:
+                paddings = torch.zeros(padlen)
+            x_ = torch.cat(x, paddings)
+            Y.append(x_)
+        else:
+            Y.append(x)
+    return torch.stack(Y)
+
+class DataLoader(object):
+    """
+    load data in the form {"feature", "class"}
+
+    Args:
+    fdir : data file address
+    batch_size : batch_size
+    shuffle : if True, shuffle the dataset every epoch
+    using_cuda : if True, return tensors on GPU
+    """
+    def __init__(self, fdir, batch_size, shuffle=True, using_cuda=True):
+        with open(fdir, "rb") as f:
+            self.data = pickle.load(f)
+        self.batch_size = batch_size
+        self.num = len(self.data)
+        self.count = 0
+        self.iters = int(self.num / batch_size)
+        self.shuffle = shuffle
+        self.using_cuda = using_cuda
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.count == self.iters:
+            self.count = 0
+            if self.shuffle:
+                random.shuffle(self.data)
+            raise StopIteration()
+        else:
+            X = self.data[self.count * self.batch_size : (self.count + 1) * self.batch_size]
+            self.count += 1
+            X = [long_wrapper(x["sent"], using_cuda=self.using_cuda) for x in X]
+            X = pad(X, self.using_cuda)
+            y = [long_wrapper(x["class"], using_cuda=self.using_cuda) for x in X]
+            y = torch.stack(y)
+            return {"feature" : X, "class" : y}
+
+
diff --git a/fastNLP/modules/prototype/embedding.py b/fastNLP/modules/prototype/embedding.py
new file mode 100644
index 00000000..1ee88a92
--- /dev/null
+++ b/fastNLP/modules/prototype/embedding.py
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn
+
+class Lookuptable(nn.Module):
+    """
+    A simple lookup table
+
+    Args:
+    nums : the size of the lookup table
+    dims : the size of each vector
+    padding_idx : pads the tensor with zeros whenever it encounters this index
+    sparse : If True, the gradient matrix will be a sparse tensor. In this case,
+        only optim.SGD(cuda and cpu) and optim.Adagrad(cpu) can be used
+    """
+    def __init__(self, nums, dims, padding_idx=0, sparse=False):
+        super(Lookuptable, self).__init__()
+        self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse)
+
+    def forward(self, x):
+        return self.embed(x)
+
+if __name__ == "__main__":
+    model = Lookuptable(10, 20)
diff --git a/fastNLP/modules/prototype/encoder.py b/fastNLP/modules/prototype/encoder.py
new file mode 100644
index 00000000..249eaf8c
--- /dev/null
+++ b/fastNLP/modules/prototype/encoder.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+
+class Lstm(nn.Module):
+    """
+    LSTM module
+
+    Args:
+    input_size : input size
+    hidden_size : hidden size
+    num_layers : number of hidden layers
+    dropout : dropout rate
+    bidirectional : If True, becomes a bidirectional RNN
+    """
+    def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional):
+        super(Lstm, self).__init__()
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=True,\
+            dropout=dropout, bidirectional=bidirectional)
+
+    def forward(self, x):
+        x, _ = self.lstm(x)
+        return x
+
+if __name__ == "__main__":
+    model = Lstm(20, 30, 1, 0.5, False)
diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py
new file mode 100644
index 00000000..9dffc59a
--- /dev/null
+++ b/fastNLP/modules/prototype/example.py
@@ -0,0 +1,108 @@
+import torch
+import torch.nn as nn
+import encoder
+import aggregation
+import embedding
+import predict
+import torch.optim as optim
+import time
+import dataloader
+
+WORD_SIZE = 100
+HIDDEN_SIZE = 300
+D_A = 350
+R = 20
+MLP_HIDDEN = 2000
+CLASSES_NUM = 5
+WORD_NUM = 357361
+
+class Net(nn.Module):
+    """
+    A model for sentiment analysis using LSTM and self-attention
+    """
+    def __init__(self):
+        super(Net, self).__init__()
+        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
+        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
+        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
+        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)
+
+    def forward(self, x):
+        x = self.embedding(x)
+        x = self.encoder(x)
+        x, penalty = self.aggregation(x)
+        x = self.predict(x)
+        return r, x
+
+def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
+          momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
+    """
+    training procedure
+
+    Args:
+    If model_dict is given (a file address), it will continue training on the given model.
+    Otherwise, it will train a new model from scratch.
+    If using_cuda is True, the training will be conducted on GPU.
+    learning_rate and momentum are for the SGD optimizer.
+    coef is the coefficient balancing the cross-entropy loss and the penalization term.
+    interval is the reporting frequency.
+
+    the result will be saved as "model_dict_" + current time, which can be used for further training
+    """
+
+    if using_cuda == True:
+        net = Net().cuda()
+    else:
+        net = Net()
+
+    if model_dict != None:
+        net.load_state_dict(torch.load(model_dict))
+
+    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
+    criterion = nn.CrossEntropyLoss()
+    dataset = dataloader.DataLoader("trainset.pkl", using_cuda=using_cuda)
+
+    #statistics
+    loss_count = 0
+    prepare_time = 0
+    run_time = 0
+    count = 0
+
+    for epoch in range(epochs):
+        for i, batch in enumerate(dataset):
+            t1 = time.time()
+            X = batch["feature"]
+            y = batch["class"]
+
+            t2 = time.time()
+            y_pred, y_penl = net(X)
+            loss = criterion(y_pred, y) + torch.sum(y_penl) / batch_size * coef
+            optimizer.zero_grad()
+            loss.backward()
+            nn.utils.clip_grad_norm(net.parameters(), 0.5)
+            optimizer.step()
+            t3 = time.time()
+
+            loss_count += torch.sum(y_penl).data[0]
+            prepare_time += (t2 - t1)
+            run_time += (t3 - t2)
+            p, idx = torch.max(y_pred, dim=1)
+            idx = idx.data
+            count += torch.sum(torch.eq(idx.cpu(), y))
+
+            if i % interval == 0:
+                print(i)
+                print("loss count:" + str(loss_count / batch_size))
+                print("accuracy:" + str(count / batch_size))
+                print("penalty:" + str(torch.sum(y_penl).data[0] / batch_size))
+                print("prepare time:" + str(prepare_time / batch_size))
+                print("run time:" + str(run_time / batch_size))
+                prepare_time = 0
+                run_time = 0
+                loss_count = 0
+                count = 0
+    torch.save(net.state_dict(), "model_dict_%s.pkl"%(str(time.time())))
+
+if __name__ == "__main__":
+    train(using_cuda=torch.cuda.is_available())
+
diff --git a/fastNLP/modules/prototype/predict.py b/fastNLP/modules/prototype/predict.py
new file mode 100644
index 00000000..c8e72629
--- /dev/null
+++ b/fastNLP/modules/prototype/predict.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+
+class MLP(nn.Module):
+    """
+    A two-layer perceptron for classification.
+
+    Output : Unnormalized probability distribution
+    Args:
+    input_size : the size of input
+    hidden_size : the size of hidden layer
+    output_size : the size of output
+    """
+    def __init__(self, input_size, hidden_size, output_size):
+        super(MLP,self).__init__()
+        self.L1 = nn.Linear(input_size, hidden_size)
+        self.L2 = nn.Linear(hidden_size, output_size)
+        self.softmax = nn.Softmax(dim=1)
+
+    def forward(self, x):
+        out = self.L2(F.relu(self.L1(x)))
+        return out
+
+if __name__ == "__main__":
+    MLP(20, 30, 20)
\ No newline at end of file
From 561305e03d51eb9209300fb21a32f7b5c0560ff8 Mon Sep 17 00:00:00 2001
From: HENRY L
Date: Mon, 2 Jul 2018 02:06:33 +0800
Subject: [PATCH 2/3] update and add readme

---
 fastNLP/modules/prototype/README.md      | 41 +++++++++++++++++++
 fastNLP/modules/prototype/Word2Idx.py    | 19 ++++-----
 fastNLP/modules/prototype/aggregation.py |  5 +--
 fastNLP/modules/prototype/dataloader.py  | 13 +++---
 fastNLP/modules/prototype/encoder.py     |  3 --
 fastNLP/modules/prototype/example.py     | 51 +++++++++++++++-------
 fastNLP/modules/prototype/predict.py     |  2 +-
 fastNLP/modules/prototype/prepare.py     | 50 +++++++++++++++++++++++
 8 files changed, 146 insertions(+), 38 deletions(-)
 create mode 100644 fastNLP/modules/prototype/README.md
 create mode 100644 fastNLP/modules/prototype/prepare.py

diff --git a/fastNLP/modules/prototype/README.md b/fastNLP/modules/prototype/README.md
new file mode 100644
index 00000000..2dff7caa
--- /dev/null
+++ b/fastNLP/modules/prototype/README.md
@@ -0,0 +1,41 @@
+# Prototype
+
+## Word2Idx.py
+A mapping model between words and indexes
+
+## embedding.py
+embedding modules
+
+Contains a simple encapsulation of torch.nn.Embedding
+
+## encoder.py
+encoder modules
+
+Contains a simple encapsulation of torch.nn.LSTM
+
+## aggregation.py
+aggregation modules
+
+Contains a self-attention model, following the paper "A Structured Self-attentive Sentence Embedding", https://arxiv.org/abs/1703.03130
+
+## predict.py
+predict modules
+
+Contains a two-layer perceptron for classification
+
+## example.py
+An example showing how to use the above modules to build a model
+
+Contains a model for sentiment analysis on the Yelp dataset, and its training and testing procedures. See https://arxiv.org/abs/1703.03130 for more details.
+
+## prepare.py
+An example of using Word2Idx to build the Yelp datasets
+
+## dataloader.py
+A dataloader for the Yelp dataset
+
+It is an iterable object, returning a zero-padded batch every iteration.
+
+
+
+
diff --git a/fastNLP/modules/prototype/Word2Idx.py b/fastNLP/modules/prototype/Word2Idx.py
index 544126be..2499aeae 100644
--- a/fastNLP/modules/prototype/Word2Idx.py
+++ b/fastNLP/modules/prototype/Word2Idx.py
@@ -4,15 +4,15 @@ import pickle
 class Word2Idx():
     """
     Build a word index according to word frequency.
+
     If "min_freq" is given, then only words with a frequency not less than min_freq will be kept.
     If "max_num" is given, then at most the most frequent $max_num words will be kept.
     "words" should be a list [ w_1,w_2,...,w_i,...,w_n ] where each w_i is a string representing a word.
 
-    num is the size of the lookup table.
     w2i is a lookup table assigning each word an index.
-    Note that index 0 will be returned for any unregistered words.
     i2w is a vector which serves as an inverse mapping of w2i.
-    Token "" will be returned for index 0
+    Note that index 0 is token "<pad>" for padding
+    index 1 is token "<unk>" for unregistered words
     e.g. i2w[w2i["word"]] == "word"
     """
     def __init__(self):
@@ -29,29 +29,30 @@ class Word2Idx():
         else:
             most_common = counter.most_common()
         self.__w2i = dict((w[0],i + 1) for i,w in enumerate(most_common) if w[1] >= min_freq)
-        self.__w2i[""] = 0
-        self.__i2w = [""] + [ w[0] for w in most_common if w[1] >= min_freq ]
+        self.__w2i["<pad>"] = 0
+        self.__w2i["<unk>"] = 1
+        self.__i2w = ["<pad>", "<unk>"] + [ w[0] for w in most_common if w[1] >= min_freq ]
         self.num = len(self.__i2w)
 
-    def w2i(self,word):
+    def w2i(self, word):
         """word to index"""
         if word in self.__w2i:
             return self.__w2i[word]
         return 0
 
-    def i2w(self,idx):
+    def i2w(self, idx):
         """index to word"""
         if idx >= self.num:
             raise Exception("out of range\n")
         return self.__i2w[idx]
 
-    def save(self,addr):
+    def save(self, addr):
         """save the model to a file with address "addr" """
         f = open(addr,"wb")
         pickle.dump([self.__i2w, self.__w2i, self.num], f)
         f.close()
 
-    def load(self,addr):
+    def load(self, addr):
         """load a model from a file with address "addr" """
         f = open(addr,"rb")
         paras = pickle.load(f)
diff --git a/fastNLP/modules/prototype/aggregation.py b/fastNLP/modules/prototype/aggregation.py
index e87862b8..59e50e99 100644
--- a/fastNLP/modules/prototype/aggregation.py
+++ b/fastNLP/modules/prototype/aggregation.py
@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+from torch.autograd import Variable
 
 class Selfattention(nn.Module):
     """
@@ -32,10 +33,8 @@ class Selfattention(nn.Module):
     def forward(self, x):
         inter = self.tanh(torch.matmul(self.W_s1, torch.transpose(x, 1, 2)))
         A = self.softmax(torch.matmul(self.W_s2, inter))
-        out = torch.matmul(A, H)
+        out = torch.matmul(A, x)
         out = out.view(out.size(0), -1)
         penalty = self.penalization(A)
         return out, penalty
 
-if __name__ == "__main__":
-    model = Selfattention(100, 10, 20)
diff --git a/fastNLP/modules/prototype/dataloader.py b/fastNLP/modules/prototype/dataloader.py
index a7eafdc2..af5cd8b8 100644
--- a/fastNLP/modules/prototype/dataloader.py
+++ b/fastNLP/modules/prototype/dataloader.py
@@ -32,10 +32,10 @@ def pad(X, using_cuda):
         padlen = maxlen - x.size(0)
         if padlen > 0:
             if using_cuda:
-                paddings = torch.zeros(padlen).cuda()
+                paddings = Variable(torch.zeros(padlen).long()).cuda()
             else:
-                paddings = torch.zeros(padlen)
-            x_ = torch.cat(x, paddings)
+                paddings = Variable(torch.zeros(padlen).long())
+            x_ = torch.cat((x, paddings), 0)
             Y.append(x_)
         else:
             Y.append(x)
@@ -71,12 +71,11 @@ class DataLoader(object):
                 random.shuffle(self.data)
             raise StopIteration()
         else:
-            X = self.data[self.count * self.batch_size : (self.count + 1) * self.batch_size]
+            batch = self.data[self.count * self.batch_size : (self.count + 1) * self.batch_size]
             self.count += 1
-            X = [long_wrapper(x["sent"], using_cuda=self.using_cuda) for x in X]
+            X = [long_wrapper(x["sent"], using_cuda=self.using_cuda, requires_grad=False) for x in batch]
             X = pad(X, self.using_cuda)
-            y = [long_wrapper(x["class"], using_cuda=self.using_cuda) for x in X]
-            y = torch.stack(y)
+            y = long_wrapper([x["class"] for x in batch], using_cuda=self.using_cuda, requires_grad=False)
             return {"feature" : X, "class" : y}
 
 
diff --git a/fastNLP/modules/prototype/encoder.py b/fastNLP/modules/prototype/encoder.py
index 249eaf8c..142496e1 100644
--- a/fastNLP/modules/prototype/encoder.py
+++ b/fastNLP/modules/prototype/encoder.py
@@ -20,6 +20,3 @@ class Lstm(nn.Module):
     def forward(self, x):
         x, _ = self.lstm(x)
         return x
-
-if __name__ == "__main__":
-    model = Lstm(20, 30, 1, 0.5, False)
diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py
index 9dffc59a..782937fe 100644
--- a/fastNLP/modules/prototype/example.py
+++ b/fastNLP/modules/prototype/example.py
@@ -8,13 +8,13 @@ import torch.optim as optim
 import time
 import dataloader
 
+WORD_NUM = 357361
 WORD_SIZE = 100
 HIDDEN_SIZE = 300
 D_A = 350
-R = 20
+R = 10
 MLP_HIDDEN = 2000
 CLASSES_NUM = 5
-WORD_NUM = 357361
 
 class Net(nn.Module):
     """
@@ -32,7 +32,7 @@ class Net(nn.Module):
         x = self.encoder(x)
         x, penalty = self.aggregation(x)
         x = self.predict(x)
-        return r, x
+        return x, penalty
 
 def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
           momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
@@ -50,7 +50,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
     the result will be saved as "model_dict_" + current time, which can be used for further training
     """
 
-    if using_cuda == True:
+    if using_cuda:
         net = Net().cuda()
     else:
         net = Net()
@@ -60,7 +60,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
 
     optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
     criterion = nn.CrossEntropyLoss()
-    dataset = dataloader.DataLoader("trainset.pkl", using_cuda=using_cuda)
+    dataset = dataloader.DataLoader("test_set.pkl", batch_size, using_cuda=using_cuda)
 
     #statistics
     loss_count = 0
@@ -69,6 +69,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
     count = 0
 
     for epoch in range(epochs):
+        print("epoch: %d"%(epoch))
         for i, batch in enumerate(dataset):
             t1 = time.time()
             X = batch["feature"]
@@ -86,23 +87,43 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
             loss_count += torch.sum(y_penl).data[0]
             prepare_time += (t2 - t1)
             run_time += (t3 - t2)
-            p, idx = torch.max(y_pred, dim=1)
-            idx = idx.data
-            count += torch.sum(torch.eq(idx.cpu(), y))
+            p, idx = torch.max(y_pred.data, dim=1)
+            count += torch.sum(torch.eq(idx.cpu(), y.data.cpu()))
 
-            if i % interval == 0:
-                print(i)
-                print("loss count:" + str(loss_count / batch_size))
-                print("accuracy:" + str(count / batch_size))
+            if (i + 1) % interval == 0:
+                print("epoch : %d, iters: %d"%(epoch, i + 1))
+                print("loss count:" + str(loss_count / (interval * batch_size)))
+                print("accuracy:" + str(count / (interval * batch_size)))
                 print("penalty:" + str(torch.sum(y_penl).data[0] / batch_size))
-                print("prepare time:" + str(prepare_time / batch_size))
-                print("run time:" + str(run_time / batch_size))
+                print("prepare time:" + str(prepare_time))
+                print("run time:" + str(run_time))
                 prepare_time = 0
                 run_time = 0
                 loss_count = 0
                 count = 0
-    torch.save(net.state_dict(), "model_dict_%s.pkl"%(str(time.time())))
+    string = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
+    torch.save(net.state_dict(), "model_dict_%s.dict"%(string))
+
+def test(model_dict, using_cuda=True):
+    if using_cuda:
+        net = Net().cuda()
+    else:
+        net = Net()
+    net.load_state_dict(torch.load(model_dict))
+    dataset = dataloader.DataLoader("test_set.pkl", batch_size=1, using_cuda=using_cuda)
+    count = 0
+    for i, batch in enumerate(dataset):
+        X = batch["feature"]
+        y = batch["class"]
+        y_pred, _ = net(X)
+        p, idx = torch.max(y_pred.data, dim=1)
+        count += torch.sum(torch.eq(idx.cpu(), y.data.cpu()))
+    print("accuracy: %f"%(count / dataset.num))
+
 
 if __name__ == "__main__":
     train(using_cuda=torch.cuda.is_available())
+
+
+
diff --git a/fastNLP/modules/prototype/predict.py b/fastNLP/modules/prototype/predict.py
index c8e72629..d5346c0e 100644
--- a/fastNLP/modules/prototype/predict.py
+++ b/fastNLP/modules/prototype/predict.py
@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 class MLP(nn.Module):
     """
@@ -15,7 +16,6 @@ class MLP(nn.Module):
         super(MLP,self).__init__()
         self.L1 = nn.Linear(input_size, hidden_size)
         self.L2 = nn.Linear(hidden_size, output_size)
-        self.softmax = nn.Softmax(dim=1)
 
     def forward(self, x):
         out = self.L2(F.relu(self.L1(x)))
diff --git a/fastNLP/modules/prototype/prepare.py b/fastNLP/modules/prototype/prepare.py
new file mode 100644
index 00000000..02fd19c5
--- /dev/null
+++ b/fastNLP/modules/prototype/prepare.py
@@ -0,0 +1,50 @@
+import pickle
+import Word2Idx
+
+def get_sets(m, n):
+    """
+    get a train set containing m samples and a test set containing n samples
+    """
+    samples = pickle.load(open("tuples.pkl","rb"))
+    if m+n > len(samples):
+        print("asking for too many tuples\n")
+        return
+    train_samples = samples[ : m]
+    test_samples = samples[m: m+n]
+    return train_samples, test_samples
+
+def build_wordidx():
+    """
+    build wordidx using word2idx
+    """
+    train, test = get_sets(500000, 2000)
+    words = []
+    for x in train:
+        words += x[0]
+    wordidx = Word2Idx.Word2Idx()
+    wordidx.build(words)
+    print(wordidx.num)
+    print(wordidx.i2w(0))
+    wordidx.save("wordidx.pkl")
+
+def build_sets():
+    """
+    build train set and test set, transform word to index
+    """
+    train, test = get_sets(500000, 2000)
+    wordidx = Word2Idx.Word2Idx()
+    wordidx.load("wordidx.pkl")
+    train_set = []
+    for x in train:
+        sent = [wordidx.w2i(w) for w in x[0]]
+        train_set.append({"sent" : sent, "class" : x[1]})
+    test_set = []
+    for x in test:
+        sent = [wordidx.w2i(w) for w in x[0]]
+        test_set.append({"sent" : sent, "class" : x[1]})
+    pickle.dump(train_set, open("train_set.pkl", "wb"))
+    pickle.dump(test_set, open("test_set.pkl", "wb"))
+
+if __name__ == "__main__":
+    build_wordidx()
+    build_sets()
From f585a9aa7df9b73e757dd51526a45bf3380b2ead Mon Sep 17 00:00:00 2001
From: HENRY L
Date: Mon, 2 Jul 2018 02:49:55 +0800
Subject: [PATCH 3/3] update

---
 fastNLP/modules/prototype/example.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastNLP/modules/prototype/example.py b/fastNLP/modules/prototype/example.py
index 782937fe..a19898c6 100644
--- a/fastNLP/modules/prototype/example.py
+++ b/fastNLP/modules/prototype/example.py
@@ -60,7 +60,7 @@ def train(model_dict=None, using_cuda=True, learning_rate=0.06,\
 
     optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
     criterion = nn.CrossEntropyLoss()
-    dataset = dataloader.DataLoader("test_set.pkl", batch_size, using_cuda=using_cuda)
+    dataset = dataloader.DataLoader("train_set.pkl", batch_size, using_cuda=using_cuda)
 
     #statistics
     loss_count = 0
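
For reference, a minimal sketch of how the prototype modules introduced by these patches fit together end to end. It assumes the pickled "tuples.pkl" file of (token list, label) pairs that prepare.py expects is already present, and it uses the file names and defaults from the code above; the saved model file name in the last line is a placeholder, not an actual output name.

    # Build the vocabulary and the train/test pickles, then train and evaluate.
    import torch

    import prepare
    import example

    prepare.build_wordidx()    # writes wordidx.pkl
    prepare.build_sets()       # writes train_set.pkl and test_set.pkl

    use_cuda = torch.cuda.is_available()
    example.train(using_cuda=use_cuda)    # saves model_dict_<timestamp>.dict
    # To evaluate, pass the file name actually produced by train():
    # example.test("model_dict_<timestamp>.dict", using_cuda=use_cuda)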