
initial commit

tags/v0.1.0
HENRY L 7 years ago
parent commit b93cf08691
7 changed files with 366 additions and 0 deletions
  1. +62   -0   fastNLP/modules/prototype/Word2Idx.py
  2. +41   -0   fastNLP/modules/prototype/aggregation.py
  3. +82   -0   fastNLP/modules/prototype/dataloader.py
  4. +23   -0   fastNLP/modules/prototype/embedding.py
  5. +25   -0   fastNLP/modules/prototype/encoder.py
  6. +108  -0   fastNLP/modules/prototype/example.py
  7. +25   -0   fastNLP/modules/prototype/predict.py

+ 62  - 0   fastNLP/modules/prototype/Word2Idx.py

@@ -0,0 +1,62 @@
import collections
import pickle

class Word2Idx():
    """
    Build a word index according to word frequency.
    If "min_freq" is given, only words with a frequency not less than min_freq will be kept.
    If "max_num" is given, at most the max_num most frequent words will be kept.
    "words" should be a list [w_1, w_2, ..., w_i, ..., w_n] where each w_i is a string representing a word.
    num is the size of the lookup table.
    w2i is a lookup table assigning each word an index.
    Note that index 0 will be returned for any unregistered word.
    i2w is a vector which serves as an inverse mapping of w2i.
    Token "<UNK>" will be returned for index 0.
    e.g. i2w[w2i["word"]] == "word"
    """
    def __init__(self):
        self.__w2i = dict()
        self.__i2w = []
        self.num = 0

    def build(self, words, min_freq=0, max_num=None):
        """Build the index from a list of words."""
        counter = collections.Counter(words)
        word_set = set(words)
        if max_num is not None:
            # Index 0 is reserved for "<UNK>", so keep at most max_num - 1 words.
            most_common = counter.most_common(min(len(word_set), max_num - 1))
        else:
            most_common = counter.most_common()
        self.__w2i = dict((w[0], i + 1) for i, w in enumerate(most_common) if w[1] >= min_freq)
        self.__w2i["<UNK>"] = 0
        self.__i2w = ["<UNK>"] + [w[0] for w in most_common if w[1] >= min_freq]
        self.num = len(self.__i2w)

    def w2i(self, word):
        """word to index"""
        if word in self.__w2i:
            return self.__w2i[word]
        return 0

    def i2w(self, idx):
        """index to word"""
        if idx >= self.num:
            raise Exception("out of range\n")
        return self.__i2w[idx]

    def save(self, addr):
        """Save the model to the file at address "addr"."""
        with open(addr, "wb") as f:
            pickle.dump([self.__i2w, self.__w2i, self.num], f)

    def load(self, addr):
        """Load a model from the file at address "addr"."""
        with open(addr, "rb") as f:
            paras = pickle.load(f)
        self.__i2w, self.__w2i, self.num = paras[0], paras[1], paras[2]
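
For reference, a quick usage sketch with a toy corpus (not part of the committed file; values are made up):

index = Word2Idx()
index.build("the cat sat on the mat".split(), min_freq=1)
print(index.w2i("the"))             # some index >= 1
print(index.i2w(index.w2i("the")))  # "the"
print(index.w2i("unseen"))          # 0, i.e. the "<UNK>" index
index.save("w2i.pkl")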



+ 41  - 0   fastNLP/modules/prototype/aggregation.py

@@ -0,0 +1,41 @@
import torch
import torch.nn as nn
from torch.autograd import Variable

class Selfattention(nn.Module):
    """
    Self-attention module.

    Args:
        input_size : the size of each input vector
        d_a : the width of the weight matrix
        r : the number of encoded vectors
    """
    def __init__(self, input_size, d_a, r):
        super(Selfattention, self).__init__()
        self.W_s1 = nn.Parameter(torch.randn(d_a, input_size), requires_grad=True)
        self.W_s2 = nn.Parameter(torch.randn(r, d_a), requires_grad=True)
        self.softmax = nn.Softmax(dim=2)
        self.tanh = nn.Tanh()

    def penalization(self, A):
        """
        Compute the penalization term for the attention matrix A.
        """
        if self.W_s1.is_cuda:
            I = Variable(torch.eye(A.size(1)).cuda(), requires_grad=False)
        else:
            I = Variable(torch.eye(A.size(1)), requires_grad=False)
        M = torch.matmul(A, torch.transpose(A, 1, 2)) - I
        M = M.view(M.size(0), -1)
        return torch.sum(M ** 2, dim=1)

    def forward(self, x):
        inter = self.tanh(torch.matmul(self.W_s1, torch.transpose(x, 1, 2)))
        A = self.softmax(torch.matmul(self.W_s2, inter))
        out = torch.matmul(A, x)
        out = out.view(out.size(0), -1)
        penalty = self.penalization(A)
        return out, penalty

if __name__ == "__main__":
    model = Selfattention(100, 10, 20)
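
For reference, a quick shape check (a sketch of my own, not in the committed file), assuming a batch of 4 sequences of length 7 with feature size 100:

model = Selfattention(100, 10, 20)
x = Variable(torch.randn(4, 7, 100))
out, penalty = model(x)
print(out.size())      # (4, 2000), i.e. (batch, r * input_size)
print(penalty.size())  # (4,)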

+ 82  - 0   fastNLP/modules/prototype/dataloader.py

@@ -0,0 +1,82 @@
import random
import pickle
import torch
import numpy as np
from torch.autograd import Variable

def float_wrapper(x, requires_grad=True, using_cuda=True):
    """
    Wrap a float-type list as a pytorch Variable.
    """
    if using_cuda:
        return Variable(torch.FloatTensor(x).cuda(), requires_grad=requires_grad)
    else:
        return Variable(torch.FloatTensor(x), requires_grad=requires_grad)

def long_wrapper(x, requires_grad=True, using_cuda=True):
    """
    Wrap a long-type list as a pytorch Variable.
    """
    if using_cuda:
        return Variable(torch.LongTensor(x).cuda(), requires_grad=requires_grad)
    else:
        return Variable(torch.LongTensor(x), requires_grad=requires_grad)

def pad(X, using_cuda):
    """
    Zero-pad sequences to the same length, then stack them together.
    """
    maxlen = max([x.size(0) for x in X])
    Y = []
    for x in X:
        padlen = maxlen - x.size(0)
        if padlen > 0:
            # Pad with long zeros so the dtype matches the word-id sequences.
            if using_cuda:
                paddings = Variable(torch.zeros(padlen).long().cuda(), requires_grad=False)
            else:
                paddings = Variable(torch.zeros(padlen).long(), requires_grad=False)
            x_ = torch.cat([x, paddings])
            Y.append(x_)
        else:
            Y.append(x)
    return torch.stack(Y)

class DataLoader(object):
    """
    Load data with the form {"feature", "class"}.

    Args:
        fdir : data file address
        batch_size : batch size
        shuffle : if True, shuffle the dataset every epoch
        using_cuda : if True, return tensors on GPU
    """
    def __init__(self, fdir, batch_size, shuffle=True, using_cuda=True):
        with open(fdir, "rb") as f:
            self.data = pickle.load(f)
        self.batch_size = batch_size
        self.num = len(self.data)
        self.count = 0
        self.iters = int(self.num / batch_size)
        self.shuffle = shuffle
        self.using_cuda = using_cuda

    def __iter__(self):
        return self

    def __next__(self):
        if self.count == self.iters:
            self.count = 0
            if self.shuffle:
                random.shuffle(self.data)
            raise StopIteration()
        else:
            batch = self.data[self.count * self.batch_size : (self.count + 1) * self.batch_size]
            self.count += 1
            # Word ids and class labels are indices, so they do not need gradients.
            X = [long_wrapper(x["sent"], requires_grad=False, using_cuda=self.using_cuda) for x in batch]
            X = pad(X, self.using_cuda)
            y = [long_wrapper(x["class"], requires_grad=False, using_cuda=self.using_cuda) for x in batch]
            y = torch.stack(y)
            return {"feature": X, "class": y}


+ 23  - 0   fastNLP/modules/prototype/embedding.py

@@ -0,0 +1,23 @@
import torch
import torch.nn as nn

class Lookuptable(nn.Module):
    """
    A simple lookup table.

    Args:
        nums : the size of the lookup table
        dims : the size of each vector
        padding_idx : outputs zero vectors whenever it encounters this index
        sparse : if True, the gradient matrix will be a sparse tensor; in this case,
                 only optim.SGD (CUDA and CPU) and optim.Adagrad (CPU) can be used
    """
    def __init__(self, nums, dims, padding_idx=0, sparse=False):
        super(Lookuptable, self).__init__()
        self.embed = nn.Embedding(nums, dims, padding_idx, sparse=sparse)

    def forward(self, x):
        return self.embed(x)

if __name__ == "__main__":
    model = Lookuptable(10, 20)
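
For reference, a small usage sketch with toy numbers of my own: word ids go in, one embedding vector per id comes out.

from torch.autograd import Variable
table = Lookuptable(10, 20)
ids = Variable(torch.LongTensor([[1, 2, 0], [3, 4, 5]]))
vecs = table(ids)
print(vecs.size())  # (2, 3, 20); rows for index 0 are all zeros (padding_idx)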

+ 25  - 0   fastNLP/modules/prototype/encoder.py

@@ -0,0 +1,25 @@
import torch
import torch.nn as nn

class Lstm(nn.Module):
    """
    LSTM module

    Args:
        input_size : input size
        hidden_size : hidden size
        num_layers : number of hidden layers
        dropout : dropout rate
        bidirectional : if True, becomes a bidirectional RNN
    """
    def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional):
        super(Lstm, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=True,
                            dropout=dropout, bidirectional=bidirectional)

    def forward(self, x):
        x, _ = self.lstm(x)
        return x

if __name__ == "__main__":
    model = Lstm(20, 30, 1, 0.5, False)
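
For reference, a quick shape check (a sketch with toy numbers, not in the committed file):

from torch.autograd import Variable
lstm = Lstm(20, 30, 1, 0.5, False)
x = Variable(torch.randn(2, 7, 20))
h = lstm(x)
print(h.size())  # (2, 7, 30); it would be (2, 7, 60) with bidirectional=True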

+ 108  - 0   fastNLP/modules/prototype/example.py

@@ -0,0 +1,108 @@
import torch
import torch.nn as nn
import encoder
import aggregation
import embedding
import predict
import torch.optim as optim
import time
import dataloader

WORD_SIZE = 100
HIDDEN_SIZE = 300
D_A = 350
R = 20
MLP_HIDDEN = 2000
CLASSES_NUM = 5
WORD_NUM = 357361

class Net(nn.Module):
    """
    A model for sentiment analysis using an LSTM encoder and self-attention.
    """
    def __init__(self):
        super(Net, self).__init__()
        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)

    def forward(self, x):
        x = self.embedding(x)
        x = self.encoder(x)
        x, penalty = self.aggregation(x)
        x = self.predict(x)
        return x, penalty
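
For reference, how the shapes flow through Net under the constants above (a comment sketch assuming a hypothetical batch of 2 sequences of length 7):

# input word ids      : (2, 7)
# Lookuptable         : (2, 7, WORD_SIZE = 100)
# Lstm, bidirectional : (2, 7, 2 * HIDDEN_SIZE = 600)
# Selfattention       : out (2, R * 600 = 12000), penalty (2,)
# MLP                 : (2, CLASSES_NUM = 5) unnormalized class scores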

def train(model_dict=None, using_cuda=True, learning_rate=0.06,
          momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
    """
    Training procedure.

    Args:
        If model_dict is given (a file address), training continues from the given model.
        Otherwise, a new model is trained from scratch.
        If using_cuda is True, training is conducted on the GPU.
        learning_rate and momentum are for the SGD optimizer.
        coef is the coefficient between the cross-entropy loss and the penalization term.
        interval is the frequency of reporting.

    The result is saved as "model_dict_" + current time, which can be used for further training.
    """
    if using_cuda:
        net = Net().cuda()
    else:
        net = Net()
    if model_dict is not None:
        net.load_state_dict(torch.load(model_dict))

    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    dataset = dataloader.DataLoader("trainset.pkl", batch_size, using_cuda=using_cuda)

    # statistics
    loss_count = 0
    prepare_time = 0
    run_time = 0
    count = 0

    for epoch in range(epochs):
        for i, batch in enumerate(dataset):
            t1 = time.time()
            X = batch["feature"]
            y = batch["class"]
            t2 = time.time()
            y_pred, y_penl = net(X)
            loss = criterion(y_pred, y) + torch.sum(y_penl) / batch_size * coef
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm(net.parameters(), 0.5)
            optimizer.step()
            t3 = time.time()

            loss_count += torch.sum(y_penl).data[0]
            prepare_time += (t2 - t1)
            run_time += (t3 - t2)
            p, idx = torch.max(y_pred, dim=1)
            idx = idx.data
            count += torch.sum(torch.eq(idx.cpu(), y.data.cpu()))

            if i % interval == 0:
                print(i)
                print("loss count:" + str(loss_count / batch_size))
                print("accuracy:" + str(count / batch_size))
                print("penalty:" + str(torch.sum(y_penl).data[0] / batch_size))
                print("prepare time:" + str(prepare_time / batch_size))
                print("run time:" + str(run_time / batch_size))
                prepare_time = 0
                run_time = 0
                loss_count = 0
                count = 0
    torch.save(net.state_dict(), "model_dict_%s.pkl" % (str(time.time())))

if __name__ == "__main__":
    train(using_cuda=torch.cuda.is_available())


+ 25  - 0   fastNLP/modules/prototype/predict.py

@@ -0,0 +1,25 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    """
    A two-layer perceptron for classification.

    Output : unnormalized probability distribution
    Args:
        input_size : the size of the input
        hidden_size : the size of the hidden layer
        output_size : the size of the output
    """
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.L1 = nn.Linear(input_size, hidden_size)
        self.L2 = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        out = self.L2(F.relu(self.L1(x)))
        return out

if __name__ == "__main__":
    MLP(20, 30, 20)
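
For reference, a quick usage sketch with toy numbers of my own:

from torch.autograd import Variable
mlp = MLP(20, 30, 5)
scores = mlp(Variable(torch.randn(4, 20)))
print(scores.size())  # (4, 5) unnormalized class scores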
