@@ -147,7 +147,7 @@ if __name__ == '__main__': | |||||
import gensim | import gensim | ||||
from gensim import models | from gensim import models | ||||
# train_word_vec() | |||||
train_word_vec() | |||||
embed_model = Word2Vec.load('yelp.word2vec') | embed_model = Word2Vec.load('yelp.word2vec') | ||||
embedding = Embedding_layer(embed_model.wv, embed_model.wv.vector_size) | embedding = Embedding_layer(embed_model.wv, embed_model.wv.vector_size) | ||||
@@ -158,6 +158,10 @@ if __name__ == '__main__': | |||||
net = HAN(input_size=200, output_size=5, | net = HAN(input_size=200, output_size=5, | ||||
word_hidden_size=50, word_num_layers=1, word_context_size=100, | word_hidden_size=50, word_num_layers=1, word_context_size=100, | ||||
sent_hidden_size=50, sent_num_layers=1, sent_context_size=100) | sent_hidden_size=50, sent_num_layers=1, sent_context_size=100) | ||||
net.load_state_dict(torch.load('model.dict')) | |||||
try: | |||||
net.load_state_dict(torch.load('model.dict')) | |||||
print("last time trained model has loaded") | |||||
except Exception: | |||||
print("cannot load model, train the inital model") | |||||
train(net, dataset, num_epoch=5, batch_size=64, use_cuda=True) | train(net, dataset, num_epoch=5, batch_size=64, use_cuda=True) |
@@ -1 +0,0 @@ | |||||
*.pyc |
@@ -1,36 +0,0 @@ | |||||
## Introduction | |||||
This is a PyTorch implementation of the paper [Hierarchical Attention Networks for Document Classification](https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf).
* Dataset is 600k documents extracted from [Yelp 2018](https://www.yelp.com/dataset) customer reviews | |||||
* Use [NLTK](http://www.nltk.org/) and [Stanford CoreNLP](https://stanfordnlp.github.io/CoreNLP/) to tokenize documents and sentences | |||||
* Both CPU & GPU support | |||||
* The best accuracy is 71%, matching the performance reported in the paper
## Requirements
* python 3.6 | |||||
* pytorch = 0.3.0 | |||||
* numpy | |||||
* gensim | |||||
* nltk | |||||
* coreNLP | |||||
## Parameters | |||||
Based on the paper and my experiments, the model parameters are set as follows:
|word embedding dimension|GRU hidden size|GRU layer|word/sentence context vector dimension|
|---|---|---|---|
|200|50|1|100|
And the training parameters: | |||||
|Epoch|learning rate|momentum|batch size|
|---|---|---|---|
|3|0.01|0.9|64|
## Run | |||||
1. Prepare the dataset. Download the [data set](https://www.yelp.com/dataset) and unzip the customer reviews into a file. Use preprocess.py to transform the file into a data set for model input.
2. Train the model. The model will be trained and automatically saved to 'model.dict'.
``` | |||||
python train | |||||
``` | |||||
3. Test the model. | |||||
``` | |||||
python evaluate | |||||
``` |