From d5be1d2cabbde72148f387dda95c5a7629df449f Mon Sep 17 00:00:00 2001
From: JingyuanLiu
Date: Mon, 19 Mar 2018 12:04:48 +0800
Subject: [PATCH] update

---
 CNN-sentence_classification/dataset.py |  6 +++---
 CNN-sentence_classification/model.py   | 20 +-------------------
 CNN-sentence_classification/train.py   | 18 ++++++++++--------
 3 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/CNN-sentence_classification/dataset.py b/CNN-sentence_classification/dataset.py
index b4ce1115..5a8fe2b7 100644
--- a/CNN-sentence_classification/dataset.py
+++ b/CNN-sentence_classification/dataset.py
@@ -83,9 +83,9 @@ class MRDataset(Dataset):
 
         embedding_weights = np.random.uniform(-0.25, 0.25, (len(self.word2id_dict), 300))
         for word in word_dict:
-			word_id = word_dict[word]
-			if word in model.wv.vocab:
-				embedding_weights[word_id, :] = model[word]
+            word_id = word_dict[word]
+            if word in model.wv.vocab:
+                embedding_weights[word_id, :] = model[word]
 
         return embedding_weights
 
diff --git a/CNN-sentence_classification/model.py b/CNN-sentence_classification/model.py
index cfb5154b..be0098f4 100644
--- a/CNN-sentence_classification/model.py
+++ b/CNN-sentence_classification/model.py
@@ -5,28 +5,10 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.autograd import Variable
-from torch.utils.data import DataLoader, TensorDataset
 
 import dataset
 
-"""
-#some information
-mode = "static"
-use_pretrained_embedding = "gensim.word2vec"
-print('MODE = {}'.format(mode))
-print('EMBEDDING = {}\n'.format(use_pretrained_embeddings)
-
-embedding_weights = dataset.word_embedding_300()
-embed_num = len(embedding_weights)
-embed_dim = 300
-class_num = 2
-len_sentence = 64
-
-print('embedding size = {}'.format(embed_num))
-print('embedding dimension = {}'.format(embed_dim))
-print('sentence len n = {}'.format(len_sentence))
-print('num of classes = {}'.format(class_num))
-"""
+
 
 class CNN_text(nn.Module):
     def __init__(self, kernel_h=[3,4,5], kernel_num=100, embed_num=1000, embed_dim=300, dropout=0.5, L2_constrain=3, batchsize=50, pretrained_embeddings=None):
diff --git a/CNN-sentence_classification/train.py b/CNN-sentence_classification/train.py
index e2e0b8e6..8b3801d2 100644
--- a/CNN-sentence_classification/train.py
+++ b/CNN-sentence_classification/train.py
@@ -40,7 +40,7 @@
 test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=batch_size,
                                           shuffle=False)
-
+#cnn
 cnn = CNN_text(embed_num=len(dataset.word2id()), pretrained_embeddings=dataset.word_embeddings())
 
 if cuda:
@@ -51,6 +51,8 @@ if cuda:
 
 criterion = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
+
+#train and test
 best_acc = None
 
 for epoch in range(num_epochs):
@@ -59,9 +61,9 @@ for epoch in range(num_epochs):
     for i, (sents,labels) in enumerate(train_loader):
         sents = Variable(sents)
         labels = Variable(labels)
-		if cuda:
-			sents = sents.cuda()
-			labels = labels.cuda()
+        if cuda:
+            sents = sents.cuda()
+            labels = labels.cuda()
         optimizer.zero_grad()
         outputs = cnn(sents)
         loss = criterion(outputs, labels)
@@ -78,8 +80,8 @@ for epoch in range(num_epochs):
         total = 0
         for sents, labels in test_loader:
             sents = Variable(sents)
-			if cuda:
-				sents = sents.cuda()
+            if cuda:
+                sents = sents.cuda()
                 labels = labels.cuda()
             outputs = cnn(sents)
             _, predicted = torch.max(outputs.data, 1)
@@ -90,8 +92,8 @@ for epoch in range(num_epochs):
 
         if best_acc is None or acc > best_acc:
             best_acc = acc
-			if os.path.exists("models") is False:
-				os.makedirs("models")
+            if os.path.exists("models") is False:
+                os.makedirs("models")
             torch.save(cnn.state_dict(), 'models/cnn.pkl')
         else:
             learning_rate = learning_rate * 0.8
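Note on the dataset.py hunk: the reindented block is the usual word2vec warm-start for an embedding matrix. Every row starts as uniform noise in [-0.25, 0.25], and rows for words the pretrained model knows are overwritten with their 300-d vectors. A minimal standalone sketch of that pattern, assuming the gensim < 4.0 API (model.wv.vocab) that the patched code itself uses; build_embedding_weights is an illustrative name, not a function in this repo:

    import numpy as np

    def build_embedding_weights(word2id, model, dim=300):
        # Rows default to uniform noise in [-0.25, 0.25] so that words
        # missing from the pretrained model still get distinct vectors.
        weights = np.random.uniform(-0.25, 0.25, (len(word2id), dim))
        for word, word_id in word2id.items():
            if word in model.wv.vocab:  # gensim < 4.0 vocabulary lookup
                weights[word_id, :] = model[word]  # copy the pretrained vector
        return weights

The resulting matrix is what train.py hands to CNN_text as pretrained_embeddings.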
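Note on the train.py tail: when test accuracy improves, the script creates models/ if needed and checkpoints to models/cnn.pkl; otherwise it multiplies learning_rate by 0.8. One caveat worth making explicit: reassigning the Python variable learning_rate does not by itself change an Adam optimizer constructed earlier, so unless the optimizer is rebuilt each epoch (not visible in these hunks), the decay only takes effect if it is written into optimizer.param_groups. A hedged sketch of that pattern under that assumption; checkpoint_or_decay is an illustrative name, not repo code:

    import os
    import torch

    def checkpoint_or_decay(cnn, optimizer, acc, best_acc, decay=0.8):
        if best_acc is None or acc > best_acc:
            best_acc = acc
            os.makedirs("models", exist_ok=True)  # idempotent form of the exists() check
            torch.save(cnn.state_dict(), "models/cnn.pkl")
        else:
            for group in optimizer.param_groups:  # write the decayed LR into the optimizer
                group["lr"] *= decay
        return best_acc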