|
|
@@ -71,20 +71,19 @@ class MRDataset(Dataset): |
|
|
|
|
|
|
|
def word_embeddings(self, path="./GoogleNews-vectors-negative300.bin/GoogleNews-vectors-negative300.bin"):
    """Build an embedding matrix for ``self.word2id_dict`` from word2vec vectors.

    The binary word2vec file at ``path`` is loaded once with gensim. Every
    row of the result starts as a uniform random draw from [-0.25, 0.25);
    rows whose word exists in the pretrained model are then overwritten
    with that word's pretrained vector.

    Args:
        path: Location of the binary word2vec file.

    Returns:
        A NumPy array with one row per entry of ``self.word2id_dict`` and
        one column per dimension of the loaded vectors; the row at index
        ``word_id`` belongs to the word mapped to ``word_id``.
    """
    # Tell the user before starting the load, since the load is the slow part.
    print('Please wait ... (it could take a while to load the file : {})'.format(path))
    # Load the pretrained vectors exactly once.
    model = gensim.models.KeyedVectors.load_word2vec_format(path, binary=True)

    word_dict = self.word2id_dict
    # Size the columns from the loaded model rather than assuming 300, so a
    # file with a different dimensionality still fits.
    embedding_weights = np.random.uniform(
        -0.25, 0.25, (len(word_dict), model.vector_size)
    )

    for word, word_id in word_dict.items():
        # KeyedVectors supports membership tests directly; the older
        # ``model.wv.vocab`` lookup is not available in gensim 4.
        if word in model:
            embedding_weights[word_id, :] = model[word]
    return embedding_weights
|
|
|
|
|
|
|
def __len__(self):
    """Return the number of entries in ``self.MRDataset_frame``."""
    frame = self.MRDataset_frame
    return len(frame)
|
|
|
|
|
|
|
def __getitem__(self,idx): |
|
|
|