From e5e4381eb2937c878993c1ecaaba24defc3484d7 Mon Sep 17 00:00:00 2001 From: xuyige Date: Thu, 13 Jun 2019 20:05:59 +0800 Subject: [PATCH 1/2] add fasttext embedding --- fastNLP/modules/encoder/embedding.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fastNLP/modules/encoder/embedding.py b/fastNLP/modules/encoder/embedding.py index e54c1980..d8d6f533 100644 --- a/fastNLP/modules/encoder/embedding.py +++ b/fastNLP/modules/encoder/embedding.py @@ -157,7 +157,9 @@ class StaticEmbedding(TokenEmbedding): 'en-glove-840b-300': 'glove.840B.300d-cc1ad5e1.tar.gz', 'en-glove-6b-50': "glove.6B.50d-a6028c70.tar.gz", 'en-word2vec-300': "GoogleNews-vectors-negative300-be166d9d.tar.gz", - 'cn': "tencent_cn-dab24577.tar.gz" + 'en-fasttext': "cc.en.300.vec-d53187b2.gz", + 'cn': "tencent_cn-dab24577.tar.gz", + 'cn-fasttext': "cc.zh.300.vec-d68a9bcf.gz", } # 得到cache_path From 1a4c3c2d2070213b48a35fe1a3161d2be4897683 Mon Sep 17 00:00:00 2001 From: xuyige Date: Thu, 13 Jun 2019 20:33:15 +0800 Subject: [PATCH 2/2] fix some bugs in test --- fastNLP/models/bert.py | 2 +- test/core/test_vocabulary.py | 5 +++-- test/models/test_cnn_text_classification.py | 3 +-- test/test_tutorials.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fastNLP/models/bert.py b/fastNLP/models/bert.py index 02227c0d..4846c7fa 100644 --- a/fastNLP/models/bert.py +++ b/fastNLP/models/bert.py @@ -30,7 +30,7 @@ class BertConfig: self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads - self.intermediate = intermediate_size + self.intermediate_size = intermediate_size self.hidden_act = hidden_act self.hidden_dropout_prob = hidden_dropout_prob self.attention_probs_dropout_prob = attention_probs_dropout_prob diff --git a/test/core/test_vocabulary.py b/test/core/test_vocabulary.py index df0ebb1a..c161ad9d 100644 --- a/test/core/test_vocabulary.py +++ b/test/core/test_vocabulary.py @@ -100,13 +100,14 @@ class TestIndexing(unittest.TestCase): self.assertEqual(text, [vocab.to_word(idx) for idx in [vocab[w] for w in text]]) def test_iteration(self): - vocab = Vocabulary() + vocab = Vocabulary(padding=None, unknown=None) text = ["FastNLP", "works", "well", "in", "most", "cases", "and", "scales", "well", "in", "works", "well", "in", "most", "cases", "scales", "well"] vocab.update(text) text = set(text) - for word in vocab: + for word, idx in vocab: self.assertTrue(word in text) + self.assertTrue(idx < len(vocab)) class TestOther(unittest.TestCase): diff --git a/test/models/test_cnn_text_classification.py b/test/models/test_cnn_text_classification.py index b83b7bad..2ea48220 100644 --- a/test/models/test_cnn_text_classification.py +++ b/test/models/test_cnn_text_classification.py @@ -12,7 +12,6 @@ class TestCNNText(unittest.TestCase): model = CNNText(init_emb, NUM_CLS, kernel_nums=(1, 3, 5), - kernel_sizes=(2, 2, 2), - padding=0, + kernel_sizes=(1, 3, 5), dropout=0.5) RUNNER.run_model_with_task(TEXT_CLS, model) diff --git a/test/test_tutorials.py b/test/test_tutorials.py index 128e4235..a38d5ae1 100644 --- a/test/test_tutorials.py +++ b/test/test_tutorials.py @@ -70,7 +70,7 @@ class TestTutorial(unittest.TestCase): break from fastNLP.models import CNNText - model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1) + model = CNNText((len(vocab), 50), num_classes=5, dropout=0.1) from fastNLP import Trainer from copy import deepcopy @@ -143,7 +143,7 @@ class TestTutorial(unittest.TestCase): is_input=True) from fastNLP.models import CNNText - model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1) + model = CNNText((len(vocab), 50), num_classes=5, dropout=0.1) from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric, Adam