From acf18e2e89c49567959b0da271bd89baf0ba440b Mon Sep 17 00:00:00 2001 From: yh_cc Date: Thu, 13 Jun 2019 21:25:47 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9DataSet=20split=E7=9A=84?= =?UTF-8?q?=E4=B8=80=E4=B8=AA=E6=B3=A8=E9=87=8A=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fastNLP/core/dataset.py | 2 +- fastNLP/modules/encoder/embedding.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fastNLP/core/dataset.py b/fastNLP/core/dataset.py index b011d15a..4cd1ad9c 100644 --- a/fastNLP/core/dataset.py +++ b/fastNLP/core/dataset.py @@ -805,7 +805,7 @@ class DataSet(object): """ 将DataSet按照ratio的比例拆分,返回两个DataSet - :param float ratio: 0int: + return len(self) + + def __len__(self): + return len(self.embed) + @property def embed_size(self) -> int: return self._embed_size @@ -109,9 +116,8 @@ class TokenEmbedding(nn.Module): for param in self.parameters(): param.requires_grad = value - @abstractmethod - def get_original_vocab(self): - pass + def __len__(self): + return len(self._word_vocab) @property def embed_size(self) -> int: @@ -505,7 +511,7 @@ class CNNCharEmbedding(TokenEmbedding): :param embed_size: 该word embedding的大小,默认值为50. :param char_emb_size: character的embed的大小。character是从vocab中生成的。默认值为50. :param filter_nums: filter的数量. 长度需要和kernels一致。默认值为[40, 30, 20]. - :param kernels: kernel的大小. 默认值为[5, 3, 1]. + :param kernel_sizes: kernel的大小. 默认值为[5, 3, 1]. :param pool_method: character的表示在合成一个表示时所使用的pool方法,支持'avg', 'max'. :param activation: CNN之后使用的激活方法,支持'relu', 'sigmoid', 'tanh' 或者自定义函数. :param min_char_freq: character的最少出现次数。默认值为2.