From 6c09f53c6bff8a57720b39cd806b38380a75c2fa Mon Sep 17 00:00:00 2001 From: yunfan Date: Sat, 20 Oct 2018 11:17:08 +0800 Subject: [PATCH] update --- fastNLP/core/dataset.py | 14 ++++++++++++++ fastNLP/loader/dataset_loader.py | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/fastNLP/core/dataset.py b/fastNLP/core/dataset.py index a10a24d2..f1f3f2a8 100644 --- a/fastNLP/core/dataset.py +++ b/fastNLP/core/dataset.py @@ -99,12 +99,26 @@ class DataSet(list): return self def update_vocab(self, **name_vocab): + """using certain field data to update vocabulary. + + e.g. :: + + # update word vocab and label vocab separately + dataset.update_vocab(word_seq=word_vocab, label_seq=label_vocab) + """ for field_name, vocab in name_vocab.items(): for ins in self: vocab.update(ins[field_name].contents()) return self def set_origin_len(self, origin_field, origin_len_name=None): + """make dataset tensor output contain origin_len field. + + e.g. :: + + # output "word_seq_origin_len", lengths based on "word_seq" field + dataset.set_origin_len("word_seq") + """ if origin_field is None: self.origin_len = None else: diff --git a/fastNLP/loader/dataset_loader.py b/fastNLP/loader/dataset_loader.py index c9e76622..5feb62a6 100644 --- a/fastNLP/loader/dataset_loader.py +++ b/fastNLP/loader/dataset_loader.py @@ -75,6 +75,13 @@ class DataSetLoader(BaseLoader): super(DataSetLoader, self).__init__() def load(self, path): + """ load data in `path` into a dataset + """ + raise NotImplementedError + + def convert(self, data): + """convert list of data into dataset + """ raise NotImplementedError class RawDataSetLoader(DataSetLoader):