Browse Source

update

tags/v0.2.0
yunfan 6 years ago
parent
commit
6c09f53c6b
2 changed files with 21 additions and 0 deletions
  1. +14
    -0
      fastNLP/core/dataset.py
  2. +7
    -0
      fastNLP/loader/dataset_loader.py

+ 14
- 0
fastNLP/core/dataset.py View File

@@ -99,12 +99,26 @@ class DataSet(list):
return self

def update_vocab(self, **name_vocab):
    """Update vocabularies from the named fields of every instance.

    Each keyword argument maps a field name to a vocabulary object.
    For every instance in this dataset, the ``contents()`` of that
    field are passed to the vocabulary's ``update`` method. Fields are
    processed one after another, in keyword order.

    e.g. ::

        # update word vocab and label vocab separately
        dataset.update_vocab(word_seq=word_vocab, label_seq=label_vocab)

    Returns the dataset itself, so calls can be chained.
    """
    for key in name_vocab:
        target_vocab = name_vocab[key]
        for instance in self:
            field = instance[key]
            target_vocab.update(field.contents())
    return self

def set_origin_len(self, origin_field, origin_len_name=None):
"""make dataset tensor output contain origin_len field.

e.g. ::

# output "word_seq_origin_len", lengths based on "word_seq" field
dataset.set_origin_len("word_seq")
"""
if origin_field is None:
self.origin_len = None
else:


+ 7
- 0
fastNLP/loader/dataset_loader.py View File

@@ -75,6 +75,13 @@ class DataSetLoader(BaseLoader):
super(DataSetLoader, self).__init__()

def load(self, path):
    """Load the data found at `path` into a dataset.

    This base implementation is only a placeholder: it always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()

def convert(self, data):
    """Convert a list of data into a dataset.

    This base implementation is only a placeholder: it always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()

class RawDataSetLoader(DataSetLoader):


Loading…
Cancel
Save