
fix some doc errors

tags/v0.4.10
ChenXin 5 years ago
parent commit 13d8978953
4 changed files with 39 additions and 30 deletions
  1. +4  -4   fastNLP/io/config_io.py
  2. +27 -20  fastNLP/io/dataset_loader.py
  3. +1  -1   fastNLP/io/embed_loader.py
  4. +7  -5   fastNLP/io/model_io.py
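All four files receive the same family of reST docstring fixes: escaping "*" (which otherwise opens emphasis), doubling backslashes so that "\t" survives the Python string literal, and introducing "::" literal blocks. A minimal sketch of a docstring that follows these conventions (the function and its contents are illustrative, not taken from fastNLP):

    # Illustrative docstring following the conventions this commit enforces
    # (hypothetical function, not from fastNLP):
    def convert(data):
        """Convert raw data into a DataSet.

        :param data: list of list of strings, [num_examples, \\*].

        Fields in each line are divided by "\\t".

        Example::

            ['Tom', 'and', 'Jerry', '.']
        """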

+4 -4   fastNLP/io/config_io.py

@@ -26,10 +26,10 @@ class ConfigLoader(BaseLoader):
 
         :param str file_path: the path of config file
         :param dict sections: the dict of ``{section_name(string): ConfigSection object}``
-        Example::
-            test_args = ConfigSection()
-            ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})
+        Example::
+
+            test_args = ConfigSection()
+            ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})
 
         """
         assert isinstance(sections, dict)
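For reference, the docstring example above as a runnable snippet (fastNLP v0.4-era API; the config paths are the docstring's own placeholders):

    from fastNLP.io.config_io import ConfigLoader, ConfigSection

    # Load the [POS_test] section of a config file into a ConfigSection object.
    test_args = ConfigSection()
    ConfigLoader("config.cfg").load_config("./data_for_tests/config", {"POS_test": test_args})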


+27 -20   fastNLP/io/dataset_loader.py

@@ -9,7 +9,7 @@ from fastNLP.io.base_loader import DataLoaderRegister
 def convert_seq_dataset(data):
     """Create an DataSet instance that contains no labels.
 
-    :param data: list of list of strings, [num_examples, *].
+    :param data: list of list of strings, [num_examples, \*].
     Example::
 
         [
@@ -28,7 +28,7 @@ def convert_seq_dataset(data):
 def convert_seq2tag_dataset(data):
     """Convert list of data into DataSet.
 
-    :param data: list of list of strings, [num_examples, *].
+    :param data: list of list of strings, [num_examples, \*].
     Example::
 
         [
@@ -48,7 +48,7 @@ def convert_seq2tag_dataset(data):
 def convert_seq2seq_dataset(data):
     """Convert list of data into DataSet.
 
-    :param data: list of list of strings, [num_examples, *].
+    :param data: list of list of strings, [num_examples, \*].
     Example::
 
         [
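All three converters above take the same [num_examples, \*] nested-list layout. A hedged sketch of calling them (fastNLP v0.4-era API; the exact shape expected by convert_seq2seq_dataset is inferred from the truncated docstring examples, not confirmed by this diff):

    from fastNLP.io.dataset_loader import convert_seq_dataset, convert_seq2seq_dataset

    words = [["Tom", "and", "Jerry", "."], ["Hello", "world", "!"]]   # [num_examples, *]
    tags = [["label1", "label2", "label1", "label3"],
            ["label4", "label5", "label3"]]

    unlabeled = convert_seq_dataset(words)                       # DataSet without labels
    labeled = convert_seq2seq_dataset([[w, t] for w, t in zip(words, tags)])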
@@ -177,18 +177,18 @@ DataLoaderRegister.set_reader(RawDataSetLoader, 'read_rawdata')
 class DummyPOSReader(DataSetLoader):
     """A simple reader for a dummy POS tagging dataset.
 
-    In these datasets, each line are divided by "\t". The first Col is the vocabulary and the second
+    In these datasets, each line are divided by "\\\\t". The first Col is the vocabulary and the second
     Col is the label. Different sentence are divided by an empty line.
     E.g::
 
         Tom label1
         and label2
         Jerry label1
         . label3
         (separated by an empty line)
         Hello label4
         world label5
         ! label3
 
     In this example, there are two sentences "Tom and Jerry ." and "Hello world !". Each word has its own label.
     """
@@ -200,11 +200,13 @@ class DummyPOSReader(DataSetLoader):
         """
         :return data: three-level list
         Example::
+
             [
                 [ [word_11, word_12, ...], [label_1, label_1, ...] ],
                 [ [word_21, word_22, ...], [label_2, label_1, ...] ],
                 ...
             ]
+
         """
         with open(data_path, "r", encoding="utf-8") as f:
             lines = f.readlines()
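A minimal, self-contained sketch of parsing the word/label format DummyPOSReader documents (illustrative only, not the fastNLP implementation):

    def parse_pos_file(path):
        """Read "word<TAB>label" lines; an empty line ends a sentence."""
        sentences, words, labels = [], [], []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:                    # sentence boundary
                    if words:
                        sentences.append((words, labels))
                        words, labels = [], []
                    continue
                word, label = line.split("\t")
                words.append(word)
                labels.append(label)
        if words:                               # flush a trailing sentence
            sentences.append((words, labels))
        return sentences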
@@ -550,6 +552,7 @@ class SNLIDataSetReader(DataSetLoader):
 
         :param data: A 3D tensor.
         Example::
+
             [
                 [ [premise_word_11, premise_word_12, ...], [hypothesis_word_11, hypothesis_word_12, ...], [label_1] ],
                 [ [premise_word_21, premise_word_22, ...], [hypothesis_word_21, hypothesis_word_22, ...], [label_2] ],
@@ -647,7 +650,7 @@ class NaiveCWSReader(DataSetLoader):
例如:: 例如::


这是 fastNLP , 一个 非常 good 的 包 . 这是 fastNLP , 一个 非常 good 的 包 .
或者,即每个part后面还有一个pos tag 或者,即每个part后面还有一个pos tag
例如:: 例如::


@@ -661,12 +664,15 @@ class NaiveCWSReader(DataSetLoader):
 
     def load(self, filepath, in_word_splitter=None, cut_long_sent=False):
         """
-        Accepted formats (by default \t or space is used as the seg)
+        Accepted formats (by default \\\\t or space is used as the seg)::
+
             这是 fastNLP , 一个 非常 good 的 包 .
+        and::
+
             也/D 在/P 團員/Na 之中/Ng ,/COMMACATEGORY
         If splitter is not None, the input is assumed to be the second format; each token such as "也/D" is split by splitter and the first part is kept, e.g. "也/D".split('/')[0]
 
         :param filepath:
         :param in_word_splitter:
         :param cut_long_sent:
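The splitter behaviour described in that docstring, as a sketch (illustrative, not the fastNLP implementation): each token such as "也/D" is split by the splitter and only the first part, the word, is kept.

    def strip_pos_tags(line, splitter="/"):
        tokens = line.split()                   # default seg: tab or space
        return [token.split(splitter)[0] for token in tokens]

    strip_pos_tags("也/D 在/P 團員/Na 之中/Ng ,/COMMACATEGORY")
    # -> ['也', '在', '團員', '之中', ',']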
@@ -737,11 +743,12 @@ class ZhConllPOSReader(object):
 
     def load(self, path):
         """
-        The returned DataSet contains the following fields
+        The returned DataSet contains the following fields::
+
             words: list of str,
             tag: list of str, with BMES tags added; e.g. an original sequence ['VP', 'NN', 'NN', ..] becomes ["S-VP", "B-NN", "M-NN", ..]
-        The input is assumed to be in conll format: two sentences are separated by an empty line, and each line has 7 columns, i.e.
-        ::
+        The input is assumed to be in conll format: two sentences are separated by an empty line, and each line has 7 columns, i.e.::
 
             1 编者按 编者按 NN O 11 nmod:topic
             2 : : PU O 11 punct
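A sketch of the BMES expansion that docstring describes, assuming a word-level tag is spread over the word's characters (S- for a single-character word, otherwise B-/M-/E-; this reading is consistent with the ['VP', 'NN', ..] -> ["S-VP", "B-NN", "M-NN", ..] example but is not confirmed by this diff):

    def to_bmes(word, tag):
        # Single-character word: one S- tag; otherwise B- then M-... then E-.
        if len(word) == 1:
            return ["S-" + tag]
        return ["B-" + tag] + ["M-" + tag] * (len(word) - 2) + ["E-" + tag]

    to_bmes("编者按", "NN")   # -> ['B-NN', 'M-NN', 'E-NN']
    to_bmes(":", "PU")       # -> ['S-PU']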


+1 -1   fastNLP/io/embed_loader.py

@@ -132,7 +132,7 @@ class EmbedLoader(BaseLoader):
     def load_with_vocab(embed_filepath, vocab, dtype=np.float32, normalize=True, error='ignore'):
         """
         load pretraining embedding in {embed_file} based on words in vocab. Words in vocab but not in the pretraining
-        embedding are initialized from a normal distribution which has the mean and std of the found words vectors.
+        embedding are initialized from a normal distribution which has the mean and std of the found words vectors.
         The embedding type is determined automatically, support glove and word2vec(the first line only has two elements).
 
         :param embed_filepath: str, where to read pretrain embedding
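The initialisation rule that load_with_vocab documents, as a numpy sketch (illustrative, not the fastNLP implementation): words missing from the pretrained file are drawn from a normal distribution with the mean and std of the vectors that were found.

    import numpy as np

    def fill_missing(matrix, found_mask):
        """matrix: [vocab_size, dim]; found_mask: True where a pretrained vector was read."""
        found = matrix[found_mask]
        mean, std = found.mean(), found.std()
        n_missing = int((~found_mask).sum())
        matrix[~found_mask] = np.random.normal(mean, std, size=(n_missing, matrix.shape[1]))
        return matrix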


+7 -5   fastNLP/io/model_io.py

@@ -31,16 +31,18 @@ class ModelLoader(BaseLoader):
 
 class ModelSaver(object):
     """Save a model
-    Example::
+
+    :param str save_path: the path to the saving directory.
+    Example::
 
         saver = ModelSaver("./save/model_ckpt_100.pkl")
         saver.save_pytorch(model)
 
     """
 
     def __init__(self, save_path):
-        """
-
-        :param save_path: the path to the saving directory.
-        """
         self.save_path = save_path
 
     def save_pytorch(self, model, param_only=True):
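The docstring example above, fleshed out (what param_only=True maps to, state_dict versus the whole module, is an assumption based on common PyTorch practice, not confirmed by this diff):

    import torch
    from fastNLP.io.model_io import ModelSaver

    model = torch.nn.Linear(4, 2)                 # stand-in model
    saver = ModelSaver("./save/model_ckpt_100.pkl")
    saver.save_pytorch(model, param_only=True)    # likely torch.save(model.state_dict(), path)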

