更新了一些文档格式

5 years ago · 96437f9e26
--- a/fastNLP/core/batch.py
+++ b/fastNLP/core/batch.py
@@ -30,11 +30,8 @@ class Batch(object):
    """
    别名：:class:`fastNLP.Batch` :class:`fastNLP.core.batch.Batch`

    Batch 用于从 `DataSet` 中按一定的顺序, 依次按 ``batch_size`` 的大小将数据取出.
    组成 `x` 和 `y`


    Example::
    Batch 用于从 `DataSet` 中按一定的顺序, 依次按 ``batch_size`` 的大小将数据取出，
    组成 `x` 和 `y`::

        batch = Batch(data_set, batch_size=16, sampler=SequentialSampler())
        num_batch = len(batch)
--- a/fastNLP/core/dataset.py
+++ b/fastNLP/core/dataset.py
@@ -571,9 +571,7 @@ class DataSet(object):
    
    def set_input(self, *field_names, flag=True):
        """
        将field_names的field设置为input

        Example::
        将field_names的field设置为input::

            dataset.set_input('words', 'seq_len')   # 将words和seq_len这两个field的input属性设置为True
            dataset.set_input('words', flag=False)  # 将words这个field的input属性设置为False
@@ -605,9 +603,7 @@ class DataSet(object):
    
    def set_padder(self, field_name, padder):
        """
        为field_name设置padder

        Example::
        为field_name设置padder::

            from fastNLP import EngChar2DPadder
            padder = EngChar2DPadder()
--- a/fastNLP/core/field.py
+++ b/fastNLP/core/field.py
@@ -448,9 +448,7 @@ class EngChar2DPadder(Padder):
    但这个Padder只能处理index为int的情况。

    padded过后的batch内容，形状为(batch_size, max_sentence_length, max_word_length). max_sentence_length为这个batch中最大句
    子长度；max_word_length为这个batch中最长的word的长度

    Example::
    子长度；max_word_length为这个batch中最长的word的长度::

        from fastNLP import DataSet
        from fastNLP import EngChar2DPadder
--- a/fastNLP/core/instance.py
+++ b/fastNLP/core/instance.py
@@ -13,9 +13,7 @@ class Instance(object):
    别名：:class:`fastNLP.Instance` :class:`fastNLP.core.instance.Instance`

    Instance是fastNLP中对应一个sample的类。每个sample在fastNLP中是一个Instance对象。
    Instance一般与 :class:`~fastNLP.DataSet` 一起使用, Instance的初始化如下面的Example所示

    Example::
    Instance一般与 :class:`~fastNLP.DataSet` 一起使用, Instance的初始化如下面的Example所示::
    
        >>> from fastNLP import Instance
        >>> ins = Instance(field_1=[1, 1, 1], field_2=[2, 2, 2])
--- a/fastNLP/core/losses.py
+++ b/fastNLP/core/losses.py
@@ -190,10 +190,10 @@ class LossFunc(LossBase):
                         找到相对应的参数名为value的参数，并传入func中作为参数名为key的参数
    :param kwargs: 除了参数映射表以外可以用key word args的方式设置参数映射关系

    Example::
    使用方法::

        >>> func = torch.nn.CrossEntropyLoss()
        >>> loss_func = LossFunc(func, input="pred", target="label")
        func = torch.nn.CrossEntropyLoss()
        loss_func = LossFunc(func, input="pred", target="label")
        # 这表示构建了一个损失函数类，由func计算损失函数，其中将从模型返回值或者DataSet的target=True的field
        # 当中找到一个参数名为`pred`的参数传入func一个参数名为`input`的参数；找到一个参数名为`label`的参数
        # 传入func作为一个名为`target`的参数
--- a/fastNLP/core/utils.py
+++ b/fastNLP/core/utils.py
@@ -68,18 +68,14 @@ def cache_results(_cache_fp, _refresh=False, _verbose=1):
        # res = [5 4 9 1 8]
        # 0.0040721893310546875

    可以看到第二次运行的时候，只用了0.0001s左右，是由于第二次运行将直接从cache.pkl这个文件读取数据，而不会经过再次预处理

    Example::
    可以看到第二次运行的时候，只用了0.0001s左右，是由于第二次运行将直接从cache.pkl这个文件读取数据，而不会经过再次预处理::

        # 还是以上面的例子为例，如果需要重新生成另一个cache，比如另一个数据集的内容，通过如下的方式调用即可
        process_data(_cache_fp='cache2.pkl')  # 完全不影响之前的'cache.pkl'

    上面的_cache_fp是cache_results会识别的参数，它将从'cache2.pkl'这里缓存/读取数据，即这里的'cache2.pkl'覆盖默认的
    'cache.pkl'。如果在你的函数前面加上了@cache_results()则你的函数会增加三个参数[_cache_fp, _refresh, _verbose]。
    上面的例子即为使用_cache_fp的情况，这三个参数不会传入到你的函数中，当然你写的函数参数名也不可能包含这三个名称。

    Example::
    上面的例子即为使用_cache_fp的情况，这三个参数不会传入到你的函数中，当然你写的函数参数名也不可能包含这三个名称::

        process_data(_cache_fp='cache2.pkl', _refresh=True)  # 这里强制重新生成一份对预处理的cache。
        #  _verbose是用于控制输出信息的，如果为0,则不输出任何内容;如果为1,则会提醒当前步骤是读取的cache还是生成了新的cache
--- a/fastNLP/core/vocabulary.py
+++ b/fastNLP/core/vocabulary.py
@@ -44,9 +44,7 @@ class Vocabulary(object):
    """
    别名：:class:`fastNLP.Vocabulary` :class:`fastNLP.core.vocabulary.Vocabulary`
    
    用于构建, 存储和使用 `str` 到 `int` 的一一映射

    Example::
    用于构建, 存储和使用 `str` 到 `int` 的一一映射::

        vocab = Vocabulary()
        word_list = "this is a word list".split()
@@ -159,9 +157,7 @@ class Vocabulary(object):
    
    def has_word(self, w):
        """
        检查词是否被记录

        Example::
        检查词是否被记录::

            has_abc = vocab.has_word('abc')
            # equals to
@@ -189,9 +185,7 @@ class Vocabulary(object):
    @_check_build_vocab
    def index_dataset(self, *datasets, field_name, new_field_name=None):
        """
        将DataSet中对应field的词转为数字.

        Example::
        将DataSet中对应field的词转为数字，Example::

            # remember to use `field_name`
            vocab.index_dataset(train_data, dev_data, test_data, field_name='words')
@@ -234,9 +228,7 @@ class Vocabulary(object):
    
    def from_dataset(self, *datasets, field_name):
        """
        使用dataset的对应field中词构建词典

         Example::
        使用dataset的对应field中词构建词典::

            # remember to use `field_name`
            vocab.from_dataset(train_data1, train_data2, field_name='words')
@@ -280,9 +272,7 @@ class Vocabulary(object):
    def to_index(self, w):
        """
        将词转为数字. 若词不在词典中被记录, 将视为 unknown, 若 ``unknown=None`` , 将抛出
        ``ValueError``

        Example::
        ``ValueError``::

            index = vocab.to_index('abc')
            # equals to
--- a/fastNLP/io/dataset_loader.py
+++ b/fastNLP/io/dataset_loader.py
@@ -106,7 +106,7 @@ class DataSetLoader:
    """
    别名：:class:`fastNLP.io.DataSetLoader` :class:`fastNLP.io.dataset_loader.DataSetLoader`

    定义了各种 DataSetLoader 所需的API 接口，开发者应该继承它实现各种的 DataSetLoader。
    定义了各种 DataSetLoader (针对特定数据上的特定任务) 所需的API 接口，开发者应该继承它实现各种的 DataSetLoader。
    
    开发者至少应该编写如下内容: