Browse Source

增加pipe相关的测试

tags/v1.0.0alpha
yhcc 2 years ago
parent
commit
24092e3114
100 changed files with 3864 additions and 365 deletions
  1. +53
    -53
      fastNLP/core/collators/padders/get_padder.py
  2. +5
    -6
      fastNLP/core/dataloaders/jittor_dataloader/fdl.py
  3. +5
    -6
      fastNLP/core/dataloaders/paddle_dataloader/fdl.py
  4. +4
    -5
      fastNLP/core/dataloaders/torch_dataloader/fdl.py
  5. +0
    -5
      fastNLP/io/loader/__init__.py
  6. +1
    -1
      fastNLP/io/loader/conll.py
  7. +0
    -64
      fastNLP/io/loader/coreference.py
  8. +0
    -2
      fastNLP/io/pipe/__init__.py
  9. +3
    -11
      fastNLP/io/pipe/classification.py
  10. +0
    -6
      fastNLP/io/pipe/conll.py
  11. +0
    -186
      fastNLP/io/pipe/coreference.py
  12. +0
    -2
      fastNLP/io/pipe/cws.py
  13. +2
    -10
      fastNLP/io/pipe/matching.py
  14. +0
    -2
      fastNLP/io/pipe/qa.py
  15. +0
    -3
      fastNLP/io/pipe/summarization.py
  16. +1
    -0
      fastNLP/modules/torch/__init__.py
  17. +5
    -2
      fastNLP/modules/torch/decoder/__init__.py
  18. +97
    -0
      fastNLP/modules/torch/decoder/mlp.py
  19. +8
    -1
      tests/core/dataloaders/torch_dataloader/test_fdl.py
  20. +442
    -0
      tests/data_for_tests/conll_2003_example.txt
  21. +15
    -0
      tests/data_for_tests/conll_example.txt
  22. +56
    -0
      tests/data_for_tests/cws_pku_utf_8
  23. +1018
    -0
      tests/data_for_tests/cws_test
  24. +1002
    -0
      tests/data_for_tests/cws_train
  25. +6
    -0
      tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt
  26. +7
    -0
      tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt
  27. +6
    -0
      tests/data_for_tests/io/20ng/dev.csv
  28. +6
    -0
      tests/data_for_tests/io/20ng/test.csv
  29. +6
    -0
      tests/data_for_tests/io/20ng/train.csv
  30. +6
    -0
      tests/data_for_tests/io/BQCorpus/dev.txt
  31. +6
    -0
      tests/data_for_tests/io/BQCorpus/test.txt
  32. +6
    -0
      tests/data_for_tests/io/BQCorpus/train.txt
  33. +7
    -0
      tests/data_for_tests/io/ChnSentiCorp/dev.txt
  34. +7
    -0
      tests/data_for_tests/io/ChnSentiCorp/test.txt
  35. +7
    -0
      tests/data_for_tests/io/ChnSentiCorp/train.txt
  36. +6
    -0
      tests/data_for_tests/io/LCQMC/dev.txt
  37. +5
    -0
      tests/data_for_tests/io/LCQMC/test.txt
  38. +6
    -0
      tests/data_for_tests/io/LCQMC/train.txt
  39. +6
    -0
      tests/data_for_tests/io/MNLI/dev_matched.tsv
  40. +6
    -0
      tests/data_for_tests/io/MNLI/dev_mismatched.tsv
  41. +6
    -0
      tests/data_for_tests/io/MNLI/test_matched.tsv
  42. +6
    -0
      tests/data_for_tests/io/MNLI/test_mismatched.tsv
  43. +7
    -0
      tests/data_for_tests/io/MNLI/train.tsv
  44. +38
    -0
      tests/data_for_tests/io/MSRA_NER/dev.conll
  45. +31
    -0
      tests/data_for_tests/io/MSRA_NER/test.conll
  46. +60
    -0
      tests/data_for_tests/io/MSRA_NER/train.conll
  47. +10
    -0
      tests/data_for_tests/io/OntoNotes/dev.txt
  48. +10
    -0
      tests/data_for_tests/io/OntoNotes/test.txt
  49. +50
    -0
      tests/data_for_tests/io/OntoNotes/train.txt
  50. +6
    -0
      tests/data_for_tests/io/QNLI/dev.tsv
  51. +6
    -0
      tests/data_for_tests/io/QNLI/test.tsv
  52. +6
    -0
      tests/data_for_tests/io/QNLI/train.tsv
  53. +2
    -0
      tests/data_for_tests/io/Quora/dev.tsv
  54. +2
    -0
      tests/data_for_tests/io/Quora/test.tsv
  55. +2
    -0
      tests/data_for_tests/io/Quora/train.tsv
  56. +6
    -0
      tests/data_for_tests/io/R52/dev.csv
  57. +6
    -0
      tests/data_for_tests/io/R52/test.csv
  58. +6
    -0
      tests/data_for_tests/io/R52/train.csv
  59. +6
    -0
      tests/data_for_tests/io/R8/dev.csv
  60. +6
    -0
      tests/data_for_tests/io/R8/test.csv
  61. +6
    -0
      tests/data_for_tests/io/R8/train.csv
  62. +6
    -0
      tests/data_for_tests/io/RTE/dev.tsv
  63. +6
    -0
      tests/data_for_tests/io/RTE/test.tsv
  64. +6
    -0
      tests/data_for_tests/io/RTE/train.tsv
  65. +5
    -0
      tests/data_for_tests/io/SNLI/snli_1.0_dev.jsonl
  66. +5
    -0
      tests/data_for_tests/io/SNLI/snli_1.0_test.jsonl
  67. +5
    -0
      tests/data_for_tests/io/SNLI/snli_1.0_train.jsonl
  68. +6
    -0
      tests/data_for_tests/io/SST-2/dev.tsv
  69. +6
    -0
      tests/data_for_tests/io/SST-2/test.tsv
  70. +6
    -0
      tests/data_for_tests/io/SST-2/train.tsv
  71. +6
    -0
      tests/data_for_tests/io/SST/dev.txt
  72. +6
    -0
      tests/data_for_tests/io/SST/test.txt
  73. +6
    -0
      tests/data_for_tests/io/SST/train.txt
  74. +9
    -0
      tests/data_for_tests/io/THUCNews/dev.txt
  75. +9
    -0
      tests/data_for_tests/io/THUCNews/test.txt
  76. +9
    -0
      tests/data_for_tests/io/THUCNews/train.txt
  77. +7
    -0
      tests/data_for_tests/io/WeiboSenti100k/dev.txt
  78. +8
    -0
      tests/data_for_tests/io/WeiboSenti100k/test.txt
  79. +7
    -0
      tests/data_for_tests/io/WeiboSenti100k/train.txt
  80. +7
    -0
      tests/data_for_tests/io/XNLI/dev.txt
  81. +7
    -0
      tests/data_for_tests/io/XNLI/test.txt
  82. +9
    -0
      tests/data_for_tests/io/XNLI/train.txt
  83. +5
    -0
      tests/data_for_tests/io/ag/test.csv
  84. +4
    -0
      tests/data_for_tests/io/ag/train.csv
  85. +155
    -0
      tests/data_for_tests/io/cmrc/dev.json
  86. +161
    -0
      tests/data_for_tests/io/cmrc/train.json
  87. +4
    -0
      tests/data_for_tests/io/cnndm/dev.label.jsonl
  88. +4
    -0
      tests/data_for_tests/io/cnndm/test.label.jsonl
  89. +10
    -0
      tests/data_for_tests/io/cnndm/train.cnndm.jsonl
  90. +100
    -0
      tests/data_for_tests/io/cnndm/vocab
  91. +49
    -0
      tests/data_for_tests/io/conll2003/dev.txt
  92. +51
    -0
      tests/data_for_tests/io/conll2003/test.txt
  93. +48
    -0
      tests/data_for_tests/io/conll2003/train.txt
  94. +6
    -0
      tests/data_for_tests/io/cws_as/dev.txt
  95. +6
    -0
      tests/data_for_tests/io/cws_as/test.txt
  96. +6
    -0
      tests/data_for_tests/io/cws_as/train.txt
  97. +6
    -0
      tests/data_for_tests/io/cws_cityu/dev.txt
  98. +6
    -0
      tests/data_for_tests/io/cws_cityu/test.txt
  99. +6
    -0
      tests/data_for_tests/io/cws_cityu/train.txt
  100. +2
    -0
      tests/data_for_tests/io/cws_msra/dev.txt

+ 53
- 53
fastNLP/core/collators/padders/get_padder.py View File

@@ -24,62 +24,62 @@ def get_padder(batch_field:Sequence[Any], pad_val, dtype, backend, field_name)->
:param field_name: 方便报错的。 :param field_name: 方便报错的。
:return: :return:
""" """
assert len(batch_field)!=0, "Empty batch encountered."
logger.debug(f"The content in the field:`{field_name}` is:\n" + str(batch_field))
if pad_val is None:
logger.debug(f"The pad_val for field:{field_name} is None, not padding this field.")
return NullPadder()
if backend is None:
logger.debug(f"The backend for field:{field_name} is None, not padding this field.")
return NullPadder()

# 首先判断当前 field 是否是必须要 pad ,根据用户设置的 pad_val、dtype 等判断。
must_pad = False
if pad_val != 0 or dtype is not None:
must_pad = True

catalog = _get_element_shape_dtype(batch_field) # 首先获取数据的基本信息。

# 根据 catalog 来判定当前是否可以进行 pad 。
# 首先检查是否所有的 key 是一样长的,表明深度是一致的
depths = set(map(len, catalog.keys()))
num_depth = len(depths)
if num_depth != 1:
msg = f'Field:`{field_name}` cannot pad, since it has various depths({depths}) of data. To view more ' \
f"information please set logger's level to DEBUG."
if must_pad:
raise InconsistencyError(msg)
raise NoProperPadderError(msg)

# 再检查所有的元素 shape 是否一致?
shape_lens = set([len(v[0]) for v in catalog.values()])
num_shape = len(shape_lens)
if num_shape != 1:
msg = f'Field:`{field_name}` cannot pad, since it has various shape length({shape_lens}) of data. To view more ' \
f"information please set logger's level to DEBUG."
if must_pad:
raise InconsistencyError(msg)
raise NoProperPadderError(msg)

# 再检查所有的元素 type 是否一致
try: try:
ele_dtypes = set([v[1] for v in catalog.values()])
except TypeError:
ele_dtypes = set([str(v[1]) for v in catalog.values()])
num_eletypes = len(ele_dtypes)
if num_eletypes != 1:
msg = f'Field:`{field_name}` cannot pad, since it has various types({ele_dtypes}) of data. To view more ' \
f"information please set logger's level to DEBUG."
if must_pad:
raise InconsistencyError(msg)
raise NoProperPadderError(msg)
assert len(batch_field)!=0, "Empty batch encountered."
logger.debug(f"The content in the field:`{field_name}` is:\n" + str(batch_field))
if pad_val is None:
logger.debug(f"The pad_val for field:{field_name} is None, not padding this field.")
return NullPadder()
if backend is None:
logger.debug(f"The backend for field:{field_name} is None, not padding this field.")
return NullPadder()

# 首先判断当前 field 是否是必须要 pad ,根据用户设置的 pad_val、dtype 等判断。
must_pad = False
if pad_val != 0 or dtype is not None:
must_pad = True

catalog = _get_element_shape_dtype(batch_field) # 首先获取数据的基本信息。

# 根据 catalog 来判定当前是否可以进行 pad 。
# 首先检查是否所有的 key 是一样长的,表明深度是一致的
depths = set(map(len, catalog.keys()))
num_depth = len(depths)
if num_depth != 1:
msg = f'Field:`{field_name}` cannot pad, since it has various depths({depths}) of data. To view more ' \
f"information please set logger's level to DEBUG."
if must_pad:
raise InconsistencyError(msg)
raise NoProperPadderError(msg)


depth = depths.pop()
shape_len = shape_lens.pop()
ele_dtype = list(catalog.values())[0][1] # 因为上面有except的情况,所以这样处理了
# 再检查所有的元素 shape 是否一致?
shape_lens = set([len(v[0]) for v in catalog.values()])
num_shape = len(shape_lens)
if num_shape != 1:
msg = f'Field:`{field_name}` cannot pad, since it has various shape length({shape_lens}) of data. To view more ' \
f"information please set logger's level to DEBUG."
if must_pad:
raise InconsistencyError(msg)
raise NoProperPadderError(msg)


# 需要由 padder 自己决定是否能够 pad 。
try:
# 再检查所有的元素 type 是否一致
try:
ele_dtypes = set([v[1] for v in catalog.values()])
except TypeError:
ele_dtypes = set([str(v[1]) for v in catalog.values()])
num_eletypes = len(ele_dtypes)
if num_eletypes != 1:
msg = f'Field:`{field_name}` cannot pad, since it has various types({ele_dtypes}) of data. To view more ' \
f"information please set logger's level to DEBUG."
if must_pad:
raise InconsistencyError(msg)
raise NoProperPadderError(msg)

depth = depths.pop()
shape_len = shape_lens.pop()
ele_dtype = list(catalog.values())[0][1] # 因为上面有except的情况,所以这样处理了

# 需要由 padder 自己决定是否能够 pad 。
if depth == 1 and shape_len == 0: # 形如 [0, 1, 2] 或 [True, False, True] if depth == 1 and shape_len == 0: # 形如 [0, 1, 2] 或 [True, False, True]
if backend == 'raw': if backend == 'raw':
return RawNumberPadder(pad_val=pad_val, ele_dtype=ele_dtype, dtype=dtype) return RawNumberPadder(pad_val=pad_val, ele_dtype=ele_dtype, dtype=dtype)


+ 5
- 6
fastNLP/core/dataloaders/jittor_dataloader/fdl.py View File

@@ -221,13 +221,12 @@ def prepare_jittor_dataloader(ds_or_db, batch_size: int = 16, shuffle: bool = Fa
其他 key 不包含 'train' 字符串的数据集则使用 non_train_size 和 non_train_sampler 作为参数。最终根据 ``key: JittorDataLoader`` 组成 其他 key 不包含 'train' 字符串的数据集则使用 non_train_size 和 non_train_sampler 作为参数。最终根据 ``key: JittorDataLoader`` 组成
``Dict[key, JittorDataLoader]`` 的字典返回。 ``Dict[key, JittorDataLoader]`` 的字典返回。


:param ds_or_db: 实现 __getitem__() 和 __len__() 的对象;或这种对象的序列;或字典。其取值只能为 ``[DataSet, DataBundle,
Dict[str, DataSet]]``.
:param ds_or_db: 可以有以下三种取值,

* ds_or_db 为 :class:`~fastNLP.io.DataBundle`, 返回值为 ``Dict[str, JittorDataLoader]`` 的字典
* ds_or_db 为 ``Dict[str, DataSet]`` 字典, 返回值为 ``Dict[str, JittorDataLoader]`` 的字典
* ds_or_db 为实现了 __getitem__() 和 __len__() 的对象 ,返回值为 :class:`~fastNLP.core.dataloaders.JittorDataLoader`


* ds_or_db 为 :class:`~fastNLP.core.dataset.DataSet`,返回值为 :class:`~fastNLP.core.dataloaders.JittorDataLoader`
* ds_or_db 为 :class:`~fastNLP.io.DataBundle`, 返回值为 :class:`Dict[str, JittorDataLoader]` 的字典
* ds_or_db 为 :class:`Dict[str, DataSet]` 字典, 返回值也为 :class:`Dict[str, JittorDataLoader]` 的字典
:param non_train_batch_size: 如果传入的 ``ds_or_db`` 为 :class:`Dict` 或 :class:`~fastNLP.io.DataBundle` 对象,可以通过改参数 :param non_train_batch_size: 如果传入的 ``ds_or_db`` 为 :class:`Dict` 或 :class:`~fastNLP.io.DataBundle` 对象,可以通过改参数
设置名称不为 `train` 的其他 ``dataset`` 的 ``batch_size``。 默认为 ``16``。 设置名称不为 `train` 的其他 ``dataset`` 的 ``batch_size``。 默认为 ``16``。
:param batch_size: 批次大小,默认为 ``16`` 且当 batch_sampler 为 None 有效。 :param batch_size: 批次大小,默认为 ``16`` 且当 batch_sampler 为 None 有效。


+ 5
- 6
fastNLP/core/dataloaders/paddle_dataloader/fdl.py View File

@@ -258,7 +258,7 @@ def prepare_paddle_dataloader(ds_or_db, feed_list=None, places=None,
non_train_batch_size: int = None) \ non_train_batch_size: int = None) \
-> Union[Dict[str, PaddleDataLoader], PaddleDataLoader]: -> Union[Dict[str, PaddleDataLoader], PaddleDataLoader]:
""" """
``prepare_paddle_dataloader`` 的功能是将输入的单个或多个 dataset 同时转为 ``PaddleDataloader``对象, 详见 :class:`~fastNLP.core.dataloaders.PaddleDataLoader`。
``prepare_paddle_dataloader`` 的功能是将输入的单个或多个 dataset 同时转为 ``PaddleDataloader``对象, 详见 :class:`~fastNLP.PaddleDataLoader`。
根据 ds_or_db 的类型 ``[DataSet, DataBundle, Dict[name, Dataset]]`` 不同而有不同返回结果, 具体如下: 根据 ds_or_db 的类型 ``[DataSet, DataBundle, Dict[name, Dataset]]`` 不同而有不同返回结果, 具体如下:


* 当 ds_or_db 为 ``DataSet``时,``prepare_paddle_dataloader`` 会将使用的除了 non_train_batch_size 和 non_train_sampler 以外的参数来 * 当 ds_or_db 为 ``DataSet``时,``prepare_paddle_dataloader`` 会将使用的除了 non_train_batch_size 和 non_train_sampler 以外的参数来
@@ -272,12 +272,11 @@ def prepare_paddle_dataloader(ds_or_db, feed_list=None, places=None,
其他 key 不包含 'train' 字符串的数据集则使用 non_train_size 和 non_train_sampler 作为参数。最终根据 ``key: PaddleDataLoader`` 组成 其他 key 不包含 'train' 字符串的数据集则使用 non_train_size 和 non_train_sampler 作为参数。最终根据 ``key: PaddleDataLoader`` 组成
``Dict[key, PaddleDataLoader]`` 的字典返回。 ``Dict[key, PaddleDataLoader]`` 的字典返回。


::param ds_or_db: 实现 __getitem__() 和 __len__() 的对象;或这种对象的序列;或字典。其取值只能为 ``[DataSet, DataBundle,
Dict[str, DataSet]]``.
:param ds_or_db: 可以有以下三种取值,


* ds_or_db 为 :class:`~fastNLP.core.dataset.DataSet`,返回值为:class:`~fastNLP.core.dataloaders.PaddleDataLoader`
* ds_or_db 为 :class:`~fastNLP.io.DataBundle`, 返回值为 ``Dict[str, PaddleDataLoader]`` 的字典
* ds_or_db 为 ``Dict[str, DataSet]`` 字典, 返回值也为 ``Dict[str, PaddleDataLoader]`` 的字典
* ds_or_db 为 :class:`~fastNLP.io.DataBundle`, 返回值为 ``Dict[str, PaddleDataLoader]`` 的字典
* ds_or_db 为 ``Dict[str, DataSet]`` 字典, 返回值为 ``Dict[str, PaddleDataLoader]`` 的字典
* ds_or_db 为实现了 __getitem__() 和 __len__() 的对象 ,返回值为 :class:`~fastNLP.core.dataloaders.PaddleDataLoader`


:param feed_list: (list(Tensor)|tuple(Tensor)): feed Tensor list. :param feed_list: (list(Tensor)|tuple(Tensor)): feed Tensor list.
这个张量能被 :code:`paddle.static.data()` 创建。 如果:attr:`return_list` 是 ``False``, 那么 :attr:`feed_list` 这个张量能被 :code:`paddle.static.data()` 创建。 如果:attr:`return_list` 是 ``False``, 那么 :attr:`feed_list`


+ 4
- 5
fastNLP/core/dataloaders/torch_dataloader/fdl.py View File

@@ -227,7 +227,7 @@ def prepare_torch_dataloader(ds_or_db,
non_train_batch_size: int = None) \ non_train_batch_size: int = None) \
-> Union[TorchDataLoader, Dict[str, TorchDataLoader]]: -> Union[TorchDataLoader, Dict[str, TorchDataLoader]]:
""" """
``prepare_torch_dataloader`` 的功能是将输入的单个或多个 dataset 同时转为 ``TorchDataloader``对象, 详见 :class:`~fastNLP.core.dataloaders.TorchDataLoader`。
``prepare_torch_dataloader`` 的功能是将输入的单个或多个 dataset 同时转为 ``TorchDataloader``对象, 详见 :class:`~fastNLP.TorchDataLoader`。
根据 ds_or_db 的类型 ``[DataSet, DataBundle, Dict[name, Dataset]]`` 不同而有不同返回结果, 具体如下: 根据 ds_or_db 的类型 ``[DataSet, DataBundle, Dict[name, Dataset]]`` 不同而有不同返回结果, 具体如下:


* 当 ds_or_db 为 ``DataSet``时,``prepare_torch_dataloader`` 会将使用的除了 non_train_batch_size 和 non_train_sampler 以外的参数来 * 当 ds_or_db 为 ``DataSet``时,``prepare_torch_dataloader`` 会将使用的除了 non_train_batch_size 和 non_train_sampler 以外的参数来
@@ -241,12 +241,11 @@ def prepare_torch_dataloader(ds_or_db,
其他 key 不包含 'train' 字符串的数据集则使用 non_train_size 和 non_train_sampler 作为参数。最终根据 ``key: TorchDataLoader`` 组成 其他 key 不包含 'train' 字符串的数据集则使用 non_train_size 和 non_train_sampler 作为参数。最终根据 ``key: TorchDataLoader`` 组成
``Dict[key, TorchDataLoader]`` 的字典返回。 ``Dict[key, TorchDataLoader]`` 的字典返回。


:param ds_or_db: 实现 __getitem__() 和 __len__() 的对象;或这种对象的序列;或字典。其取值只能为 ``[DataSet, DataBundle,
Dict[str, DataSet]]``.
:param ds_or_db: 可以有以下三种取值,


* ds_or_db 为 :class:`~fastNLP.core.dataset.DataSet`,返回值为:class:`~fastNLP.core.dataloaders.TorchDataLoader`
* ds_or_db 为 :class:`~fastNLP.io.DataBundle`, 返回值为 ``Dict[str, TorchDataLoader]`` 的字典 * ds_or_db 为 :class:`~fastNLP.io.DataBundle`, 返回值为 ``Dict[str, TorchDataLoader]`` 的字典
* ds_or_db 为 ``Dict[str, DataSet]`` 字典, 返回值也为 ``Dict[str, TorchDataLoader]`` 的字典
* ds_or_db 为 ``Dict[str, DataSet]`` 字典, 返回值为 ``Dict[str, TorchDataLoader]`` 的字典
* ds_or_db 为实现了 __getitem__() 和 __len__() 的对象 ,返回值为:class:`~fastNLP.TorchDataLoader`


:param batch_size: 批次大小,默认为 ``16`` 且当 batch_sampler 为 None 有效。 :param batch_size: 批次大小,默认为 ``16`` 且当 batch_sampler 为 None 有效。
:param non_train_batch_size: 非 'train' 数据集的 ``TorchDataLoader`` 批次大小,默认为 ``16`` 且当 batch_sampler 为 None 有效。 :param non_train_batch_size: 非 'train' 数据集的 ``TorchDataLoader`` 批次大小,默认为 ``16`` 且当 batch_sampler 为 None 有效。


+ 0
- 5
fastNLP/io/loader/__init__.py View File

@@ -84,8 +84,6 @@ __all__ = [
"BQCorpusLoader", "BQCorpusLoader",
"LCQMCLoader", "LCQMCLoader",


"CoReferenceLoader",

"CMRC2018Loader" "CMRC2018Loader"
] ]


@@ -95,7 +93,6 @@ from .classification import CLSBaseLoader, YelpFullLoader, YelpPolarityLoader, A
MRLoader, R8Loader, R52Loader, OhsumedLoader, NG20Loader MRLoader, R8Loader, R52Loader, OhsumedLoader, NG20Loader
from .conll import ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader from .conll import ConllLoader, Conll2003Loader, Conll2003NERLoader, OntoNotesNERLoader, CTBLoader
from .conll import MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader from .conll import MsraNERLoader, PeopleDailyNERLoader, WeiboNERLoader
from .coreference import CoReferenceLoader
from .csv import CSVLoader from .csv import CSVLoader
from .cws import CWSLoader from .cws import CWSLoader
from .json import JsonLoader from .json import JsonLoader
@@ -103,5 +100,3 @@ from .loader import Loader
from .matching import MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, \ from .matching import MNLILoader, QuoraLoader, SNLILoader, QNLILoader, RTELoader, CNXNLILoader, BQCorpusLoader, \
LCQMCLoader LCQMCLoader
from .qa import CMRC2018Loader from .qa import CMRC2018Loader



+ 1
- 1
fastNLP/io/loader/conll.py View File

@@ -56,7 +56,7 @@ class ConllLoader(Loader):
r""" r"""
:param list headers: 每一列数据的名称,需为List or Tuple of str。``header`` 与 ``indexes`` 一一对应 :param list headers: 每一列数据的名称,需为List or Tuple of str。``header`` 与 ``indexes`` 一一对应
:param list sep: 指定分隔符,默认为制表符
:param str sep: 指定分隔符,默认为制表符
:param list indexes: 需要保留的数据列下标,从0开始。若为 ``None`` ,则所有列都保留。Default: ``None`` :param list indexes: 需要保留的数据列下标,从0开始。若为 ``None`` ,则所有列都保留。Default: ``None``
:param bool dropna: 是否忽略非法数据,若 ``False`` ,遇到非法数据时抛出 ``ValueError`` 。Default: ``True`` :param bool dropna: 是否忽略非法数据,若 ``False`` ,遇到非法数据时抛出 ``ValueError`` 。Default: ``True``
:param bool drophashtag: 是否忽略以 ``#`` 开头的句子。 :param bool drophashtag: 是否忽略以 ``#`` 开头的句子。


+ 0
- 64
fastNLP/io/loader/coreference.py View File

@@ -1,64 +0,0 @@
r"""undocumented"""

__all__ = [
"CoReferenceLoader",
]

from ...core.dataset import DataSet
from ..file_reader import _read_json
from fastNLP.core.dataset import Instance
# from ...core.const import Const
from .json import JsonLoader


class CoReferenceLoader(JsonLoader):
    r"""
    Loader for preprocessed CoNLL-2012 coreference data.

    Each line of the input file is expected to be one JSON object. ``doc_key``
    carries the document category, ``speakers`` the speaker of every token in
    every sentence, ``clusters`` the groups of spans that refer to the same
    real-world entity, and ``sentences`` the tokenized text.

    Example (lists abbreviated)::

        {"doc_key": "bc/cctv/00/cctv_0000_0",
         "speakers": [["Speaker#1", "Speaker#1", ...], ["Speaker#1", ...], ...],
         "clusters": [[[70, 70], [485, 486], [500, 500], [73, 73], [55, 55], [153, 154], [366, 366]]],
         "sentences": [["In", "the", "summer", "of", "2005", ",", ...], ["With", "their", "unique", "charm", ...]]
        }

    The loaded data set has the following structure:

    .. csv-table::
        :header: "raw_words1", "raw_words2", "raw_words3", "raw_words4"

        "bc/cctv/00/cctv_0000_0", "[['Speaker#1', 'Speaker#1', 'Speaker#1...", "[[[70, 70], [485, 486], [500, 500], [7...", "[['In', 'the', 'summer', 'of', '2005',..."
        "...", "...", "...", "..."

    """

    def __init__(self, fields=None, dropna=False):
        super().__init__(fields, dropna)
        # The JSON-key -> field-name mapping is fixed for this loader and
        # replaces whatever mapping the parent constructor stored.
        self.fields = {
            "doc_key": "raw_words1",
            "speakers": "raw_words2",
            "clusters": "raw_words3",
            "sentences": "raw_words4",
        }

    def _load(self, path):
        r"""
        Read one JSON-lines file into a ``DataSet``.

        :param path: path of the JSON data file
        :return: a ``DataSet`` holding one ``Instance`` per JSON record, with
            keys renamed through ``self.fields`` when that mapping is set
        """
        dataset = DataSet()
        records = _read_json(path, fields=self.fields_list, dropna=self.dropna)
        for _, record in records:
            if self.fields:
                renamed = {self.fields[key]: value for key, value in record.items()}
            else:
                renamed = record
            dataset.append(Instance(**renamed))
        return dataset

    def download(self):
        r"""
        Automatic download is not available because of copyright restrictions.
        See https://www.aclweb.org/anthology/W12-4501 for the data source.

        :raises RuntimeError: always
        """
        raise RuntimeError("CoReference cannot be downloaded automatically.")

+ 0
- 2
fastNLP/io/pipe/__init__.py View File

@@ -54,7 +54,6 @@ __all__ = [
"GranularizePipe", "GranularizePipe",
"MachingTruncatePipe", "MachingTruncatePipe",


"CoReferencePipe",


"CMRC2018BertPipe", "CMRC2018BertPipe",


@@ -72,7 +71,6 @@ from .classification import CLSBasePipe, YelpFullPipe, YelpPolarityPipe, SSTPipe
WeiboSenti100kPipe, AGsNewsPipe, DBPediaPipe, MRPipe, R8Pipe, R52Pipe, OhsumedPipe, NG20Pipe WeiboSenti100kPipe, AGsNewsPipe, DBPediaPipe, MRPipe, R8Pipe, R52Pipe, OhsumedPipe, NG20Pipe
from .conll import Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe from .conll import Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe
from .conll import Conll2003Pipe, iob2, iob2bioes from .conll import Conll2003Pipe, iob2, iob2bioes
from .coreference import CoReferencePipe
from .cws import CWSPipe from .cws import CWSPipe
from .matching import MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, \ from .matching import MatchingBertPipe, RTEBertPipe, SNLIBertPipe, QuoraBertPipe, QNLIBertPipe, MNLIBertPipe, \
MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, CNXNLIBertPipe, CNXNLIPipe, BQCorpusBertPipe, \ MatchingPipe, RTEPipe, SNLIPipe, QuoraPipe, QNLIPipe, MNLIPipe, CNXNLIBertPipe, CNXNLIPipe, BQCorpusBertPipe, \


+ 3
- 11
fastNLP/io/pipe/classification.py View File

@@ -37,7 +37,7 @@ from fastNLP.core.log import logger


class CLSBasePipe(Pipe): class CLSBasePipe(Pipe):
def __init__(self, lower: bool = False, tokenizer: str = 'spacy', lang='en'):
def __init__(self, lower: bool = False, tokenizer: str = 'raw', lang='en'):
super().__init__() super().__init__()
self.lower = lower self.lower = lower
self.tokenizer = get_tokenizer(tokenizer, lang=lang) self.tokenizer = get_tokenizer(tokenizer, lang=lang)
@@ -81,8 +81,6 @@ class CLSBasePipe(Pipe):
for name, dataset in data_bundle.datasets.items(): for name, dataset in data_bundle.datasets.items():
dataset.add_seq_len('words') dataset.add_seq_len('words')


data_bundle.set_input('words', 'seq_len', 'target')

return data_bundle return data_bundle


def process_from_file(self, paths) -> DataBundle: def process_from_file(self, paths) -> DataBundle:
@@ -409,11 +407,11 @@ class SST2Pipe(CLSBasePipe):


""" """


def __init__(self, lower=False, tokenizer='spacy'):
def __init__(self, lower=False, tokenizer='raw'):
r""" r"""


:param bool lower: 是否对输入进行小写化。 :param bool lower: 是否对输入进行小写化。
:param str tokenizer: 使用哪种tokenize方式将数据切成单词。支持'spacy'和'raw'。raw使用空格作为切分。
:param str tokenizer: 使用哪种tokenize方式将数据切成单词。
""" """
super().__init__(lower=lower, tokenizer=tokenizer, lang='en') super().__init__(lower=lower, tokenizer=tokenizer, lang='en')


@@ -594,8 +592,6 @@ class ChnSentiCorpPipe(Pipe):
for name, dataset in data_bundle.datasets.items(): for name, dataset in data_bundle.datasets.items():
dataset.add_seq_len('chars') dataset.add_seq_len('chars')


data_bundle.set_input(*input_fields, *target_fields)

return data_bundle return data_bundle


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
@@ -707,8 +703,6 @@ class THUCNewsPipe(CLSBasePipe):
input_fields = ['target', 'seq_len'] + input_field_names input_fields = ['target', 'seq_len'] + input_field_names
target_fields = ['target'] target_fields = ['target']


data_bundle.set_input(*input_fields, *target_fields)

return data_bundle return data_bundle


def process_from_file(self, paths=None): def process_from_file(self, paths=None):
@@ -809,8 +803,6 @@ class WeiboSenti100kPipe(CLSBasePipe):


input_fields = ['target', 'seq_len'] + input_field_names input_fields = ['target', 'seq_len'] + input_field_names
target_fields = ['target'] target_fields = ['target']
data_bundle.set_input(*input_fields, *target_fields)


return data_bundle return data_bundle


+ 0
- 6
fastNLP/io/pipe/conll.py View File

@@ -72,8 +72,6 @@ class _NERPipe(Pipe):
for name, dataset in data_bundle.iter_datasets(): for name, dataset in data_bundle.iter_datasets():
dataset.add_seq_len('words') dataset.add_seq_len('words')
data_bundle.set_input(*input_fields, *target_fields)


return data_bundle return data_bundle


@@ -202,8 +200,6 @@ class Conll2003Pipe(Pipe):
for name, dataset in data_bundle.iter_datasets(): for name, dataset in data_bundle.iter_datasets():
dataset.add_seq_len('words') dataset.add_seq_len('words')
data_bundle.set_input(*input_fields, *target_fields)


return data_bundle return data_bundle
@@ -325,8 +321,6 @@ class _CNNERPipe(Pipe):
for name, dataset in data_bundle.iter_datasets(): for name, dataset in data_bundle.iter_datasets():
dataset.add_seq_len('chars') dataset.add_seq_len('chars')
data_bundle.set_input(*input_fields, *target_fields)


return data_bundle return data_bundle




+ 0
- 186
fastNLP/io/pipe/coreference.py View File

@@ -1,186 +0,0 @@
r"""undocumented"""

__all__ = [
"CoReferencePipe"
]

import collections

import numpy as np

from fastNLP.core.vocabulary import Vocabulary
from .pipe import Pipe
from ..data_bundle import DataBundle
from ..loader.coreference import CoReferenceLoader


# from ...core.const import Const


class CoReferencePipe(Pipe):
    r"""
    Pipe for coreference-resolution data: derives the document genre, speaker
    ids, word indices, character indices and sentence lengths.

    After processing, every data set contains the following fields, all of
    which are marked as input via ``set_input``:

    .. csv-table::
        :header: "words1", "words2","words3","words4","chars","seq_len","target"

        "bc", "[[0,0],[1,1]]","[['I','am'],[]]","[[1,2],[]]","[[[1],[2,3]],[]]","[2,3]","[[[2,3],[6,7]],[[10,12],[20,22]]]"
        "[...]", "[...]","[...]","[...]","[...]","[...]","[...]"

    ``config`` must expose ``char_path``, ``max_sentences`` and ``filter``,
    which are the attributes read by :meth:`process`.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

    def process(self, data_bundle: DataBundle):
        r"""
        Convert the raw fields ``raw_words1`` .. ``raw_words4`` of every data
        set in ``data_bundle`` into model inputs.

        * ``words1``: genre id looked up from the first two characters of ``raw_words1``
        * ``words2``: speaker ids computed from ``raw_words2``
        * ``words3``: ``raw_words4`` renamed (the tokenized sentences)
        * ``words4`` / ``chars`` / ``seq_len``: word-index matrix, char-index
          tensor and sentence lengths computed from ``words3``
        * ``target``: ``raw_words3`` renamed (the clusters)

        Truncation to ``config.max_sentences`` is only applied to the data set
        named ``'train'``.

        :param data_bundle: bundle whose data sets hold the raw fields
        :return: the same ``data_bundle``, modified in place
        """
        genres = {g: i for i, g in enumerate(["bc", "bn", "mz", "nw", "pt", "tc", "wb"])}
        vocab = Vocabulary().from_dataset(*data_bundle.datasets.values(), field_name='raw_words4')
        vocab.build_vocab()
        word2id = vocab.word2idx
        data_bundle.set_vocab(vocab, 'words1')

        if self.config.char_path:
            char_dict = get_char_dict(self.config.char_path)
        else:
            # Collect every character of every vocabulary word except the
            # first two entries (presumably the padding / unknown tokens --
            # TODO confirm against Vocabulary).
            char_set = set()
            for i, w in enumerate(word2id):
                if i < 2:
                    continue
                char_set.update(w)

            char_dict = collections.defaultdict(int)
            # Enumerate in sorted order: iterating a set of strings directly
            # depends on hash randomization, which would give different char
            # ids in different processes.
            char_dict.update({c: i for i, c in enumerate(sorted(char_set))})

        for name, ds in data_bundle.iter_datasets():
            is_train = name == 'train'

            def encode(ins, is_train=is_train):
                # Returns (word-index matrix, char-index tensor, sentence lengths).
                return doc2numpy(ins['words3'], word2id, char_dict, max(self.config.filter),
                                 self.config.max_sentences, is_train=is_train)

            # genre
            ds.apply(lambda x: genres[x['raw_words1'][:2]], new_field_name='words1')

            # speaker_ids_np
            ds.apply(lambda x, is_train=is_train: speaker2numpy(x['raw_words2'], self.config.max_sentences,
                                                                is_train=is_train),
                     new_field_name='words2')

            # sentences
            ds.rename_field('raw_words4', 'words3')

            # doc_np
            ds.apply(lambda x: encode(x)[0], new_field_name='words4')
            # char_index
            ds.apply(lambda x: encode(x)[1], new_field_name='chars')
            # seq len
            ds.apply(lambda x: encode(x)[2], new_field_name='seq_len')

            # clusters
            ds.rename_field('raw_words3', 'target')

            ds.set_input('words1', 'words2', 'words3', 'words4', 'chars', 'seq_len', 'target')

        return data_bundle

    def process_from_file(self, paths):
        r"""
        Load the data at ``paths`` with ``CoReferenceLoader`` and run
        :meth:`process` on the result.

        :param paths: forwarded unchanged to ``CoReferenceLoader().load``
        :return: the processed ``DataBundle``
        """
        bundle = CoReferenceLoader().load(paths)
        return self.process(bundle)


# helper

def doc2numpy(doc, word2id, chardict, max_filter, max_sentences, is_train):
    """Encode a document as a zero-padded word-index matrix.

    Delegates the encoding to ``_doc2vec`` and then copies the ragged list of
    word ids into an integer matrix of shape ``(num_sentences, max_len)``.

    :return: tuple ``(doc_np, char_index, length)`` -- the padded word-index
        matrix, the char-index array and the list of sentence lengths, the
        latter two exactly as returned by ``_doc2vec``
    """
    docvec, char_index, length, max_len = _doc2vec(
        doc, word2id, chardict, max_filter, max_sentences, is_train
    )
    # Sanity checks on the shapes produced by _doc2vec.
    assert max(length) == max_len
    assert char_index.shape[0] == len(length)
    assert char_index.shape[1] == max_len

    doc_np = np.zeros((len(docvec), max_len), int)
    for row, sent_ids in enumerate(docvec):
        for col, word_id in enumerate(sent_ids):
            doc_np[row][col] = word_id
    return doc_np, char_index, length


def _doc2vec(doc, word2id, char_dict, max_filter, max_sentences, is_train):
max_len = 0
max_word_length = 0
docvex = []
length = []
if is_train:
sent_num = min(max_sentences, len(doc))
else:
sent_num = len(doc)

for i in range(sent_num):
sent = doc[i]
length.append(len(sent))
if (len(sent) > max_len):
max_len = len(sent)
sent_vec = []
for j, word in enumerate(sent):
if len(word) > max_word_length:
max_word_length = len(word)
if word in word2id:
sent_vec.append(word2id[word])
else:
sent_vec.append(word2id["UNK"])
docvex.append(sent_vec)

char_index = np.zeros((sent_num, max_len, max_word_length), dtype=int)
for i in range(sent_num):
sent = doc[i]
for j, word in enumerate(sent):
char_index[i, j, :len(word)] = [char_dict[c] for c in word]

return docvex, char_index, length, max_len


def speaker2numpy(speakers_raw, max_sentences, is_train):
    """Turn per-sentence speaker names into one flat array of speaker ids.

    :param speakers_raw: list of sentences, each a list of speaker names
    :param max_sentences: when ``is_train`` is true, only the first
        ``max_sentences`` sentences are kept
    :param is_train: whether to apply the ``max_sentences`` truncation
    :return: 1-D numpy array with one id per token; ids are assigned in order
        of first appearance, so the same input always yields the same ids
    """
    if is_train and len(speakers_raw) > max_sentences:
        speakers_raw = speakers_raw[0:max_sentences]
    speakers = [speaker for sentence in speakers_raw for speaker in sentence]

    # Assign ids by first appearance instead of enumerating a set, whose
    # iteration order for strings changes with hash randomization.
    speaker_dict = {}
    for speaker in speakers:
        speaker_dict.setdefault(speaker, len(speaker_dict))

    speaker_ids = np.array([speaker_dict[s] for s in speakers])
    return speaker_ids


# Flatten one level of nesting.
def flatten(l):
    """Return a new list with the items of every sub-iterable of ``l``, in order."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat


def get_char_dict(path):
    r"""
    Build a character -> index mapping from a vocabulary file with one entry per line.

    Index 0 is reserved for ``"<UNK>"``; the entry on the n-th line (1-based,
    whitespace-stripped) gets index n. If an entry occurs several times, the
    last occurrence determines its index.

    :param str path: path of the vocabulary file, read as UTF-8.
    :return: ``collections.defaultdict(int)`` so that looking up an unknown
        character yields 0, the index of ``"<UNK>"``.
    """
    vocab = ["<UNK>"]
    # Explicit encoding: the platform default may not be able to decode a
    # vocabulary that contains non-ASCII characters.
    with open(path, encoding="utf-8") as f:
        vocab.extend(line.strip() for line in f)
    char_dict = collections.defaultdict(int)
    char_dict.update({c: i for i, c in enumerate(vocab)})
    return char_dict

+ 0
- 2
fastNLP/io/pipe/cws.py View File

@@ -262,8 +262,6 @@ class CWSPipe(Pipe):
target_fields = ['target', 'seq_len'] target_fields = ['target', 'seq_len']
for name, dataset in data_bundle.iter_datasets(): for name, dataset in data_bundle.iter_datasets():
dataset.add_seq_len('chars') dataset.add_seq_len('chars')
data_bundle.set_input(*input_fields, *target_fields)


return data_bundle return data_bundle


+ 2
- 10
fastNLP/io/pipe/matching.py View File

@@ -161,11 +161,7 @@ class MatchingBertPipe(Pipe):
for name, dataset in data_bundle.iter_datasets(): for name, dataset in data_bundle.iter_datasets():
dataset.add_seq_len('words') dataset.add_seq_len('words')
dataset.set_input(*input_fields)
for fields in target_fields:
if dataset.has_field(fields):
dataset.set_input(fields)

return data_bundle return data_bundle




@@ -311,11 +307,7 @@ class MatchingPipe(Pipe):
for name, dataset in data_bundle.datasets.items(): for name, dataset in data_bundle.datasets.items():
dataset.add_seq_len('words1', 'seq_len1') dataset.add_seq_len('words1', 'seq_len1')
dataset.add_seq_len('words2', 'seq_len2') dataset.add_seq_len('words2', 'seq_len2')
dataset.set_input(*input_fields)
for fields in target_fields:
if dataset.has_field(fields):
dataset.set_input(fields)

return data_bundle return data_bundle






+ 0
- 2
fastNLP/io/pipe/qa.py View File

@@ -135,8 +135,6 @@ class CMRC2018BertPipe(Pipe):
src_vocab.index_dataset(*data_bundle.datasets.values(), field_name='raw_chars', new_field_name='chars') src_vocab.index_dataset(*data_bundle.datasets.values(), field_name='raw_chars', new_field_name='chars')
data_bundle.set_vocab(src_vocab, 'chars') data_bundle.set_vocab(src_vocab, 'chars')


data_bundle.set_input('chars', 'raw_chars', 'answers', 'target_start', 'target_end', 'context_len')

return data_bundle return data_bundle


def process_from_file(self, paths=None) -> DataBundle: def process_from_file(self, paths=None) -> DataBundle:


+ 0
- 3
fastNLP/io/pipe/summarization.py View File

@@ -80,9 +80,6 @@ class ExtCNNDMPipe(Pipe):


data_bundle = _drop_empty_instance(data_bundle, "label") data_bundle = _drop_empty_instance(data_bundle, "label")


# set input and target
data_bundle.set_input('words', 'seq_len', 'target', 'seq_len')

# print("[INFO] Load existing vocab from %s!" % self.vocab_path) # print("[INFO] Load existing vocab from %s!" % self.vocab_path)
word_list = [] word_list = []
with open(self.vocab_path, 'r', encoding='utf8') as vocab_f: with open(self.vocab_path, 'r', encoding='utf8') as vocab_f:


+ 1
- 0
fastNLP/modules/torch/__init__.py View File

@@ -5,6 +5,7 @@ __all__ = [
"Seq2SeqDecoder", "Seq2SeqDecoder",
"LSTMSeq2SeqDecoder", "LSTMSeq2SeqDecoder",
"TransformerSeq2SeqDecoder", "TransformerSeq2SeqDecoder",
"MLP",


"LSTM", "LSTM",
"Seq2SeqEncoder", "Seq2SeqEncoder",


+ 5
- 2
fastNLP/modules/torch/decoder/__init__.py View File

@@ -7,9 +7,12 @@ __all__ = [


"Seq2SeqDecoder", "Seq2SeqDecoder",
"LSTMSeq2SeqDecoder", "LSTMSeq2SeqDecoder",
"TransformerSeq2SeqDecoder"
"TransformerSeq2SeqDecoder",

"MLP"
] ]


from .crf import ConditionalRandomField, allowed_transitions from .crf import ConditionalRandomField, allowed_transitions
from .seq2seq_state import State from .seq2seq_state import State
from .seq2seq_decoder import LSTMSeq2SeqDecoder, TransformerSeq2SeqDecoder, Seq2SeqDecoder
from .seq2seq_decoder import LSTMSeq2SeqDecoder, TransformerSeq2SeqDecoder, Seq2SeqDecoder
from .mlp import MLP

+ 97
- 0
fastNLP/modules/torch/decoder/mlp.py View File

@@ -0,0 +1,97 @@
r"""undocumented"""

__all__ = [
"MLP"
]

import torch
import torch.nn as nn


class MLP(nn.Module):
    r"""
    Multi-layer perceptron.

    .. note::
        The activation of the hidden layers is set through ``activation``, which accepts either a
        single str/function or a list of str/function.
        If a single str/function is given, every hidden layer uses it;
        if a list is given, each hidden layer uses the corresponding element, and the length of the
        list must equal the number of hidden layers.
        The activation of the output layer is set through ``output_activation``; the default ``None``
        means the output layer has no activation.

    Examples::

        >>> net1 = MLP([5, 10, 5])
        >>> net2 = MLP([5, 10, 5], 'tanh')
        >>> net3 = MLP([5, 6, 7, 8, 5], 'tanh')
        >>> net4 = MLP([5, 6, 7, 8, 5], 'relu', output_activation='tanh')
        >>> net5 = MLP([5, 6, 7, 8, 5], ['tanh', 'relu', 'tanh'], 'tanh')
        >>> for net in [net1, net2, net3, net4, net5]:
        ...     x = torch.randn(5, 5)
        ...     y = net(x)
        ...     print(x)
        ...     print(y)
    """

    def __init__(self, size_layer, activation='relu', output_activation=None, initial_method=None, dropout=0.0):
        r"""
        :param List[int] size_layer: list of ints defining the layers of the MLP; each number is the
            size of one layer. The MLP has ``len(size_layer) - 1`` linear layers.
        :param Union[str,func,List[str]] activation: a str/function, or a list of them, defining the
            activation of each hidden layer. Recognised names (case-insensitive) are ``relu``,
            ``tanh`` and ``sigmoid``; the default is ``relu``.
        :param Union[str,func] output_activation: str or function defining the activation of the
            output layer; the default ``None`` means no activation on the output layer.
        :param str initial_method: parameter initialisation method.
            NOTE(review): accepted but not used anywhere in this class - confirm whether parameter
            initialisation is supposed to be applied here.
        :param float dropout: dropout probability, default 0
        :raises ValueError: if ``activation`` is a list whose length differs from the number of
            hidden layers, or if an activation is neither callable nor a recognised name.
        """
        super(MLP, self).__init__()
        self.hiddens = nn.ModuleList()
        self.output = None
        self.output_activation = output_activation
        n_sizes = len(size_layer)
        for i in range(1, n_sizes):
            layer = nn.Linear(size_layer[i - 1], size_layer[i])
            if i + 1 == n_sizes:
                # The last linear layer is kept apart so it can get its own activation.
                self.output = layer
            else:
                self.hiddens.append(layer)

        self.dropout = nn.Dropout(p=dropout)

        actives = {
            'relu': nn.ReLU(),
            'tanh': nn.Tanh(),
            'sigmoid': nn.Sigmoid(),
        }
        n_hidden = n_sizes - 2
        if not isinstance(activation, (list, tuple)):
            activation = [activation] * n_hidden
        elif len(activation) != n_hidden:
            raise ValueError(
                f"the length of activation function list expected {n_hidden} but got {len(activation)}!")
        # Kept as a plain list (not a ModuleList) because entries may be bare functions.
        self.hidden_active = [self._resolve_activation(func, actives, activation) for func in activation]
        if self.output_activation is not None:
            self.output_activation = self._resolve_activation(self.output_activation, actives,
                                                              self.output_activation)

    @staticmethod
    def _resolve_activation(spec, actives, reported):
        r"""
        Turn one activation spec into a callable.

        :param spec: a callable (returned as is) or a case-insensitive key of ``actives``.
        :param dict actives: mapping from lower-case activation name to activation module.
        :param reported: the value shown in the error message when ``spec`` is invalid.
        :raises ValueError: if ``spec`` is neither callable nor a key of ``actives``.
        """
        if callable(spec):
            return spec
        if isinstance(spec, str):
            key = spec.lower()
            if key in actives:
                # Look up with the normalised key so that e.g. 'ReLU' works.
                return actives[key]
        raise ValueError("should set activation correctly: {}".format(reported))

    def forward(self, x):
        r"""
        Apply every hidden layer followed by its activation and dropout, then the output layer,
        the optional output activation, and dropout once more.

        :param torch.Tensor x: input of the MLP
        :return: torch.Tensor : output of the MLP
        """
        for layer, func in zip(self.hiddens, self.hidden_active):
            x = self.dropout(func(layer(x)))
        x = self.output(x)
        if self.output_activation is not None:
            x = self.output_activation(x)
        x = self.dropout(x)
        return x

+ 8
- 1
tests/core/dataloaders/torch_dataloader/test_fdl.py View File

@@ -4,10 +4,10 @@ from fastNLP.core.dataloaders.torch_dataloader import TorchDataLoader, prepare_t
from fastNLP.core.dataset import DataSet from fastNLP.core.dataset import DataSet
from fastNLP.io.data_bundle import DataBundle from fastNLP.io.data_bundle import DataBundle
from fastNLP.envs.imports import _NEED_IMPORT_TORCH from fastNLP.envs.imports import _NEED_IMPORT_TORCH
from fastNLP.core import Trainer
from pkg_resources import parse_version from pkg_resources import parse_version
from tests.helpers.utils import Capturing, recover_logger from tests.helpers.utils import Capturing, recover_logger
from fastNLP import logger from fastNLP import logger
import numpy as np


if _NEED_IMPORT_TORCH: if _NEED_IMPORT_TORCH:
import torch import torch
@@ -141,6 +141,13 @@ class TestFdl:
dl_dict1 = prepare_torch_dataloader(ds_dict1) dl_dict1 = prepare_torch_dataloader(ds_dict1)
assert isinstance(dl_dict1['train_1'], TorchDataLoader) assert isinstance(dl_dict1['train_1'], TorchDataLoader)
assert isinstance(dl_dict1['val'], TorchDataLoader) assert isinstance(dl_dict1['val'], TorchDataLoader)

ds = [[1, [1]], [2, [2, 2]]]
dl = prepare_torch_dataloader(ds, batch_size=2)
for batch in dl:
assert (batch[0] == torch.LongTensor([1, 2])).sum()==2
assert (batch[1] == torch.LongTensor([[1, 0], [2, 2]])).sum()==4

# sequence = [ds, ds1] # sequence = [ds, ds1]
# seq_ds = prepare_torch_dataloader(sequence) # seq_ds = prepare_torch_dataloader(sequence)
# assert isinstance(seq_ds[0], TorchDataLoader) # assert isinstance(seq_ds[0], TorchDataLoader)


+ 442
- 0
tests/data_for_tests/conll_2003_example.txt View File

@@ -0,0 +1,442 @@
-DOCSTART- -X- -X- O

SOCCER NN B-NP O
- : O O
JAPAN NNP B-NP B-LOC
GET VB B-VP O
LUCKY NNP B-NP O
WIN NNP I-NP O
, , O O
CHINA NNP B-NP B-PER
IN IN B-PP O
SURPRISE DT B-NP O
DEFEAT NN I-NP O
. . O O

Nadim NNP B-NP B-PER
Ladki NNP I-NP I-PER

AL-AIN NNP B-NP B-LOC
, , O O
United NNP B-NP B-LOC
Arab NNP I-NP I-LOC
Emirates NNPS I-NP I-LOC
1996-12-06 CD I-NP O

Japan NNP B-NP B-LOC
began VBD B-VP O
the DT B-NP O
defence NN I-NP O
of IN B-PP O
their PRP$ B-NP O
Asian JJ I-NP B-MISC
Cup NNP I-NP I-MISC
title NN I-NP O
with IN B-PP O
a DT B-NP O
lucky JJ I-NP O
2-1 CD I-NP O
win VBP B-VP O
against IN B-PP O
Syria NNP B-NP B-LOC
in IN B-PP O
a DT B-NP O
Group NNP I-NP O
C NNP I-NP O
championship NN I-NP O
match NN I-NP O
on IN B-PP O
Friday NNP B-NP O
. . O O

But CC O O
China NNP B-NP B-LOC
saw VBD B-VP O
their PRP$ B-NP O
luck NN I-NP O
desert VB B-VP O
them PRP B-NP O
in IN B-PP O
the DT B-NP O
second NN I-NP O
match NN I-NP O
of IN B-PP O
the DT B-NP O
group NN I-NP O
, , O O
crashing VBG B-VP O
to TO B-PP O
a DT B-NP O
surprise NN I-NP O
2-0 CD I-NP O
defeat NN I-NP O
to TO B-PP O
newcomers NNS B-NP O
Uzbekistan NNP I-NP B-LOC
. . O O

China NNP B-NP B-LOC
controlled VBD B-VP O
most JJS B-NP O
of IN B-PP O
the DT B-NP O
match NN I-NP O
and CC O O
saw VBD B-VP O
several JJ B-NP O
chances NNS I-NP O
missed VBD B-VP O
until IN B-SBAR O
the DT B-NP O
78th JJ I-NP O
minute NN I-NP O
when WRB B-ADVP O
Uzbek NNP B-NP B-MISC
striker NN I-NP O
Igor JJ B-NP B-PER
Shkvyrin NNP I-NP I-PER
took VBD B-VP O
advantage NN B-NP O
of IN B-PP O
a DT B-NP O
misdirected JJ I-NP O
defensive JJ I-NP O
header NN I-NP O
to TO B-VP O
lob VB I-VP O
the DT B-NP O
ball NN I-NP O
over IN B-PP O
the DT B-NP O
advancing VBG I-NP O
Chinese JJ I-NP B-MISC
keeper NN I-NP O
and CC O O
into IN B-PP O
an DT B-NP O
empty JJ I-NP O
net NN I-NP O
. . O O

Oleg NNP B-NP B-PER
Shatskiku NNP I-NP I-PER
made VBD B-VP O
sure JJ B-ADJP O
of IN B-PP O
the DT B-NP O
win VBP B-VP O
in IN B-PP O
injury NN B-NP O
time NN I-NP O
, , O O
hitting VBG B-VP O
an DT B-NP O
unstoppable JJ I-NP O
left VBD B-VP O
foot NN B-NP O
shot NN I-NP O
from IN B-PP O
just RB B-NP O
outside IN B-PP O
the DT B-NP O
area NN I-NP O
. . O O

The DT B-NP O
former JJ I-NP O
Soviet JJ I-NP B-MISC
republic NN I-NP O
was VBD B-VP O
playing VBG I-VP O
in IN B-PP O
an DT B-NP O
Asian NNP I-NP B-MISC
Cup NNP I-NP I-MISC
finals NNS I-NP O
tie NN I-NP O
for IN B-PP O
the DT B-NP O
first JJ I-NP O
time NN I-NP O
. . O O

Despite IN B-PP O
winning VBG B-VP O
the DT B-NP O
Asian JJ I-NP B-MISC
Games NNPS I-NP I-MISC
title NN I-NP O
two CD B-NP O
years NNS I-NP O
ago RB B-ADVP O
, , O O
Uzbekistan NNP B-NP B-LOC
are VBP B-VP O
in IN B-PP O
the DT B-NP O
finals NNS I-NP O
as IN B-SBAR O
outsiders NNS B-NP O
. . O O

Two CD B-NP O
goals NNS I-NP O
from IN B-PP O
defensive JJ B-NP O
errors NNS I-NP O
in IN B-PP O
the DT B-NP O
last JJ I-NP O
six CD I-NP O
minutes NNS I-NP O
allowed VBD B-VP O
Japan NNP B-NP B-LOC
to TO B-VP O
come VB I-VP O
from IN B-PP O
behind NN B-NP O
and CC O O
collect VB B-VP O
all DT B-NP O
three CD I-NP O
points NNS I-NP O
from IN B-PP O
their PRP$ B-NP O
opening NN I-NP O
meeting NN I-NP O
against IN B-PP O
Syria NNP B-NP B-LOC
. . O O

Takuya NNP B-NP B-PER
Takagi NNP I-NP I-PER
scored VBD B-VP O
the DT B-NP O
winner NN I-NP O
in IN B-PP O
the DT B-NP O
88th JJ I-NP O
minute NN I-NP O
, , O O
rising VBG B-VP O
to TO I-VP O
head VB I-VP O
a DT B-NP O
Hiroshige NNP I-NP B-PER
Yanagimoto NNP I-NP I-PER
cross VB B-VP O
towards IN B-PP O
the DT B-NP O
Syrian JJ I-NP B-MISC
goal NN I-NP O
which WDT B-NP O
goalkeeper VBD B-VP O
Salem NNP B-NP B-PER
Bitar NNP I-NP I-PER
appeared VBD B-VP O
to TO I-VP O
have VB I-VP O
covered VBN I-VP O
but CC O O
then RB B-VP O
allowed VBN I-VP O
to TO I-VP O
slip VB I-VP O
into IN B-PP O
the DT B-NP O
net NN I-NP O
. . O O

It PRP B-NP O
was VBD B-VP O
the DT B-NP O
second JJ I-NP O
costly JJ I-NP O
blunder NN I-NP O
by IN B-PP O
Syria NNP B-NP B-LOC
in IN B-PP O
four CD B-NP O
minutes NNS I-NP O
. . O O

Defender NNP B-NP O
Hassan NNP I-NP B-PER
Abbas NNP I-NP I-PER
rose VBD B-VP O
to TO I-VP O
intercept VB I-VP O
a DT B-NP O
long JJ I-NP O
ball NN I-NP O
into IN B-PP O
the DT B-NP O
area NN I-NP O
in IN B-PP O
the DT B-NP O
84th JJ I-NP O
minute NN I-NP O
but CC O O
only RB B-ADVP O
managed VBD B-VP O
to TO I-VP O
divert VB I-VP O
it PRP B-NP O
into IN B-PP O
the DT B-NP O
top JJ I-NP O
corner NN I-NP O
of IN B-PP O
Bitar NN B-NP B-PER
's POS B-NP O
goal NN I-NP O
. . O O

Nader NNP B-NP B-PER
Jokhadar NNP I-NP I-PER
had VBD B-VP O
given VBN I-VP O
Syria NNP B-NP B-LOC
the DT B-NP O
lead NN I-NP O
with IN B-PP O
a DT B-NP O
well-struck NN I-NP O
header NN I-NP O
in IN B-PP O
the DT B-NP O
seventh JJ I-NP O
minute NN I-NP O
. . O O

Japan NNP B-NP B-LOC
then RB B-ADVP O
laid VBD B-VP O
siege NN B-NP O
to TO B-PP O
the DT B-NP O
Syrian JJ I-NP B-MISC
penalty NN I-NP O
area NN I-NP O
for IN B-PP O
most JJS B-NP O
of IN B-PP O
the DT B-NP O
game NN I-NP O
but CC O O
rarely RB B-VP O
breached VBD I-VP O
the DT B-NP O
Syrian JJ I-NP B-MISC
defence NN I-NP O
. . O O

Bitar NN B-NP B-PER
pulled VBD B-VP O
off RP B-PRT O
fine JJ B-NP O
saves VBZ B-VP O
whenever WRB B-ADVP O
they PRP B-NP O
did VBD B-VP O
. . O O

Japan NNP B-NP B-LOC
coach NN I-NP O
Shu NNP I-NP B-PER
Kamo NNP I-NP I-PER
said VBD B-VP O
: : O O
' '' O O
' POS B-NP O
The DT I-NP O
Syrian JJ I-NP B-MISC
own JJ I-NP O
goal NN I-NP O
proved VBD B-VP O
lucky JJ B-ADJP O
for IN B-PP O
us PRP B-NP O
. . O O

The DT B-NP O
Syrians NNPS I-NP B-MISC
scored VBD B-VP O
early JJ B-NP O
and CC O O
then RB B-VP O
played VBN I-VP O
defensively RB B-ADVP O
and CC O O
adopted VBD B-VP O
long RB I-VP O
balls VBZ I-VP O
which WDT B-NP O
made VBD B-VP O
it PRP B-NP O
hard JJ B-ADJP O
for IN B-PP O
us PRP B-NP O
. . O O
' '' O O

' '' O O

Japan NNP B-NP B-LOC
, , O O
co-hosts VBZ B-VP O
of IN B-PP O
the DT B-NP O
World NNP I-NP B-MISC
Cup NNP I-NP I-MISC
in IN B-PP O
2002 CD B-NP O
and CC O O
ranked VBD B-VP O
20th JJ B-NP O
in IN B-PP O
the DT B-NP O
world NN I-NP O
by IN B-PP O
FIFA NNP B-NP B-ORG
, , O O
are VBP B-VP O
favourites JJ B-ADJP O
to TO B-VP O
regain VB I-VP O
their PRP$ B-NP O
title NN I-NP O
here RB B-ADVP O
. . O O

Hosts NNPS B-NP O
UAE NNP I-NP B-LOC
play NN I-NP O
Kuwait NNP I-NP B-LOC
and CC O O
South NNP B-NP B-LOC
Korea NNP I-NP I-LOC
take VBP B-VP O
on IN B-PP O
Indonesia NNP B-NP B-LOC
on IN B-PP O
Saturday NNP B-NP O
in IN B-PP O
Group NNP B-NP O
A NNP I-NP O
matches VBZ B-VP O
. . O O

All DT B-NP O
four CD I-NP O
teams NNS I-NP O
are VBP B-VP O
level NN B-NP O
with IN B-PP O
one CD B-NP O
point NN I-NP O
each DT B-NP O
from IN B-PP O
one CD B-NP O
game NN I-NP O
. . O O

+ 15
- 0
tests/data_for_tests/conll_example.txt View File

@@ -0,0 +1,15 @@
1 I _ PRP PRP _ 2 SUB
2 solved _ VBD VBD _ 0 ROOT
3 the _ DT DT _ 4 NMOD
4 problem _ NN NN _ 2 OBJ
5 with _ IN IN _ 2 VMOD
6 statistics _ NNS NNS _ 5 PMOD
7 . _ . . _ 2 P

1 I _ PRP PRP _ 2 SUB
2 solved _ VBD VBD _ 0 ROOT
3 the _ DT DT _ 4 NMOD
4 problem _ NN NN _ 2 OBJ
5 with _ IN IN _ 2 VMOD
6 statistics _ NNS NNS _ 5 PMOD
7 . _ . . _ 2 P

+ 56
- 0
tests/data_for_tests/cws_pku_utf_8 View File

@@ -0,0 +1,56 @@
迈向 充满 希望 的 新 世纪 —— 一九九八年 新年 讲话 ( 附 图片 1 张 )
中共中央 总书记 、 国家 主席 江 泽民
( 一九九七年 十二月 三十一日 )
12月 31日 , 中共中央 总书记 、 国家 主席 江 泽民 发表 1998年 新年 讲话 《 迈向 充满 希望 的 新 世纪 》 。 ( 新华社 记者 兰 红光 摄 )
同胞 们 、 朋友 们 、 女士 们 、 先生 们 :
在 1998年 来临 之际 , 我 十分 高兴 地 通过 中央 人民 广播 电台 、 中国 国际 广播 电台 和 中央 电视台 , 向 全国 各族 人民 , 向 香港 特别 行政区 同胞 、 澳门 和 台湾 同胞 、 海外 侨胞 , 向 世界 各国 的 朋友 们 , 致以 诚挚 的 问候 和 良好 的 祝愿 !
1997年 , 是 中国 发展 历史 上 非常 重要 的 很 不 平凡 的 一 年 。 中国 人民 决心 继承 邓 小平 同志 的 遗志 , 继续 把 建设 有 中国 特色 社会主义 事业 推向 前进 。 中国 政府 顺利 恢复 对 香港 行使 主权 , 并 按照 “ 一国两制 ” 、 “ 港人治港 ” 、 高度 自治 的 方针 保持 香港 的 繁荣 稳定 。 中国 共产党 成功 地 召开 了 第十五 次 全国 代表大会 , 高举 邓小平理论 伟大 旗帜 , 总结 百年 历史 , 展望 新 的 世纪 , 制定 了 中国 跨 世纪 发展 的 行动 纲领 。
在 这 一 年 中 , 中国 的 改革 开放 和 现代化 建设 继续 向前 迈进 。 国民经济 保持 了 “ 高 增长 、 低 通胀 ” 的 良好 发展 态势 。 农业 生产 再次 获得 好 的 收成 , 企业 改革 继续 深化 , 人民 生活 进一步 改善 。 对外 经济 技术 合作 与 交流 不断 扩大 。 民主 法制 建设 、 精神文明 建设 和 其他 各项 事业 都 有 新 的 进展 。 我们 十分 关注 最近 一个 时期 一些 国家 和 地区 发生 的 金融 风波 , 我们 相信 通过 这些 国家 和 地区 的 努力 以及 有关 的 国际 合作 , 情况 会 逐步 得到 缓解 。 总的来说 , 中国 改革 和 发展 的 全局 继续 保持 了 稳定 。
在 这 一 年 中 , 中国 的 外交 工作 取得 了 重要 成果 。 通过 高层 互访 , 中国 与 美国 、 俄罗斯 、 法国 、 日本 等 大国 确定 了 双方 关系 未来 发展 的 目标 和 指导 方针 。 中国 与 周边 国家 和 广大 发展中国家 的 友好 合作 进一步 加强 。 中国 积极 参与 亚太经合 组织 的 活动 , 参加 了 东盟 — 中 日 韩 和 中国 — 东盟 首脑 非正式 会晤 。 这些 外交 活动 , 符合 和平 与 发展 的 时代 主题 , 顺应 世界 走向 多极化 的 趋势 , 对于 促进 国际 社会 的 友好 合作 和 共同 发展 作出 了 积极 的 贡献 。
1998年 , 中国 人民 将 满怀信心 地 开创 新 的 业绩 。 尽管 我们 在 经济社会 发展 中 还 面临 不少 困难 , 但 我们 有 邓小平理论 的 指引 , 有 改革 开放 近 20 年 来 取得 的 伟大 成就 和 积累 的 丰富 经验 , 还有 其他 的 各种 有利 条件 , 我们 一定 能够 克服 这些 困难 , 继续 稳步前进 。 只要 我们 进一步 解放思想 , 实事求是 , 抓住 机遇 , 开拓进取 , 建设 有 中国 特色 社会主义 的 道路 就 会 越 走 越 宽广 。
实现 祖国 的 完全 统一 , 是 海内外 全体 中国 人 的 共同 心愿 。 通过 中 葡 双方 的 合作 和 努力 , 按照 “ 一国两制 ” 方针 和 澳门 《 基本法 》 , 1999年 12月 澳门 的 回归 一定 能够 顺利 实现 。
台湾 是 中国 领土 不可分割 的 一 部分 。 完成 祖国 统一 , 是 大势所趋 , 民心所向 。 任何 企图 制造 “ 两 个 中国 ” 、 “ 一中一台 ” 、 “ 台湾 独立 ” 的 图谋 , 都 注定 要 更 失败 。 希望 台湾 当局 以 民族 大义 为重 , 拿 出 诚意 , 采取 实际 的 行动 , 推动 两岸 经济 文化 交流 和 人员 往来 , 促进 两岸 直接 通邮 、 通航 、 通商 的 早日 实现 , 并 尽早 回应 我们 发出 的 在 一个 中国 的 原则 下 两岸 进行 谈判 的 郑重 呼吁 。
环顾 全球 , 日益 密切 的 世界 经济 联系 , 日新月异 的 科技 进步 , 正在 为 各国 经济 的 发展 提供 历史 机遇 。 但是 , 世界 还 不 安宁 。 南北 之间 的 贫富 差距 继续 扩大 ; 局部 冲突 时有发生 ; 不 公正 不 合理 的 旧 的 国际 政治经济 秩序 还 没有 根本 改变 ; 发展中国家 在 激烈 的 国际 经济 竞争 中 仍 处于 弱势 地位 ; 人类 的 生存 与 发展 还 面临 种种 威胁 和 挑战 。 和平 与 发展 的 前景 是 光明 的 , 21 世纪 将 是 充满 希望 的 世纪 。 但 前进 的 道路 不 会 也 不 可能 一帆风顺 , 关键 是 世界 各国 人民 要 进一步 团结 起来 , 共同 推动 早日 建立 公正 合理 的 国际 政治经济 新 秩序 。
中国 政府 将 继续 坚持 奉行 独立自主 的 和平 外交 政策 , 在 和平共处 五 项 原则 的 基础 上 努力 发展 同 世界 各国 的 友好 关系 。 中国 愿意 加强 同 联合国 和 其他 国际 组织 的 协调 , 促进 在 扩大 经贸 科技 交流 、 保护 环境 、 消除 贫困 、 打击 国际 犯罪 等 方面 的 国际 合作 。 中国 永远 是 维护 世界 和平 与 稳定 的 重要 力量 。 中国 人民 愿 与 世界 各国 人民 一道 , 为 开创 持久 和平 、 共同 发展 的 新 世纪 而 不懈努力 !
在 这 辞旧迎新 的 美好 时刻 , 我 祝 大家 新年 快乐 , 家庭 幸福 !
谢谢 ! ( 新华社 北京 12月 31日 电 )
在 十五大 精神 指引 下 胜利 前进 —— 元旦 献辞
我们 即将 以 丰收 的 喜悦 送 走 牛年 , 以 昂扬 的 斗志 迎来 虎年 。 我们 伟大 祖国 在 新 的 一 年 , 将 是 充满 生机 、 充满 希望 的 一 年 。
刚刚 过去 的 一 年 , 大气磅礴 , 波澜壮阔 。 在 这 一 年 , 以 江 泽民 同志 为 核心 的 党中央 , 继承 邓 小平 同志 的 遗志 , 高举 邓小平理论 的 伟大 旗帜 , 领导 全党 和 全国 各族 人民 坚定不移 地 沿着 建设 有 中国 特色 社会主义 道路 阔步 前进 , 写 下 了 改革 开放 和 社会主义 现代化 建设 的 辉煌 篇章 。 顺利 地 恢复 对 香港 行使 主权 , 胜利 地 召开 党 的 第十五 次 全国 代表大会 ——— 两 件 大事 办 得 圆满 成功 。 国民经济 稳中求进 , 国家 经济 实力 进一步 增强 , 人民 生活 继续 改善 , 对外 经济 技术 交流 日益 扩大 。 在 国际 金融 危机 的 风浪 波及 许多 国家 的 情况 下 , 我国 保持 了 金融 形势 和 整个 经济 形势 的 稳定 发展 。 社会主义 精神文明 建设 和 民主 法制 建设 取得 新 的 成绩 , 各项 社会 事业 全面 进步 。 外交 工作 取得 可喜 的 突破 , 我国 的 国际 地位 和 国际 威望 进一步 提高 。 实践 使 亿万 人民 对 邓小平理论 更加 信仰 , 对 以 江 泽民 同志 为 核心 的 党中央 更加 信赖 , 对 伟大 祖国 的 光辉 前景 更加 充满 信心 。
1998年 , 是 全面 贯彻 落实 党 的 十五大 提 出 的 任务 的 第一 年 , 各 条 战线 改革 和 发展 的 任务 都 十分 繁重 , 有 许多 深 层次 的 矛盾 和 问题 有待 克服 和 解决 , 特别 是 国有 企业 改革 已经 进入 攻坚 阶段 。 我们 必须 进一步 深入 学习 和 掌握 党 的 十五大 精神 , 统揽全局 , 精心 部署 , 狠抓 落实 , 团结 一致 , 艰苦奋斗 , 开拓 前进 , 为 夺取 今年 改革 开放 和 社会主义 现代化 建设 的 新 胜利 而 奋斗 。
今年 是 党 的 十一 届 三中全会 召开 20 周年 , 是 我们 党 和 国家 实现 伟大 的 历史 转折 、 进入 改革 开放 历史 新 时期 的 20 周年 。 在 新 的 一 年 里 , 大力 发扬 十一 届 三中全会 以来 我们 党 所 恢复 的 优良 传统 和 在 新 的 历史 条件 下 形成 的 优良 作风 , 对于 完成 好 今年 的 各项 任务 具有 十分 重要 的 意义 。
我们 要 更 好 地 坚持 解放思想 、 实事求是 的 思想 路线 。 解放思想 、 实事求是 , 是 邓小平理论 的 精髓 。 实践 证明 , 只有 解放思想 、 实事求是 , 才 能 冲破 各种 不 切合 实际 的 或者 过时 的 观念 的 束缚 , 真正 做到 尊重 、 认识 和 掌握 客观 规律 , 勇于 突破 , 勇于 创新 , 不断 开创 社会主义 现代化 建设 的 新 局面 。 党 的 十五大 是 我们 党 解放思想 、 实事求是 的 新 的 里程碑 。 进一步 认真 学习 和 掌握 十五大 精神 , 解放思想 、 实事求是 , 我们 的 各项 事业 就 能 结 出 更加 丰硕 的 成果 。
我们 要 更 好 地 坚持 以 经济 建设 为 中心 。 各项 工作 必须 以 经济 建设 为 中心 , 是 邓小平理论 的 基本 观点 , 是 党 的 基本 路线 的 核心 内容 , 近 20 年 来 的 实践 证明 , 坚持 这个 中心 , 是 完全 正确 的 。 今后 , 我们 能否 把 建设 有 中国 特色 社会主义 伟大 事业 全面 推向 21 世纪 , 关键 仍然 要 看 能否 把 经济 工作 搞 上去 。 各级 领导 干部 要 切实 把 精力 集中 到 贯彻 落实 好 中央 关于 今年 经济 工作 的 总体 要求 和 各项 重要 任务 上 来 , 不断 提高 领导 经济 建设 的 能力 和 水平 。
我们 要 更 好 地 坚持 “ 两手抓 、 两手 都 要 硬 ” 的 方针 。 在 坚持 以 经济 建设 为 中心 的 同时 , 积极 推进 社会主义 精神文明 建设 和 民主 法制 建设 , 是 建设 富强 、 民主 、 文明 的 社会主义 现代化 国家 的 重要 内容 。 实践 证明 , 经济 建设 的 顺利 进行 , 离 不 开 精神文明 建设 和 民主 法制 建设 的 保证 。 党 的 十五大 依据 邓小平理论 和 党 的 基本 路线 提 出 的 党 在 社会主义 初级阶段 经济 、 政治 、 文化 的 基本 纲领 , 为 “ 两手抓 、 两手 都 要 硬 ” 提供 了 新 的 理论 根据 , 提 出 了 更 高 要求 , 现在 的 关键 是 认真 抓好 落实 。
我们 要 更 好 地 发扬 求真务实 、 密切 联系 群众 的 作风 。 这 是 把 党 的 方针 、 政策 落到实处 , 使 改革 和 建设 取得 胜利 的 重要 保证 。 在 当前 改革 进一步 深化 , 经济 不断 发展 , 同时 又 出现 一些 新 情况 、 新 问题 和 新 困难 的 形势 下 , 更 要 发扬 这样 的 好 作风 。 要 尊重 群众 的 意愿 , 重视 群众 的 首创 精神 , 关心 群众 的 生活 疾苦 。 江 泽民 同志 最近 强调 指出 , 要 大力 倡导 说实话 、 办 实事 、 鼓 实劲 、 讲 实效 的 作风 , 坚决 制止 追求 表面文章 , 搞 花架子 等 形式主义 , 坚决 杜绝 脱离 群众 、 脱离 实际 、 浮躁 虚夸 等 官僚主义 。 这 是 非常 重要 的 。 因此 , 各级 领导 干部 务必 牢记 全心全意 为 人民 服务 的 宗旨 , 在 勤政廉政 、 艰苦奋斗 方面 以身作则 , 当 好 表率 。
1998 , 瞩目 中华 。 新 的 机遇 和 挑战 , 催 人 进取 ; 新 的 目标 和 征途 , 催 人 奋发 。 英雄 的 中国 人民 在 以 江 泽民 同志 为 核心 的 党中央 坚强 领导 和 党 的 十五大 精神 指引 下 , 更 高 地 举起 邓小平理论 的 伟大 旗帜 , 团结 一致 , 扎实 工作 , 奋勇前进 , 一定 能够 创造 出 更加 辉煌 的 业绩 !
北京 举行 新年 音乐会
江 泽民 李 鹏 乔 石 朱 镕基 李 瑞环 刘 华清 尉 健行 李 岚清 与 万 名 首都 各界 群众 和 劳动模范 代表 一起 辞旧迎新 ( 附 图片 1 张 )
党 和 国家 领导人 江 泽民 、 李 鹏 、 乔 石 、 朱 镕基 、 李 瑞环 、 刘 华清 、 尉 健行 、 李 岚清 等 与 万 名 首都 各界 群众 和 劳动模范 代表 一起 欣赏 了 ’98 北京 新年 音乐会 的 精彩 节目 。 这 是 江 泽民 等 在 演出 结束 后 同 演出 人员 合影 。
( 新华社 记者 樊 如钧 摄 )
本报 北京 12月 31日 讯 新华社 记者 陈 雁 、 本报 记者 何 加正 报道 : 在 度过 了 非凡 而 辉煌 的 1997年 , 迈向 充满 希望 的 1998年 之际 , ’98 北京 新年 音乐会 今晚 在 人民 大会堂 举行 。 党 和 国家 领导人 江 泽民 、 李 鹏 、 乔 石 、 朱 镕基 、 李 瑞环 、 刘 华清 、 尉 健行 、 李 岚清 与 万 名 首都 各界 群众 和 劳动模范 代表 一起 , 在 激昂 奋进 的 音乐声 中 辞旧迎新 。
今晚 的 长安街 流光溢彩 , 火树银花 ; 人民 大会堂 里 灯火辉煌 , 充满 欢乐 祥和 的 喜庆 气氛 。 在 这 场 由 中共 北京 市委 宣传部 、 市政府 办公厅 等 单位 主办 的 题 为 “ 世纪 携手 、 共 奏 华章 ” 的 新年 音乐会 上 , 中国 三 个 著名 交响乐团 ——— 中国 交响乐团 、 上海 交响乐团 、 北京 交响乐团 首 次 联袂 演出 。 著名 指挥家 陈 佐湟 、 陈 燮阳 、 谭 利华 分别 指挥 演奏 了 一 批 中外 名曲 , 京 沪 两地 200 多 位 音乐家 组成 的 大型 乐队 以 饱满 的 激情 和 精湛 的 技艺 为 观众 奉献 了 一 台 高 水准 的 交响音乐会 。
音乐会 在 雄壮 的 管弦乐 《 红旗 颂 》 中 拉开 帷幕 , 舒展 、 优美 的 乐曲声 使 人们 仿佛 看到 : 五星红旗 在 天安门 城楼 上 冉冉 升起 ; 仿佛 听到 : 在 红旗 的 指引 下 中国 人民 向 现代化 新 征程 迈进 的 脚步声 。 钢琴 与 管弦乐队 作品 《 东方 之 珠 》 , 把 广大 听众 耳熟能详 的 歌曲 改编 为 器乐曲 , 以 其 优美 感人 的 旋律 抒发 了 洗雪 百年 耻辱 的 香港 明天 会 更 好 的 情感 。 专程 回国 参加 音乐会 的 著名 女高音 歌唱家 迪里拜尔 演唱 的 《 春 之 声 》 , 把 人们 带 到 了 万象更新 的 田野 和 山谷 ; 享誉 国际 乐坛 的 男高音 歌唱家 莫 华伦 演唱 了 著名 歌剧 《 图兰朵 》 选段 “ 今夜 无 人 入睡 ” , 把 人们 带入 迷人 的 艺术 境地 。 音乐会 上 还 演奏 了 小提琴 协奏曲 《 梁 山伯 与 祝 英台 》 、 柴可夫斯基 的 《 第四 交响曲 ——— 第四 乐章 》 、 交响诗 《 罗马 的 松树 》 等 中外 著名 交响曲 。
万 人 大会堂 今晚 座无虚席 , 观众 被 艺术家 们 精湛 的 表演 深深 打动 , 不断 报 以 经久不息 的 热烈 掌声 。 艺术家 们 频频 谢幕 , 指挥家 依次 指挥 演出 返 场 曲目 , 最后 音乐会 在 《 红色 娘子军 》 选曲 、 《 白毛女 》 选曲 、 《 北京 喜讯 到 边寨 》 等 乐曲声 中 达到 高潮 。
演出 结束 后 , 江 泽民 等 党 和 国家 领导人 走 上 舞台 , 亲切 会见 了 参加 演出 的 全体 人员 , 祝贺 演出 成功 , 并 与 他们 合影 留念 。
李 铁映 、 贾 庆林 、 曾 庆红 等 领导 同志 也 出席 了 今晚 音乐会 。
李 鹏 在 北京 考察 企业
向 广大 职工 祝贺 新年 , 对 节日 坚守 岗位 的 同志 们 表示 慰问
新华社 北京 十二月 三十一日 电 ( 中央 人民 广播 电台 记者 刘 振英 、 新华社 记者 张 宿堂 ) 今天 是 一九九七年 的 最后 一 天 。 辞旧迎新 之际 , 国务院 总理 李 鹏 今天 上午 来到 北京 石景山 发电 总厂 考察 , 向 广大 企业 职工 表示 节日 的 祝贺 , 向 将要 在 节日 期间 坚守 工作 岗位 的 同志 们 表示 慰问 。
上午 九时 二十分 , 李 鹏 总理 在 北京 市委 书记 、 市长 贾 庆林 的 陪同 下 , 来到 位于 北京 西郊 的 北京 石景山 发电 总厂 。 始建 于 一九一九年 的 北京 石景山 发电 总厂 是 华北 电力 集团公司 骨干 发电 企业 , 承担 着 向 首都 供电 、 供热 任务 , 装机 总 容量 一百一十六点六万 千瓦 。 总厂 年发电量 四十五亿 千瓦时 , 供热 能力 八百 百万大卡/小时 , 现 供热 面积 已 达 八百 多 万 平方米 。 早 在 担任 华北 电管局 领导 时 , 李 鹏 就 曾 多次 到 发电 总厂 检查 指导 工作 。
在 总厂 所 属 的 石景山 热电厂 , 李 鹏 首先 向 华北 电管局 、 电厂 负责人 详细 询问 了 目前 电厂 生产 、 职工 生活 和 华北 电网 向 首都 供电 、 供热 的 有关 情况 。 随后 , 他 又 实地 察看 了 发电机组 的 运行 情况 和 电厂 一号机 、 二号机 控制室 。 在 控制室 , 李 鹏 与 职工 们 一一 握手 , 向 大家 表示 慰问 。 他 说 , 在 一九九八年 即将 到来之际 , 有 机会 再次 回到 石景山 发电 总厂 , 感到 十分 高兴 。 李 鹏 亲切 地 说 : 『 今天 我 看到 了 许多 新 的 、 年轻 的 面孔 , 这 说明 在 老 同志 们 作出 贡献 退 下来 后 , 新 一代 的 年轻人 成长 起来 了 、 成熟 起来 了 , 我 感到 十分 欣慰 。 』
( A 、 B )
李 鹏 说 : “ 作为 首都 的 电力 工作者 , 你们 为 首都 的 各项 重大 活动 的 顺利 进行 , 为 保障 人民 群众 的 工作 、 生活 和 学习 , 为 促进 首都 经济 的 发展 作出 了 自己 的 贡献 。 明天 就 是 元旦 , 你们 还有 许多 同志 要 坚守 岗位 , 我 向 你们 、 向 全体 电力 工作者 表示 感谢 。 现在 , 我们 的 首都 已经 结束 了 拉 闸 限 电 的 历史 , 希望 依靠 大家 , 使 拉 闸 限 电 的 历史 永远 不再 重演 。 同时 , 也 希望 你们 安全 生产 、 经济 调度 , 实现 经济 增长 方式 的 转变 。 ” 李 鹏 最后 向 电业 职工 , 向 全 北京市 的 人民 拜年 , 向 大家 致以 新春 的 问候 , 祝愿 电力 事业 取得 新 的 成绩 , 祝愿 北京市 在 改革 、 发展 和 稳定 的 各项 工作 中 取得 新 的 成就 。
参观 工厂 结束 后 , 李 鹏 又 来到 工厂 退休 职工 郭 树范 和 闫 戌麟 家 看望 慰问 , 向 他们 拜年 。 曾经 是 高级 工程师 的 郭 树范 退休 前 一直 在 发电厂 从事 土建工程 建设 , 退休 后 , 与 老伴 一起 抚养 着 身体 欠佳 的 孙子 。 李 鹏 对 他们 倾心 照顾 下 一 代 表示 肯定 。 他 说 : “ 人 老 了 , 照顾 照顾 后代 也 是 一 件 可以 带来 快乐 的 事 , 当然 , 对 孩子 们 不 能 溺爱 , 要 让 他们 健康 成长 。 ” 在 老工人 闫 戌麟 家 , 当 李 鹏 了解 到 老闫 退休 前 一直 都 是 厂里 的 先进 工作者 、 曾经 被 评为 北京市 “ 五好 职工 ” , 退休 后 仍然 为 改善 职工 的 住房 而 奔波 时 , 十分 高兴 , 对 他 为 工厂 建设 作出 的 贡献 表示 感谢 。 在 郭 家 和 闫 家 , 李 鹏 都 具体 地 了解 了 他们 退休 后 的 生活 保障 问题 , 并 与 一些 老 职工 一起 回忆 起 了 当年 建设 电厂 的 情景 。 李 鹏 说 : “ 当年 搞 建设 , 条件 比 现在 差 多 了 , 大家 也 很 少 计较 什么 , 只是 一心 想 着 把 电厂 建 好 。 现在 条件 好 了 , 但 艰苦奋斗 、 无私奉献 的 精神 可 不 能 丢 。 ” 李 鹏 最后 祝 他们 新春 快乐 , 身体 健康 , 家庭 幸福 。
陪同 考察 企业 并 看望 慰问 职工 的 国务院 有关 部门 和 北京市 负责人 还有 : 史 大桢 、 高 严 、 石 秀诗 、 阳 安江 等 。
挂 起 红灯 迎 新年 ( 图片 )
元旦 来临 , 安徽省 合肥市 长江路 悬挂 起 3300 盏 大 红灯笼 , 为 节日 营造 出 “ 千 盏 灯笼 凌空 舞 , 十 里 长街 别样 红 ” 的 欢乐 祥和 气氛 。 ( 新华社 记者 戴 浩 摄 )
( 传真 照片 )
全总 致 全国 各族 职工 慰问信
勉励 广大 职工 发挥 工人阶级 主力军 作用 , 为 企业 改革 发展 建功立业
本报 北京 1月 1日 讯 中华 全国 总工会 今日 发出 《 致 全国 各族 职工 慰问信 》 , 向 全国 各族 职工 祝贺 新年 。
慰问信 说 , 实现 党 的 十五大 提 出 的 宏伟 目标 , 必须 依靠 工人阶级 和 全体 人民 的 长期 奋斗 。 工人阶级 是 我们 国家 的 领导 阶级 , 是 先进 生产力 和 生产关系 的 代表 , 是 两 个 文明 建设 的 主力军 , 是 维护 社会 安定团结 的 中坚 力量 。 党 的 十五大 再次 强调 要 坚持 全心全意 依靠 工人阶级 的 方针 , 具有 重大 的 意义 。 广大 职工 要 以 邓小平理论 和 党 的 基本 路线 为 指导 , 坚持 党 的 基本 纲领 和 各项 方针 政策 , 积极 投身 于 改革 和 建设 事业 。 要 坚持 站 在 改革 的 前列 , 转变 思想 观念 , 增强 市场 意识 、 竞争 意识 和 效益 意识 , 以 实际 行动 促进 改革 的 不断 深化 。 要 发扬 工人阶级 的 首创 精神 , 不断 为 企业 转机建制 、 调整 结构 、 加强 管理 、 提高 效益 献计献策 。 要 大力 开展 劳动 竞赛 、 合理化 建议 、 技术 革新 、 技术 协作 和 发明 创造 等 活动 , 努力 提高 产品 质量 和 经济效益 , 推动 企业 加快 技术 进步 , 实现 增长 方式 的 根本 转变 , 再 创 国有 企业 的 辉煌 。 要 正确 对待 企业 改革 和 发展 中 的 困难 和 问题 , 树立 起 战胜 困难 的 勇气 和 信心 , 锲而不舍 , 迎难而上 , 为 企业 的 改革 和 发展 建功立业 。
慰问信 指出 , 广大 职工 要 以 主人翁 的 姿态 , 积极 行使 当家作主 的 权利 。 要 不断 提高 自身 素质 , 发扬 爱国 奉献 、 爱厂如家 、 爱岗敬业 的 精神 , 学习 掌握 先进 科学 文化 知识 , 成为 本职工作 的 行家里手 , 迎接 新 世纪 面临 的 挑战 。
慰问信 最后 说 , 让 我们 在 邓小平理论 和 党 的 基本 路线 指导 下 , 更加 紧密 地 团结 在 以 江 泽民 同志 为 核心 的 党中央 周围 , 统揽全局 , 精心 部署 , 狠抓 落实 , 团结 一致 , 艰苦奋斗 , 开拓 前进 , 在 两 个 文明 建设 中 充分 发挥 工人阶级 主力军 作用 , 为 实现 跨 世纪 宏伟 目标 作出 新 的 更 大 的 贡献 。
忠诚 的 共产主义 战士 , 久经考验 的 无产阶级 革命家 刘 澜涛 同志 逝世
( 附 图片 1 张 )

+ 1018
- 0
tests/data_for_tests/cws_test
File diff suppressed because it is too large
View File


+ 1002
- 0
tests/data_for_tests/cws_train
File diff suppressed because it is too large
View File


+ 6
- 0
tests/data_for_tests/embedding/small_static_embedding/glove.6B.50d_test.txt View File

@@ -0,0 +1,6 @@
the 0.418 0.24968 -0.41242 0.1217 0.34527 -0.044457 -0.49688 -0.17862 -0.00066023 -0.6566 0.27843 -0.14767 -0.55677 0.14658 -0.0095095 0.011658 0.10204 -0.12792 -0.8443 -0.12181 -0.016801 -0.33279 -0.1552 -0.23131 -0.19181 -1.8823 -0.76746 0.099051 -0.42125 -0.19526 4.0071 -0.18594 -0.52287 -0.31681 0.00059213 0.0074449 0.17778 -0.15897 0.012041 -0.054223 -0.29871 -0.15749 -0.34758 -0.045637 -0.44251 0.18785 0.0027849 -0.18411 -0.11514 -0.78581
of 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603 0.18157 -0.52393 0.10381 -0.17566 0.078852 -0.36216 -0.11829 -0.83336 0.11917 -0.16605 0.061555 -0.012719 -0.56623 0.013616 0.22851 -0.14396 -0.067549 -0.38157 -0.23698 -1.7037 -0.86692 -0.26704 -0.2589 0.1767 3.8676 -0.1613 -0.13273 -0.68881 0.18444 0.0052464 -0.33874 -0.078956 0.24185 0.36576 -0.34727 0.28483 0.075693 -0.062178 -0.38988 0.22902 -0.21617 -0.22562 -0.093918 -0.80375
to 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246 -0.41376 0.13228 -0.29847 -0.085253 0.17118 0.22419 -0.10046 -0.43653 0.33418 0.67846 0.057204 -0.34448 -0.42785 -0.43275 0.55963 0.10032 0.18677 -0.26854 0.037334 -2.0932 0.22171 -0.39868 0.20912 -0.55725 3.8826 0.47466 -0.95658 -0.37788 0.20869 -0.32752 0.12751 0.088359 0.16351 -0.21634 -0.094375 0.018324 0.21048 -0.03088 -0.19722 0.082279 -0.09434 -0.073297 -0.064699 -0.26044
and 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923 -0.51332 -0.47368 -0.33075 -0.13834 0.2702 0.30938 -0.45012 -0.4127 -0.09932 0.038085 0.029749 0.10076 -0.25058 -0.51818 0.34558 0.44922 0.48791 -0.080866 -0.10121 -1.3777 -0.10866 -0.23201 0.012839 -0.46508 3.8463 0.31362 0.13643 -0.52244 0.3302 0.33707 -0.35601 0.32431 0.12041 0.3512 -0.069043 0.36885 0.25168 -0.24517 0.25381 0.1367 -0.31178 -0.6321 -0.25028 -0.38097
in 0.33042 0.24995 -0.60874 0.10923 0.036372 0.151 -0.55083 -0.074239 -0.092307 -0.32821 0.09598 -0.82269 -0.36717 -0.67009 0.42909 0.016496 -0.23573 0.12864 -1.0953 0.43334 0.57067 -0.1036 0.20422 0.078308 -0.42795 -1.7984 -0.27865 0.11954 -0.12689 0.031744 3.8631 -0.17786 -0.082434 -0.62698 0.26497 -0.057185 -0.073521 0.46103 0.30862 0.12498 -0.48609 -0.0080272 0.031184 -0.36576 -0.42699 0.42164 -0.11666 -0.50703 -0.027273 -0.53285
a 0.21705 0.46515 -0.46757 0.10082 1.0135 0.74845 -0.53104 -0.26256 0.16812 0.13182 -0.24909 -0.44185 -0.21739 0.51004 0.13448 -0.43141 -0.03123 0.20674 -0.78138 -0.20148 -0.097401 0.16088 -0.61836 -0.18504 -0.12461 -2.2526 -0.22321 0.5043 0.32257 0.15313 3.9636 -0.71365 -0.67012 0.28388 0.21738 0.14433 0.25926 0.23434 0.4274 -0.44451 0.13813 0.36973 -0.64289 0.024142 -0.039315 -0.26037 0.12017 -0.043782 0.41013 0.1796

+ 7
- 0
tests/data_for_tests/embedding/small_static_embedding/word2vec_test.txt View File

@@ -0,0 +1,7 @@
5 50
the 0.418 0.24968 -0.41242 0.1217 0.34527 -0.044457 -0.49688 -0.17862 -0.00066023 -0.6566 0.27843 -0.14767 -0.55677 0.14658 -0.0095095 0.011658 0.10204 -0.12792 -0.8443 -0.12181 -0.016801 -0.33279 -0.1552 -0.23131 -0.19181 -1.8823 -0.76746 0.099051 -0.42125 -0.19526 4.0071 -0.18594 -0.52287 -0.31681 0.00059213 0.0074449 0.17778 -0.15897 0.012041 -0.054223 -0.29871 -0.15749 -0.34758 -0.045637 -0.44251 0.18785 0.0027849 -0.18411 -0.11514 -0.78581
of 0.70853 0.57088 -0.4716 0.18048 0.54449 0.72603 0.18157 -0.52393 0.10381 -0.17566 0.078852 -0.36216 -0.11829 -0.83336 0.11917 -0.16605 0.061555 -0.012719 -0.56623 0.013616 0.22851 -0.14396 -0.067549 -0.38157 -0.23698 -1.7037 -0.86692 -0.26704 -0.2589 0.1767 3.8676 -0.1613 -0.13273 -0.68881 0.18444 0.0052464 -0.33874 -0.078956 0.24185 0.36576 -0.34727 0.28483 0.075693 -0.062178 -0.38988 0.22902 -0.21617 -0.22562 -0.093918 -0.80375
to 0.68047 -0.039263 0.30186 -0.17792 0.42962 0.032246 -0.41376 0.13228 -0.29847 -0.085253 0.17118 0.22419 -0.10046 -0.43653 0.33418 0.67846 0.057204 -0.34448 -0.42785 -0.43275 0.55963 0.10032 0.18677 -0.26854 0.037334 -2.0932 0.22171 -0.39868 0.20912 -0.55725 3.8826 0.47466 -0.95658 -0.37788 0.20869 -0.32752 0.12751 0.088359 0.16351 -0.21634 -0.094375 0.018324 0.21048 -0.03088 -0.19722 0.082279 -0.09434 -0.073297 -0.064699 -0.26044
and 0.26818 0.14346 -0.27877 0.016257 0.11384 0.69923 -0.51332 -0.47368 -0.33075 -0.13834 0.2702 0.30938 -0.45012 -0.4127 -0.09932 0.038085 0.029749 0.10076 -0.25058 -0.51818 0.34558 0.44922 0.48791 -0.080866 -0.10121 -1.3777 -0.10866 -0.23201 0.012839 -0.46508 3.8463 0.31362 0.13643 -0.52244 0.3302 0.33707 -0.35601 0.32431 0.12041 0.3512 -0.069043 0.36885 0.25168 -0.24517 0.25381 0.1367 -0.31178 -0.6321 -0.25028 -0.38097
in 0.33042 0.24995 -0.60874 0.10923 0.036372 0.151 -0.55083 -0.074239 -0.092307 -0.32821 0.09598 -0.82269 -0.36717 -0.67009 0.42909 0.016496 -0.23573 0.12864 -1.0953 0.43334 0.57067 -0.1036 0.20422 0.078308 -0.42795 -1.7984 -0.27865 0.11954 -0.12689 0.031744 3.8631 -0.17786 -0.082434 -0.62698 0.26497 -0.057185 -0.073521 0.46103 0.30862 0.12498 -0.48609 -0.0080272 0.031184 -0.36576 -0.42699 0.42164 -0.11666 -0.50703 -0.027273 -0.53285
a 0.21705 0.46515 -0.46757 0.10082 1.0135 0.74845 -0.53104 -0.26256 0.16812 0.13182 -0.24909 -0.44185 -0.21739 0.51004 0.13448 -0.43141 -0.03123 0.20674 -0.78138 -0.20148 -0.097401 0.16088 -0.61836 -0.18504 -0.12461 -2.2526 -0.22321 0.5043 0.32257 0.15313 3.9636 -0.71365 -0.67012 0.28388 0.21738 0.14433 0.25926 0.23434 0.4274 -0.44451 0.13813 0.36973 -0.64289 0.024142 -0.039315 -0.26037 0.12017 -0.043782 0.41013 0.1796

+ 6
- 0
tests/data_for_tests/io/20ng/dev.csv View File

@@ -0,0 +1,6 @@
talk.religion.misc,"sandvik newton apple com \( kent sandvik \) subject clarification organization cookamunga tourist bureau lines 14 sorry , san jose based rosicrucian order called r c , n't remember time stand r c ordo rosae crucis , words latin order rose cross sigh , seems loosing long term memory otherwise headquarters san jose pretty decent metaphysical bookstore , interested books son loves run around egyptian museum cheers , kent sandvik newton apple com alink ksand private activities net"
talk.religion.misc,"subject catholic lit nunnally acs harding edu \( john nunnally \) distribution world organization harding university , , ar nntp posting host acs harding edu x news reader vms news 1 reply dlphknob camelot bradley edu 's message 16 apr 93 18 57 20 gmtlines 45 lines 45 dlphknob camelot dlphknob camelot bradley edu writes 1993apr14 476 mtechca maintech com foster mtechca maintech com writes surprised saddened would expect kind behavior evangelical born gospel thumping face 're true christian protestants , always thought catholics behaved better please stoop level e b g f w c protestants , think best way witness strident , intrusive , loud , insulting self righteous \( pleading mode \) please ! i'm begging ! quit confusing religious groups , stop making generalizations ! i'm protestant ! i'm evangelical ! n't believe way way ! i'm creation scientist ! n't think homosexuals hung ! want discuss bible thumpers , would better singling \( making generalizations \) fundamentalists compared actions methodists southern baptists , would think different religions ! sarcasm sure pick correct groups bible thumpers , fundamentalists , southern baptists deserve hasty generalizations prejudicial statements n't pick methodists ! sarcasm please , prejudice thinking people group , please n't write protestants evangelicals ! \( pleading mode \) god wish could get ahold thomas stories n , n tha gb , gb n yvan sasha david cole iv chief research dlphknob camelot bradley edu"
talk.religion.misc,"sandvik newton apple com \( kent sandvik \) subject alt sex stories literary critical analysis \) organization cookamunga tourist bureau lines 16 article h7v agate berkeley edu , dzkriz ocf berkeley edu \( dennis kriz \) wrote i'm going try something , perhaps many would thought even possible want begin process initiating literary critical study pornography posted alt sex stories , identify major themes motifs present stories posted opening possibility objective moral evaluation material present dennis , i'm astounded n't know interested even study filth alt sex stories provide cheers , kent sandvik newton apple com alink ksand private activities net"
talk.religion.misc,"anthony landreneau ozonehole com \( anthony landreneau \) subject abortion distribution world organization ozone online operations , inc , dba ozone hole bbs reply anthony landreneau ozonehole com \( anthony landreneau \) lines 21 margoli watson ibm com \( larry margolis \) anthony landreneau ozonehole com lm rape passed , nothing ever take away lm true forcing remain pregnant continues violation lm body another 9 months see unbelievably cruel life violation cruel , killing living solely friend right cold anthony slmr 2 1 's difference orange \? ozone hole bbs private bulletin board service \( 504 \) 891 3142 3 full service nodes usrobotics 16 8k bps 10 gigs 100 , 000 files skydive new orleans ! rime network mail hub 500 usenet newsgroups please route questions inquiries postmaster ozonehole com"
talk.religion.misc,"kevin rotag mi org \( kevin darcy \) subject 2000 years , say christian morality organization , \? \? \? lines 15 article pww spac at1 59 rice edu pww spacsun rice edu \( peter walker \) writes article 1993apr18 rotag mi org , kevin rotag mi org \( kevin darcy \) wrote , one , considered intentionality primary ontological stuff built perceptions , consciousness , thoughts , etc frank means alone seeing intentionality \( values , puts \) underlying human experience , even called objective experiences , measurements natural world , output des chip others us see intellectual masturbation 'll defer greater firsthand knowledge matters kevin"
talk.religion.misc,"bil okcforum osrhe edu \( bill conner \) subject 2000 years , say christian morality nntp posting host okcforum osrhe edu organization okcforum unix users group x newsreader tin version 1 1 pl9 lines 54 mind , say science basis values bit reach science basis observable fact 'd say one chooses observe observation interpreted significance 's given depends great deal values observer science human activity , subject potential distortion human activity myth scientists moral influence ethical concern , knowledge whole pure nature biases scientist , nonsense bill one argue objective values \( moral sense \) one must first start demonstrating morality objective considering meaning word objective doubt ever happen , back original question objective morality \? may unfortunate choice words , almost self contradictory objective sense used means something immutable absolute morality describes behavior group people first term inclusive , second specific concept supposedly described may meaning however god described christians \( instance \) , existence apart independent humankind existence outside frame reference \( reality \) declares thing , necessarily since defined omnipotent , claims believed , least omnipotent relative us god intrinsically self defined reality whatever says objective sense god determines standard conduct , standard objective human beings held accountable conformance standard permitted ignore , substitute relative morality mode conduct , giving term morality nebulous , meaningless sense argued pretending misunderstand standard objective conduct required meet standard therefore objectively determined convenient pretend term morality infinitely , n't mean objective standard n't exist morality come mean little cultural norm , preferred conduct decent people , making seem subjective , derived absolute , objective , standard ironically , objective standard perfect accord true nature \( according christianity least \) , yet condemned contrary 
human , oppressive severe may due bill much amoral standard , like , 's x"

+ 6
- 0
tests/data_for_tests/io/20ng/test.csv View File

@@ -0,0 +1,6 @@
talk.religion.misc,"halat pooh bears \( jim halat \) subject 2000 years , say christian morality reply halat pooh bears \( jim halat \) lines 43 article 1993apr15 wam umd edu , wam umd edu \( jay stein objectively subjective \) writes horus ap mchp sni de frank d012s658 uucp \( frank o'dwyer \) discussion christianity objective morals question effective difference objective values exist , disagreement values subjective \? n't see difference saying absolute truth exists , people think lie truth relative \? think examples , first statement fundamental disagreement least two people second statement agreed upon put another way , someone says objective values exist agree values subjective jim halat"
talk.religion.misc,"halat pooh bears \( jim halat \) subject 2000 years , say christian morality reply halat pooh bears \( jim halat \) lines 17 article na4 horus ap mchp sni de , frank d012s658 uucp \( frank o'dwyer \) writes really \? n't know objective value \? offered people u , collectively , 1 land america , would sound like good deal \? happens subjective example people us would happen agree continue move price point people would accept probably would accept high enough number endpoints subjective scale given homes objective viewpoints jim halat"
talk.religion.misc,"halat pooh bears \( jim halat \) subject 2000 years , say christian morality reply halat pooh bears \( jim halat \) lines 34 article horus ap mchp sni de , frank d012s658 uucp \( frank o'dwyer \) writes firstly , science basis values , way round better explain objective atoms , get subjective values , go atoms objective n't even real scientists call atom nothing mathematical model describes certain physical , observable properties surroundings subjective objective , though , approach scientist takes discussing model observations objective science objective approach subjectively selected scientist objective case means specified , unchanging set rules colleagues use discuss science contrast objective morality may objective approach subjectively discuss beliefs morality exists objective morality also , science deals discuss observations physical world around us method discussion objective \( science discussion \) science makes claims know even sometimes observe simply gives us way discuss surroundings meaningful , consistent way think bohr said \( paraphrase \) science say physical world jim halat"
talk.religion.misc,"mwilson ncratl atlantaga ncr com \( mark wilson \) subject message mr president know happened \? organization ncr engineering manufacturing atlanta atlanta , ga lines 58 noose ecn purdue edu tbrent bank ecn purdue edu \( timothy j brent \) writes probably , n't pack heavy weaponry intent use please cite evidence intending use n't really think allowed keep stuff \? \? , tell live sure steer well clear check sig public also rights , placed individual society rights individuals rights go ahead , call commie , ok , commie 'd singing different tune exercised right rape daughter think right rape anyone \? wonder n't care others broke law , please indicate law feel koresh broke , convicted said crime threat society , feel owning guns makes threat society ou going start going knives baseball bats well feel someone spouts unpopular ideas definition threat society job simple simple think job assualt civilians support first , second , fourth , fifth , sixth , eighth amendment rights , lest taken away fbi davidians think 'll support \( except 2 \) words n't support mob rule n't prettier merely mob calls government ai n't charity using someone else 's money wilson 's theory relativity go back far enough , 're related mark wilson atlantaga ncr com"
talk.religion.misc,"alizard tweekco uucp \( lizard \) subject 14 apr 93 god 's promise 1 john 1 7 organization com systems bbs , , ca \( 510 \) 631 lines 20 starowl rahul net \( michael adams \) writes anyone netland process devising new religion , use lamb bull , already reserved please choose another animal , preferably one endangered species list washed blood barney dinosaur \? \) judging postings 've read usenet non usenet bbs conferences , barney definitely endangered species especially runs dark alley lizard lizard internet addresses alizard tweekco boo pacbell com \( preferred \) pacbell com ! boo ! tweekco ! alizard \( bang path \) alizard com \( backup \) pgp2 2 public key available request"
talk.religion.misc,"alizard tweekco uucp \( lizard \) subject oto , ancient order oriental templars organization com systems bbs , , ca \( 510 \) 631 lines 18 thyagi cup portal com \( thyagi morgoth nagasiva \) writes organization known present time ancient order oriental templars ordo templi orientis otherwise hermetic brotherhood light organization official e mail address days \? \( address sf bay area lodges , e g would \) 93 lizard lizard internet addresses alizard tweekco boo pacbell com \( preferred \) pacbell com ! boo ! tweekco ! alizard \( bang path \) alizard com \( backup \) pgp2 2 public key available request"

+ 6
- 0
tests/data_for_tests/io/20ng/train.csv View File

@@ -0,0 +1,6 @@
talk.religion.misc,"deane binah cc brandeis edu \( david matthew deane \) subject flaming nazis reply deane binah cc brandeis edu organization brandeis university lines 106 okay , 'll bite probably leave alone , heck article 1993apr14 422 sun0 urz uni heidelberg de , gsmith lauren iwr uni heidelberg de \( gene w smith \) writes article brewich hou tx us popec brewich hou tx us \( pope charles \) writes name guy responsible much uniforms , props used early nazis rallies name roehm , hitler claim came swastika business n't credit actual flag design party member dentist \? believe gives credit mein kampf killed early nazi purge many associates flaming homosexuals well know also trying find actual evidence common assertion recently postings groups soc history soc culture german uncovered net experts could provide well , i'm expert , histories nazi germany assert make reference several scandals occurred long night long knives impression got homosexuality portions sa common knowledge also , book \( homosexual author whose name escapes moment \) called homosexuals history asserts roehm heines homosexuals , well others roehm 's sa circle books say roehm associate , edmund heines , homosexual able find nothing beyond , suspect sort historical urban legend well , 're one germany n't believe history books , look primary sources us outside germany access seems plenty documented instances several scandals , fact knight long knives several sa members \( including heines \) found sleeping together , etc also believe people complaining sa 's homosexual activities \( young boys , etc \) histories 've read make convincing case none sounds like urban legend \( irving , notoriously unreliable historian , says funk , nazi finance minister , homosexual gives sources \) know next nothing irving nothing funk precisely know , would contradict history books read concerning existence homosexual nazis \? trying say historians taking part anti homosexual smear \? 
homosexual writers agree official history \? n't think would found truth roehm heines homosexuals \? would think would want homosexuality nazism one use connection two bash homosexuals case challenge anyone document claim going challenge historians point \( irving \) , burden proof track references find stories originate , one germany , close archival material people net found great deal evidence many flaming heterosexuals among nazis seems include worst ones hitler , himmler , goebbels , goering , , eichmann , many eh \? agenda \? prove nazis heterosexuals , bash heterosexuals \? bother nazis might homosexuals \? make homosexuals bad true \? course bisexuals \? half nazis \? n't know would difficult believe nazis homosexuals german officer corps ww1 , instance , notorious homosexuality numerous scandals rocked german govt late 19th early 20th century many kaiser 's friends prosecuted kaiser homosexual , germany army long tradition homosexuality , going far back prussian history back frederick great least , homosexual roehm product prussian officer tradition , old german army \( like english public school system \) , well known center homosexuality , would quite willing overlook roehm 's homosexuality addition , nazis complained homosexuality hitler youth hitler youth swallowed pre nazi youth groups , various pre war , bund , youth groups known promote ideals friendship , many cases , homosexuality seems unlikely plenty homosexual nazis , regardless official nazi dogmas concerning evils homosexuality suprise anyone \? homosexuality always existed , societies would unusual nazis exception , n't sources , think kind proof accept would citations archival material , access intend reread every book nazis modern homosexuality ever read n't time nothing stopping , however , chasing sources prove otherwise , though , stick established histories david matthew deane \( deane binah cc brandeis edu \) eternal bleak wind let gods speak softly us days hereafter \( ezra pound \)"
talk.religion.misc,"psyrobtw ubvmsd cc buffalo edu \( robert weiss \) subject 18 apr 93 god 's promise philippians 4 9 organization university buffalo lines 8 news software vax vms vnews 1 41 nntp posting host ubvmsd cc buffalo edu things , ye learned , received , heard , seen , god peace shall philippians 4 9"
talk.religion.misc,"sandvik newton apple com \( kent sandvik \) subject 14 apr 93 god 's promise 1 john 1 7 organization cookamunga tourist bureau lines 14 article tweekco uucp , alizard tweekco uucp \( lizard \) wrote judging postings 've read usenet non usenet bbs conferences , barney definitely endangered species especially runs dark alley please , please n't make barney modern martyr saviour mythical figure , humans create religion name , life unbearable \) cheers , kent sandvik newton apple com alink ksand private activities net"
talk.religion.misc,"sandvik newton apple com \( kent sandvik \) subject disillusioned protestant finds christ organization cookamunga tourist bureau lines 23 article boi hp com , jburrill boi hp com \( jim burrill \) wrote jesus never taught concept trinity , deal following mat 28 18 jesus came said , authority heaven earth given mat 28 19 therefore go make disciples nations , baptizing name father son holy spirit , mat 28 20 teaching obey everything commanded surely always , end age jim , please , 's lame explanation trinity jesus provides baptizing people name three things ! trinity case , i'm wrong , assumed trinity implies god three entities , yet cheers , kent sandvik newton apple com alink ksand private activities net"
talk.religion.misc,"cutter gloster via mind org \( cutter \) subject biblical backing koresh 's 3 02 tape \( cites enclosed \) distribution world organization gordian knot , gloster , ga lines 22 netd susie sbc com \( \) writes article 20apr199301460499 utarlg uta edu b645zaw utarlg uta edu \( stephen think david koresh n't solid structure , sound biblical backing hour long tape broadcast , n't think anyone really cares solid structure sermon 's deaths 's responsible concern people think ought hold christ followers died hand romans also fault believing god , society reminds roman empire every day guess 'll log go watch american cutter gloster via mind org \( chris \) jobs easy person n't holt 's law"
talk.religion.misc,"subject albert sabin rfox charlie usd edu \( rich fox , univ south dakota \) reply rfox charlie usd edu organization university south dakota computer science dept nntp posting host charlie lines 91 article 1993apr15 nntpd2 cxo dec com , sharpe enet dec com \( system privileged account \) writes article 885 sunfish usd edu , rfox charlie usd edu \( rich fox , univ south dakota \) writes article 1993apr10 rambo atlanta dg com , wpr atlanta dg com \( bill rawlins \) writes earlier dialogue deleted perhaps read stop advancing bible evidence relating questions science jesus exist \? g wells great fallacy statement question origins based science alone nope , fallacy yep , science best determining religions handle rich , curious others award custody baby theists religion \? hope n't award custody , rich purposely used handle order avoid e , happens religions \( course like scientific creationism \) used best part indicate science currently time , domains mostly ignored also attempted brief , doubt confused matter aside , science written nobody seems argue theists , theologians better investigate magicians , , , athiests agnostics seems answer would vary individual individual i'm trying evasive , societal perspective , religion works hand , sometimes abused misused , many suffer , know net result seems positive , anthropological perspective human affairs might call neo insofar think masses ca n't get along without religion generally incapable n't , myriad reasons , main one seems promise immortality , immortality therefore seems theologians better equipped others mention answers suggest holds regardless truth answers simply people believe end , spiritual beliefs real scientific facts explanation \( caution take context \) suggest forever closed scientific investigation \? fact , n't think closed , least individuals n't group theoretical physicists argue matter created nothing big bang singularity \? 
approach might absence , except seems could argued something responsible nothing \? maybe something n't supernatural , maybe 's tough one people today grasp case , theory without empirical data explanation , question require data words , agree theorizing \( within scientific parameters \) scientific explaining answer , closed scientists , sense science currently inadequate data necessary improvement , seems long way , ever pretty convoluted hope 've made sense seems 200 years ago , question origin life earth considered open scientific agree generally prefer put way questions , , open inquiry enlightenment , reason questioning theological answers , , part , science thus born curiosity , eventually away largely leaving behind ignorant , selfish , intolerant , arrogant , course , still claim authority four domains rich fox , anthro , usouthdakota like discussion around , figure original post \) much obliged funny facts tend things , n't \? well , sure plenty scientific creationist somewhere , even created nothing record , , modern humans best regards \) , rich fox , anthro , usouthdakota"

+ 6
- 0
tests/data_for_tests/io/BQCorpus/dev.txt View File

@@ -0,0 +1,6 @@
sentence1,sentence2,label
综合评分不足什么原因,综合评估的依据,0
什么时候我能使用微粒贷,你就赶快给我开通就行了,0
如何修改每个月的还款日期,可以申请延期还款日吗?,0
没什么问的,不能登陆就是我最大的问题了,登录不上,1
你的意思是不能取现,借到的钱可不可以提出来,1

+ 6
- 0
tests/data_for_tests/io/BQCorpus/test.txt View File

@@ -0,0 +1,6 @@
sentence1,sentence2,label
你电话号码多少,你们的客服电话是多少?,1
10000块日利息是多少,0.05%就是借2000块,利息为1块钱一天,1
17号还款了,我现在想提前几天还,怎么弄,一直按时还款,提前还款,怎么会评估不足,0
我昨晚申请的,现在钱没到,也没有人联系我,审核多久才会打电话,1
假如我贷四万还款怎么,18号还款日可以不凌晨扣款,我18日下午还款可以吗,0

+ 6
- 0
tests/data_for_tests/io/BQCorpus/train.txt View File

@@ -0,0 +1,6 @@
sentence1,sentence2,label
一天了还是不能登录,你好,用app干嘛但是无法登入,1
为什么我的钱包点开,没显示微粒贷呀,点击我进入钱包,没有,借款的,提示呀!,1
什么要求,借款没有,0
微信注册的手机号停机了,还可以办理吗,没有邀请可以注册嘛,0
开通微粒贷,开通微粒贷!强烈要求,1

+ 7
- 0
tests/data_for_tests/io/ChnSentiCorp/dev.txt View File

@@ -0,0 +1,7 @@
label text_a
1 基金痛所有投资项目一样,必须先要有所了解,才能把握分寸,不至于跟风而造成损失。此本基金入门的书是一个不错的选择,不像一般的书一样偏重概念,虽然也涉及到概念,但作者用自己的方式解读,使第一次接触基金的人能更好的理解。内容以非常容易理解的语言象大众普及了基金的很多观念,对于普通基民来说,要想有所收获,必须了解基金界的很多情况,在关键的时候才不会盲目跟风。对于新手,强烈推荐。
1 系统很好装,LED屏是不错,就是16比9的比例看起来比较长,是14.0的屏。外观比较酷,适合年轻人,键盘模仿SONY的,还不错。
1 这书的装帧很好的,既适合家庭收藏亦适合阅读了解。了解一个人,通过他的书信,而且是家书,再好不过了,而了解这个人也更了解些那个时代,那个社会,给我们现代人些许启发吧。而我从中也知道了他的学习习惯、方法以及教子方面。比较有收获。软精装的封面,封面要是每个唐老师那个照片就更好了,分上下册便于阅读。内里字体有分别:信是用的启功老师的手写字体,评点是宋体。
0 屏幕没有坏点和暗点,这个比较不错。配置性价比较高,目前使用已有半个月,基本正常。
0 典型的国营酒店,管理层缺乏责任心,管理混乱。房间里的大灯镜灯台灯都是坏的,只有一盏床头灯可用,不知道酒店是怎么维护的。最可气的是结帐时竟然要求客人赔偿房间里已损坏很久的鞋盒,简直是讹诈。
0 普通游客旅馆 还三星 让我伤心 店名好大 奇差无比 补充点评 2006年12月8日 : 还说有地下车库 谁敢下去 晕 狭小 黑暗 要卡壳儿的 CTRIP上怎么让它这么忽悠顾客的 ?!!!!!!!

+ 7
- 0
tests/data_for_tests/io/ChnSentiCorp/test.txt View File

@@ -0,0 +1,7 @@
label text_a
0 v系统和XP系统能做到二选一就更好了,毕竟大部分人还是更偏爱XP系统。
0 自带的Linix系统上上网还可以,想玩其他的功能毫无疑问得换XP.偶在京东订的时候为了装XP方便,一起买了阿帕奇的USB光驱。到货后,发现该USB光驱无法引导系统光盘启动,已验证过该光驱读写功能正常。
1 非常不错的酒店,依山傍水,里面大片森林,散散步很不错,坐在湖边也休息也是不错的选择;房间很幽静,房间的设施很好,服务员态度也很好。
0 5月8日付款成功,当当网显示5月10日发货,可是至今还没看到货物,也没收到任何通知,简不知怎么说好!!!
1 收到书,还未打开就被封面的鲜艳色彩及版样吸引,迫不急待的打开,书内的设计及彩图也不错,色泽及印刷质量都称的上好,没有味道,贴图也从简入深。价格也不贵。拿回家,小宝贝也很喜欢,我家宝宝只有2岁5个月对于她贴片不太好撕,大一些的贴片要我来帮她撕。不过,今天再玩时已经比昨天撕的好很多了,可以锻炼她的小手呢。等这几本用完了,我想我还会再给她买一些类似的书。
0 挺失望的,还不如买一本张爱玲文集呢,以<色戒>命名,可这篇文章仅仅10多页,且无头无尾的,完全比不上里面的任意一篇其它文章.

+ 7
- 0
tests/data_for_tests/io/ChnSentiCorp/train.txt View File

@@ -0,0 +1,7 @@
label text_a
1 很好的酒店,很规范植得一住.餐厅一般不应该的,不知道为什么. 宾馆反馈 2008年4月17日 : 餐厅现已重新装修,用餐环境较以前要好的多。谢谢您的宝贵意见!
0 这是我看过文字写得很糟糕的书,因为买了,还是耐着性子看完了,但是总体来说不好,文字、内容、结构都不好
1 拿房时没大床房了,给我们免费升成套房,这点还蛮满意的。酒店大致不错,有国内五星水准。比国际品牌的要差一点。酒店有点年纪了,维修要加强,比如我们浴室的下水就堵塞不通,这些在客人入住前就该发觉修好。其它都还可以。
1 开始看了2005年的几位朋友的评价,都不敢去入住。没想到现在改观了很多,房间虽小,但很整洁。下次再来的话,还会选择这个酒店。只是希望宽带能一直免费!
0 本机预装的Vista跟瑞星杀软不兼容,蓝屏,不能进入系统,不能自行卸载!!千万小心别装,用卡巴可以。
0 跟心灵鸡汤没什么本质区别嘛,至少我不喜欢这样读经典,把经典都解读成这样有点去中国化的味道了

+ 6
- 0
tests/data_for_tests/io/LCQMC/dev.txt View File

@@ -0,0 +1,6 @@
开初婚未育证明怎么弄? 初婚未育情况证明怎么开? 1
脚气怎么治疗 醋怎么治疗脚气 0
世界是先有男人还是先有女人 世界上是先有男人还是先有女人 1
有什么小说软件好用的 那个看小说的阅读器较好 1
网上兼职是做什么的,手机可以做吗 手机可以做什么网上兼职,拍单子是什么 0
郑州有什么好玩的地方? 郑州有什么好玩的地方啊 1

+ 5
- 0
tests/data_for_tests/io/LCQMC/test.txt View File

@@ -0,0 +1,5 @@
谁有狂三这张高清的 这张高清图,谁有 0
淘宝模特叫什么?急 淘宝的模特她叫什么 1
不要嘛用韩语怎么说 韩语的请不要走怎么说 0
倒瓜子脸适合什么发型 额头高又是瓜子脸的女生适合什么刘海 0
淘宝流量怎么买 刚淘宝店如何才能有流量 0

+ 6
- 0
tests/data_for_tests/io/LCQMC/train.txt View File

@@ -0,0 +1,6 @@
喜欢打篮球的男生喜欢什么样的女生 爱打篮球的男生喜欢什么样的女生 1
你帮我设计小说的封面吧 谁能帮我给小说设计个封面? 0
移动手机卡刷砖 关于移动手机卡 0
有什么好听的短信铃声啊 有什么好听的韩剧短信铃声 0
人生的三大事是什么 人生三大事是什么? 1
您好是后8位的 您提供后8位即可, 1

+ 6
- 0
tests/data_for_tests/io/MNLI/dev_matched.tsv View File

@@ -0,0 +1,6 @@
index promptID pairID genre sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2 label1 label2 label3 label4 label5 gold_label
0 63735 63735n slate ( ( The ( new rights ) ) ( are ( nice enough ) ) ) ( Everyone ( really ( likes ( the ( newest benefits ) ) ) ) ) (ROOT (S (NP (DT The) (JJ new) (NNS rights)) (VP (VBP are) (ADJP (JJ nice) (RB enough))))) (ROOT (S (NP (NN Everyone)) (VP (ADVP (RB really)) (VBZ likes) (NP (DT the) (JJS newest) (NNS benefits))))) The new rights are nice enough Everyone really likes the newest benefits neutral entailment neutral neutral neutral neutral
1 91383 91383c government ( ( This site ) ( ( includes ( ( ( ( a list ) ( of ( all ( award winners ) ) ) ) and ) ( ( a ( searchable database ) ) ( of ( Government ( Executive articles ) ) ) ) ) ) . ) ) ( ( ( The ( Government ( Executive articles ) ) ) ( housed ( on ( the website ) ) ) ) ( ( ( are not ) ( able ( to ( be searched ) ) ) ) . ) ) (ROOT (S (NP (DT This) (NN site)) (VP (VBZ includes) (NP (NP (NP (DT a) (NN list)) (PP (IN of) (NP (DT all) (NN award) (NNS winners)))) (CC and) (NP (NP (DT a) (JJ searchable) (NN database)) (PP (IN of) (NP (NNP Government) (NNP Executive) (NNS articles)))))) (. .))) (ROOT (S (NP (NP (DT The) (NNP Government) (NNP Executive) (NNS articles)) (VP (VBN housed) (PP (IN on) (NP (DT the) (NN website))))) (VP (VBP are) (RB not) (ADJP (JJ able) (S (VP (TO to) (VP (VB be) (ADJP (JJ searched))))))) (. .))) This site includes a list of all award winners and a searchable database of Government Executive articles. The Government Executive articles housed on the website are not able to be searched. contradiction contradiction contradiction contradiction contradiction contradiction
2 755 755e telephone ( ( ( ( uh ( i ( ( do n't ) ( know ( ( i i ) ( have ( ( mixed emotions ) ( about ( him ( ( uh sometimes ) ( i ( like him ) ) ) ) ) ) ) ) ) ) ) ) but ) ( ( at ( the ( same times ) ) ) ( i ( love ( to ( see somebody ) ) ) ) ) ) ( beat him ) ) ( I ( ( ( ( ( ( like him ) ( for ( the ( most part ) ) ) ) , ) but ) ( ( would still ) ( enjoy ( seeing ( someone ( beat him ) ) ) ) ) ) . ) ) (ROOT (SINV (S (S (INTJ (UH uh)) (NP (FW i)) (VP (VBP do) (RB n't) (VP (VB know) (NP (NP (FW i) (FW i)) (SBAR (S (VP (VBP have) (VP (VBN mixed) (NP (NNS emotions)) (PP (IN about) (S (NP (PRP him)) (VP (VBG uh) (ADVP (RB sometimes)) (NP (NP (FW i)) (PP (IN like) (NP (PRP him))))))))))))))) (CC but) (S (PP (IN at) (NP (DT the) (JJ same) (NNS times))) (NP (FW i)) (VP (VBP love) (S (VP (TO to) (VP (VB see) (NP (NN somebody)))))))) (VP (VBD beat)) (NP (PRP him)))) (ROOT (S (NP (PRP I)) (VP (VP (VBP like) (NP (PRP him)) (PP (IN for) (NP (DT the) (JJS most) (NN part)))) (, ,) (CC but) (VP (MD would) (ADVP (RB still)) (VP (VB enjoy) (S (VP (VBG seeing) (S (NP (NN someone)) (VP (VB beat) (NP (PRP him))))))))) (. .))) uh i don't know i i have mixed emotions about him uh sometimes i like him but at the same times i love to see somebody beat him I like him for the most part, but would still enjoy seeing someone beat him. entailment entailment entailment entailment entailment entailment
3 78013 78013c telephone ( yeah ( ( i i ) ( think ( ( my ( favorite restaurant ) ) ( ( is always ) ( been ( ( the ( one closest ) ) ( you ( ( know ( the closest ) ) ( ( as long ) ( as ( it ( 's ( it ( meets ( ( the ( minimum criteria ) ) ( you ( know ( of ( good food ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ( ( My ( favorite restaurants ) ) ( ( ( ( are always ) ( ( ( ( ( at least ) a ) hundred ) miles ) away ) ) ( from ( my house ) ) ) . ) ) (ROOT (S (VP (VB yeah) (NP (NP (FW i) (FW i)) (SBAR (S (VP (VBP think) (SBAR (S (NP (PRP$ my) (JJ favorite) (NN restaurant)) (VP (VBZ is) (ADVP (RB always)) (VP (VBN been) (NP (NP (DT the) (CD one) (JJS closest)) (SBAR (S (NP (PRP you)) (VP (VBP know) (NP (DT the) (JJS closest)) (ADVP (ADVP (RB as) (RB long)) (SBAR (IN as) (S (NP (PRP it)) (VP (VBZ 's) (SBAR (S (NP (PRP it)) (VP (VBZ meets) (NP (NP (DT the) (JJ minimum) (NNS criteria)) (SBAR (S (NP (PRP you)) (VP (VBP know) (PP (IN of) (NP (JJ good) (NN food))))))))))))))))))))))))))))) (ROOT (S (NP (PRP$ My) (JJ favorite) (NNS restaurants)) (VP (VBP are) (ADVP (RB always)) (ADVP (NP (QP (IN at) (JJS least) (DT a) (CD hundred)) (NNS miles)) (RB away)) (PP (IN from) (NP (PRP$ my) (NN house)))) (. .))) yeah i i think my favorite restaurant is always been the one closest you know the closest as long as it's it meets the minimum criteria you know of good food My favorite restaurants are always at least a hundred miles away from my house. contradiction contradiction contradiction contradiction contradiction contradiction
4 96377 96377c telephone ( i ( ( do n't ) ( know ( um ( do ( you ( do ( ( a lot ) ( of camping ) ) ) ) ) ) ) ) ) ( I ( ( know exactly ) . ) ) (ROOT (S (NP (FW i)) (VP (VBP do) (RB n't) (VP (VB know) (SBAR (S (NP (NN um)) (VP (VBP do) (SBAR (S (NP (PRP you)) (VP (VBP do) (NP (NP (DT a) (NN lot)) (PP (IN of) (NP (NN camping)))))))))))))) (ROOT (S (NP (PRP I)) (VP (VBP know) (ADVP (RB exactly))) (. .))) i don't know um do you do a lot of camping I know exactly. contradiction contradiction contradiction contradiction contradiction contradiction

+ 6
- 0
tests/data_for_tests/io/MNLI/dev_mismatched.tsv View File

@@ -0,0 +1,6 @@
index promptID pairID genre sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2 label1 label2 label3 label4 label5 gold_label
0 75290 75290c letters ( ( Your contribution ) ( ( helped ( make ( it ( possible ( for ( us ( to ( ( provide ( our students ) ) ( with ( a ( quality education ) ) ) ) ) ) ) ) ) ) ) . ) ) ( ( Your contributions ) ( ( were ( of ( ( no help ) ( with ( ( our ( students ' ) ) education ) ) ) ) ) . ) ) (ROOT (S (NP (PRP$ Your) (NN contribution)) (VP (VBD helped) (VP (VB make) (S (NP (PRP it)) (ADJP (JJ possible)) (SBAR (IN for) (S (NP (PRP us)) (VP (TO to) (VP (VB provide) (NP (PRP$ our) (NNS students)) (PP (IN with) (NP (DT a) (NN quality) (NN education)))))))))) (. .))) (ROOT (S (NP (PRP$ Your) (NNS contributions)) (VP (VBD were) (PP (IN of) (NP (NP (DT no) (NN help)) (PP (IN with) (NP (NP (PRP$ our) (NNS students) (POS ')) (NN education)))))) (. .))) Your contribution helped make it possible for us to provide our students with a quality education. Your contributions were of no help with our students' education. contradiction contradiction contradiction contradiction contradiction contradiction
1 133794 133794c verbatim ( ( ( ( ( ( The answer ) ( ( ( ( has nothing ) ( to ( do ( with ( their cause ) ) ) ) ) , ) however ) ) , ) but ) ( ( with ( ( ( ( ( ( ( ( the ( simple fact ) ) ( that ( dictionaries ( ( are not ) ( exercises ( in ( bi-unique substitutability ) ) ) ) ) ) ) ; ) ( in ( ( ( other words ) , ) ( if ( ( one ( of ( ( the senses ) ( of run ) ) ) ) ( ( is ` ) ( ( ( ( operate ' ) -LRB- ) ( as ( in ( She ( runs ( an ( engine factory ) ) ) ) ) ) ) -RRB- ) ) ) ) ) ) ) , ) ( that ( ( does not ) ( ( make it ) ( ( valid ( to ( assume ( that ( one ( can ( substitute ( ( operate ( for run ) ) ( in ( We ( ( run ( in ( ( the marathon ) ( every year ) ) ) ) . ) ) ) ) ) ) ) ) ) ) ) ( Although ( ( ( ( recognizing this ) ( as ( ( a shortcoming ) ( of dictionaries ) ) ) ) and ) ( ( ( assigning it ) arbitrarily ) ( to ( what ( , ( ( for ( lack ( of ( a ( better term ) ) ) ) ) ( , ( we ( might ( call ( ( the genius ) ( of ( the language ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) , ) ( might ( seem ( trivial ( to ( the ( casual observer ) ) ) ) ) ) ) ) ( , ( it ( is ( ( a ( valid matter ) ) ( for ( concern ( in ( ( the realm ) ( of lexicology ) ) ) ) ) ) ) ) ) ) ) . ) ( Dictionaries ( ( ( are indeed ) ( exercises ( in ( bi-unique substitutability ) ) ) ) . 
) ) (ROOT (S (S (NP (DT The) (NN answer)) (VP (VBZ has) (ADVP (NN nothing)) (S (VP (TO to) (VP (VB do) (PP (IN with) (NP (PRP$ their) (NN cause)))))) (, ,) (ADVP (RB however)))) (, ,) (CC but) (S (SBAR (IN with) (S (NP (NP (DT the) (JJ simple) (NN fact)) (SBAR (IN that) (S (NP (NNS dictionaries)) (VP (VBP are) (RB not) (NP (NP (NNS exercises)) (PP (IN in) (NP (JJ bi-unique) (NN substitutability))))))) (: ;) (PP (IN in) (NP (NP (JJ other) (NNS words)) (, ,) (SBAR (IN if) (S (NP (NP (CD one)) (PP (IN of) (NP (NP (DT the) (NNS senses)) (PP (IN of) (NP (NN run)))))) (VP (VBZ is) (`` `) (VP (VB operate) ('' ') (-LRB- -LRB-) (SBAR (RB as) (IN in) (S (NP (PRP She)) (VP (VBZ runs) (NP (DT an) (NN engine) (NN factory))))) (-RRB- -RRB-))))))) (, ,) (SBAR (WHNP (WDT that)) (S (VP (VBZ does) (RB not) (VP (VB make) (NP (PRP it)) (S (ADJP (JJ valid) (S (VP (TO to) (VP (VB assume) (SBAR (IN that) (S (NP (PRP one)) (VP (MD can) (VP (VB substitute) (VP (VB operate) (PP (IN for) (NP (NN run))) (SBAR (IN in) (S (NP (PRP We)) (VP (VB run) (PP (IN in) (NP (NP (DT the) (NN marathon)) (NP (DT every) (NN year)))) (. .))))))))))))) (SBAR (IN Although) (S (S (VP (VBG recognizing) (NP (DT this)) (PP (IN as) (NP (NP (DT a) (NN shortcoming)) (PP (IN of) (NP (NNS dictionaries))))))) (CC and) (S (VP (VBG assigning) (NP (PRP it)) (ADVP (RB arbitrarily)) (PP (TO to) (SBAR (WHNP (WP what)) (S (, ,) (PP (IN for) (NP (NP (NN lack)) (PP (IN of) (NP (DT a) (JJR better) (NN term))))) (, ,) (NP (PRP we)) (VP (MD might) (VP (VB call) (NP (NP (DT the) (NN genius)) (PP (IN of) (NP (DT the) (NN language)))))))))))))))))) (, ,)) (VP (MD might) (VP (VB seem) (ADJP (JJ trivial) (PP (TO to) (NP (DT the) (JJ casual) (NN observer)))))))) (, ,) (NP (PRP it)) (VP (VBZ is) (NP (NP (DT a) (JJ valid) (NN matter)) (PP (IN for) (NP (NP (NN concern)) (PP (IN in) (NP (NP (DT the) (NN realm)) (PP (IN of) (NP (NN lexicology)))))))))) (. 
.))) (ROOT (S (NP (NNS Dictionaries)) (VP (VBP are) (ADVP (RB indeed)) (NP (NP (NNS exercises)) (PP (IN in) (NP (JJ bi-unique) (NN substitutability))))) (. .))) The answer has nothing to do with their cause, however, but with the simple fact that dictionaries are not exercises in bi-unique substitutability; in other words, if one of the senses of run is `operate' (as in She runs an engine factory ), that does not make it valid to assume that one can substitute operate for run in We run in the marathon every year . Although recognizing this as a shortcoming of dictionaries and assigning it arbitrarily to what, for lack of a better term, we might call the genius of the language, might seem trivial to the casual observer, it is a valid matter for concern in the realm of lexicology. Dictionaries are indeed exercises in bi-unique substitutability. contradiction contradiction contradiction contradiction contradiction contradiction
2 3628 3628c verbatim ( We ( ( serve ( ( a ( classic ( Tuscan meal ) ) ) ( that ( includes ( ( a ( Florentine terrine ) ) ( made ( with ( dick ( and ( chicken livers ) ) ) ) ) ) ) ) ) ) . ) ) ( We ( ( serve ( ( a meal ) ( of ( Florentine terrine ) ) ) ) . ) ) (ROOT (S (NP (PRP We)) (VP (VBP serve) (NP (NP (DT a) (JJ classic) (NNP Tuscan) (NN meal)) (SBAR (WHNP (WDT that)) (S (VP (VBZ includes) (NP (NP (DT a) (JJ Florentine) (NN terrine)) (VP (VBN made) (PP (IN with) (NP (NN dick) (CC and) (NN chicken) (NNS livers)))))))))) (. .))) (ROOT (S (NP (PRP We)) (VP (VBP serve) (NP (NP (DT a) (NN meal)) (PP (IN of) (NP (NNP Florentine) (NN terrine))))) (. .))) We serve a classic Tuscan meal that includes a Florentine terrine made with dick and chicken livers. We serve a meal of Florentine terrine. contradiction neutral entailment entailment entailment entailment
3 89411 89411c letters ( ( ( A ( few months ) ) ago ) ( , ( ( ( ( Carl Newton ) and ) I ) ( ( ( wrote ( a letter ) ) ( asking ( you ( to ( ( consider ( a ( financial contribution ) ) ) ( to ( ( graduate Endodontics ) ( at ( Indiana University ) ) ) ) ) ) ) ) ) . ) ) ) ) ( ( ( ( Carl Newton ) and ) I ) ( ( ( have never ) ( ( had ( any ( other ( previous contact ) ) ) ) ( with you ) ) ) . ) ) (ROOT (S (ADVP (NP (DT A) (JJ few) (NNS months)) (RB ago)) (, ,) (NP (NP (NNP Carl) (NNP Newton)) (CC and) (NP (PRP I))) (VP (VBD wrote) (NP (DT a) (NN letter)) (S (VP (VBG asking) (S (NP (PRP you)) (VP (TO to) (VP (VB consider) (NP (DT a) (JJ financial) (NN contribution)) (PP (TO to) (NP (NP (JJ graduate) (NNS Endodontics)) (PP (IN at) (NP (NNP Indiana) (NNP University))))))))))) (. .))) (ROOT (S (NP (NP (NNP Carl) (NNP Newton)) (CC and) (NP (PRP I))) (VP (VBP have) (ADVP (RB never)) (VP (VBN had) (NP (DT any) (JJ other) (JJ previous) (NN contact)) (PP (IN with) (NP (PRP you))))) (. .))) A few months ago, Carl Newton and I wrote a letter asking you to consider a financial contribution to graduate Endodontics at Indiana University. Carl Newton and I have never had any other previous contact with you. contradiction contradiction contradiction contradiction contradiction contradiction
4 136158 136158e facetoface ( I ( ( was ( on ( ( this earth ) ( you ( know ( ( , ( ( I ( 've ( lived ( on ( ( this earth ) ( for ( some reason ) ) ) ) ) ) ) , ) ) ( I ( just ( ( do n't ) ( know ( what ( it ( is yet ) ) ) ) ) ) ) ) ) ) ) ) ) . ) ) ( I ( ( ( ( do n't ) yet ) ( ( know ( the reason ) ) ( why ( I ( have ( lived ( on earth ) ) ) ) ) ) ) . ) ) (ROOT (S (NP (PRP I)) (VP (VBD was) (PP (IN on) (NP (NP (DT this) (NN earth)) (SBAR (S (NP (PRP you)) (VP (VBP know) (SBAR (S (PRN (, ,) (S (NP (PRP I)) (VP (VBP 've) (VP (VBN lived) (PP (IN on) (NP (NP (DT this) (NN earth)) (PP (IN for) (NP (DT some) (NN reason)))))))) (, ,)) (NP (PRP I)) (ADVP (RB just)) (VP (VBP do) (RB n't) (VP (VB know) (SBAR (WHNP (WP what)) (S (NP (PRP it)) (VP (VBZ is) (ADVP (RB yet))))))))))))))) (. .))) (ROOT (S (NP (PRP I)) (VP (VBP do) (RB n't) (ADVP (RB yet)) (VP (VB know) (NP (DT the) (NN reason)) (SBAR (WHADVP (WRB why)) (S (NP (PRP I)) (VP (VBP have) (VP (VBN lived) (PP (IN on) (NP (NN earth))))))))) (. .))) I was on this earth you know, I've lived on this earth for some reason, I just don't know what it is yet. I don't yet know the reason why I have lived on earth. entailment entailment entailment entailment entailment entailment

+ 6
- 0
tests/data_for_tests/io/MNLI/test_matched.tsv View File

@@ -0,0 +1,6 @@
index promptID pairID genre sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2
0 31493 31493 travel ( ( ( ( ( ( ( ( Hierbas , ) ( ans seco ) ) , ) ( ans dulce ) ) , ) and ) frigola ) ( ( ( are just ) ( ( a ( few names ) ) ( worth ( ( keeping ( a look-out ) ) for ) ) ) ) . ) ) ( Hierbas ( ( is ( ( a name ) ( worth ( ( looking out ) for ) ) ) ) . ) ) (ROOT (S (NP (NP (NNS Hierbas)) (, ,) (NP (NN ans) (NN seco)) (, ,) (NP (NN ans) (NN dulce)) (, ,) (CC and) (NP (NN frigola))) (VP (VBP are) (ADVP (RB just)) (NP (NP (DT a) (JJ few) (NNS names)) (PP (JJ worth) (S (VP (VBG keeping) (NP (DT a) (NN look-out)) (PP (IN for))))))) (. .))) (ROOT (S (NP (NNS Hierbas)) (VP (VBZ is) (NP (NP (DT a) (NN name)) (PP (JJ worth) (S (VP (VBG looking) (PRT (RP out)) (PP (IN for))))))) (. .))) Hierbas, ans seco, ans dulce, and frigola are just a few names worth keeping a look-out for. Hierbas is a name worth looking out for.
1 92164 92164 government ( ( ( The extent ) ( of ( the ( behavioral effects ) ) ) ) ( ( would ( ( depend ( in ( part ( on ( ( the structure ) ( of ( ( ( the ( individual ( account program ) ) ) and ) ( any limits ) ) ) ) ) ) ) ) ( on ( accessing ( the funds ) ) ) ) ) . ) ) ( ( Many people ) ( ( would ( be ( very ( unhappy ( to ( ( loose control ) ( over ( their ( own money ) ) ) ) ) ) ) ) ) . ) ) (ROOT (S (NP (NP (DT The) (NN extent)) (PP (IN of) (NP (DT the) (JJ behavioral) (NNS effects)))) (VP (MD would) (VP (VB depend) (PP (IN in) (NP (NP (NN part)) (PP (IN on) (NP (NP (DT the) (NN structure)) (PP (IN of) (NP (NP (DT the) (JJ individual) (NN account) (NN program)) (CC and) (NP (DT any) (NNS limits)))))))) (PP (IN on) (S (VP (VBG accessing) (NP (DT the) (NNS funds))))))) (. .))) (ROOT (S (NP (JJ Many) (NNS people)) (VP (MD would) (VP (VB be) (ADJP (RB very) (JJ unhappy) (PP (TO to) (NP (NP (JJ loose) (NN control)) (PP (IN over) (NP (PRP$ their) (JJ own) (NN money)))))))) (. .))) The extent of the behavioral effects would depend in part on the structure of the individual account program and any limits on accessing the funds. Many people would be very unhappy to loose control over their own money.
2 9662 9662 government ( ( ( Timely access ) ( to information ) ) ( ( is ( in ( ( the ( best interests ) ) ( of ( ( ( both GAO ) and ) ( the agencies ) ) ) ) ) ) . ) ) ( It ( ( ( is ( in ( ( everyone 's ) ( best interest ) ) ) ) ( to ( ( have access ) ( to ( information ( in ( a ( timely manner ) ) ) ) ) ) ) ) . ) ) (ROOT (S (NP (NP (JJ Timely) (NN access)) (PP (TO to) (NP (NN information)))) (VP (VBZ is) (PP (IN in) (NP (NP (DT the) (JJS best) (NNS interests)) (PP (IN of) (NP (NP (DT both) (NNP GAO)) (CC and) (NP (DT the) (NNS agencies))))))) (. .))) (ROOT (S (NP (PRP It)) (VP (VBZ is) (PP (IN in) (NP (NP (NN everyone) (POS 's)) (JJS best) (NN interest))) (S (VP (TO to) (VP (VB have) (NP (NN access)) (PP (TO to) (NP (NP (NN information)) (PP (IN in) (NP (DT a) (JJ timely) (NN manner))))))))) (. .))) Timely access to information is in the best interests of both GAO and the agencies. It is in everyone's best interest to have access to information in a timely manner.
3 5991 5991 travel ( ( Based ( in ( ( the ( Auvergnat ( spa town ) ) ) ( of Vichy ) ) ) ) ( , ( ( the ( French government ) ) ( often ( ( ( ( proved ( more zealous ) ) ( than ( its masters ) ) ) ( in ( ( ( suppressing ( civil liberties ) ) and ) ( ( drawing up ) ( anti-Jewish legislation ) ) ) ) ) . ) ) ) ) ) ( ( The ( French government ) ) ( ( passed ( ( anti-Jewish laws ) ( aimed ( at ( helping ( the Nazi ) ) ) ) ) ) . ) ) (ROOT (S (PP (VBN Based) (PP (IN in) (NP (NP (DT the) (NNP Auvergnat) (NN spa) (NN town)) (PP (IN of) (NP (NNP Vichy)))))) (, ,) (NP (DT the) (JJ French) (NN government)) (ADVP (RB often)) (VP (VBD proved) (NP (JJR more) (NNS zealous)) (PP (IN than) (NP (PRP$ its) (NNS masters))) (PP (IN in) (S (VP (VP (VBG suppressing) (NP (JJ civil) (NNS liberties))) (CC and) (VP (VBG drawing) (PRT (RP up)) (NP (JJ anti-Jewish) (NN legislation))))))) (. .))) (ROOT (S (NP (DT The) (JJ French) (NN government)) (VP (VBD passed) (NP (NP (JJ anti-Jewish) (NNS laws)) (VP (VBN aimed) (PP (IN at) (S (VP (VBG helping) (NP (DT the) (JJ Nazi)))))))) (. .))) Based in the Auvergnat spa town of Vichy, the French government often proved more zealous than its masters in suppressing civil liberties and drawing up anti-Jewish legislation. The French government passed anti-Jewish laws aimed at helping the Nazi.
4 50156 50156 travel ( ( ( ( ( Built ( in 1870 ) ) ( , ( ( ( its canopy ) ( of ( stained ( glass ( and ( cast iron ) ) ) ) ) ) ( is ( ( the oldest ) ( in Dublin ) ) ) ) ) ) ; ) ( ( its ( enthusiastic ( interior decoration ) ) ) ( ( is also ) ( typical ( of ( the era ) ) ) ) ) ) . ) ( It ( ( ( ( was ( constructed ( in 1870 ) ) ) and ) ( has ( ( the ( oldest canopy ) ) ( in Dublin ) ) ) ) . ) ) (ROOT (S (S (S (VP (VBN Built) (PP (IN in) (NP (CD 1870))))) (, ,) (NP (NP (PRP$ its) (NN canopy)) (PP (IN of) (NP (JJ stained) (NN glass) (CC and) (NN cast) (NN iron)))) (VP (VBZ is) (NP (NP (DT the) (JJS oldest)) (PP (IN in) (NP (NNP Dublin)))))) (: ;) (S (NP (PRP$ its) (JJ enthusiastic) (JJ interior) (NN decoration)) (VP (VBZ is) (ADVP (RB also)) (ADJP (JJ typical) (PP (IN of) (NP (DT the) (NN era)))))) (. .))) (ROOT (S (NP (PRP It)) (VP (VP (VBD was) (VP (VBN constructed) (PP (IN in) (NP (CD 1870))))) (CC and) (VP (VBZ has) (NP (NP (DT the) (JJS oldest) (NN canopy)) (PP (IN in) (NP (NNP Dublin)))))) (. .))) Built in 1870, its canopy of stained glass and cast iron is the oldest in Dublin; its enthusiastic interior decoration is also typical of the era. It was constructed in 1870 and has the oldest canopy in Dublin.

+ 6
- 0
tests/data_for_tests/io/MNLI/test_mismatched.tsv View File

@@ -0,0 +1,6 @@
index promptID pairID genre sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2
0 16130 16130 facetoface ( ( What ( have ( you decided ) ) ) ( , ( what ( ( ( are you ) ( going ( to do ) ) ) ? ) ) ) ) ( So ( what ( ( 's ( your decision ) ) ? ) ) ) (ROOT (SBARQ (SBAR (WHNP (WP What)) (S (VP (VBP have) (S (NP (PRP you)) (VP (VBD decided)))))) (, ,) (WHNP (WP what)) (SQ (VBP are) (NP (PRP you)) (VP (VBG going) (S (VP (TO to) (VP (VB do)))))) (. ?))) (ROOT (SBARQ (RB So) (WHNP (WP what)) (SQ (VBZ 's) (NP (PRP$ your) (NN decision))) (. ?))) What have you decided, what are you going to do? So what's your decision?
1 128269 128269 oup ( ( ( Women 's ) clothing ) ( ( is ( characterized ( by ( ( great diversity ) ( in ( ( styles and ) ( short ( production runs ) ) ) ) ) ) ) ) . ) ) ( ( ( Men 's ) clothing ) ( typically ( ( ( has ( the ( ( most stylistic ) diversity ) ) ) ( unlike ( ( the blandness ) ( of ( ( women 's ) fashion ) ) ) ) ) . ) ) ) (ROOT (S (NP (NP (NNP Women) (POS 's)) (NN clothing)) (VP (VBZ is) (VP (VBN characterized) (PP (IN by) (NP (NP (JJ great) (NN diversity)) (PP (IN in) (NP (NP (NNS styles)) (CC and) (NP (JJ short) (NN production) (NNS runs)))))))) (. .))) (ROOT (S (NP (NP (NNP Men) (POS 's)) (NN clothing)) (ADVP (RB typically)) (VP (VBZ has) (NP (DT the) (ADJP (RBS most) (JJ stylistic)) (NN diversity)) (PP (IN unlike) (NP (NP (DT the) (NN blandness)) (PP (IN of) (NP (NP (NNS women) (POS 's)) (NN fashion)))))) (. .))) Women's clothing is characterized by great diversity in styles and short production runs. Men's clothing typically has the most stylistic diversity unlike the blandness of women's fashion.
2 130938 130938 nineeleven ( ( ( ( ( Reports ( from ( ( two ( flight attendants ) ) ( in ( the ( coach cabin ) ) ) ) ) ) , ) ( ( ( Betty Ong ) and ) ( Madeline ( Amy Sweeney ) ) ) ) , ) ( ( ( tell us ) ( ( most ( of what ) ) ( we ( know ( about ( how ( ( the hijacking ) happened ) ) ) ) ) ) ) . ) ) ( ( ( The report ) ( on ( the hijacking ) ) ) ( ( ( was ( ( over ( five hundred ) ) pages ) ) long ) . ) ) (ROOT (S (NP (NP (NP (NNS Reports)) (PP (IN from) (NP (NP (CD two) (NN flight) (NNS attendants)) (PP (IN in) (NP (DT the) (NN coach) (NN cabin)))))) (, ,) (NP (NP (NNP Betty) (NNP Ong)) (CC and) (NP (NNP Madeline) (NNP Amy) (NNP Sweeney))) (, ,)) (VP (VBP tell) (NP (PRP us)) (SBAR (WHNP (JJS most) (WHPP (IN of) (WHNP (WP what)))) (S (NP (PRP we)) (VP (VBP know) (PP (IN about) (SBAR (WHADVP (WRB how)) (S (NP (DT the) (NN hijacking)) (VP (VBD happened))))))))) (. .))) (ROOT (S (NP (NP (DT The) (NN report)) (PP (IN on) (NP (DT the) (NN hijacking)))) (VP (VBD was) (NP (QP (RB over) (CD five) (CD hundred)) (NNS pages)) (ADVP (RB long))) (. .))) Reports from two flight attendants in the coach cabin, Betty Ong and Madeline Amy Sweeney, tell us most of what we know about how the hijacking happened. The report on the hijacking was over five hundred pages long.
3 40009 40009 nineeleven ( ( At ( about 9:20 ) ) ( , ( ( ( security personnel ) ( at ( FAA headquarters ) ) ) ( ( ( ( set up ) ( a ( hijacking teleconference ) ) ) ( with ( ( ( several agencies ) , ) ( including ( the ( Defense Department ) ) ) ) ) ) . ) ) ) ) ( ( The teleconference ) ( ( lasted ( for ( 13 ( straight hours ) ) ) ) . ) ) (ROOT (S (PP (IN At) (NP (QP (RB about) (CD 9:20)))) (, ,) (NP (NP (NN security) (NNS personnel)) (PP (IN at) (NP (NNP FAA) (NNS headquarters)))) (VP (VBD set) (PRT (RP up)) (NP (DT a) (VBG hijacking) (NN teleconference)) (PP (IN with) (NP (NP (JJ several) (NNS agencies)) (, ,) (PP (VBG including) (NP (DT the) (NNP Defense) (NNP Department)))))) (. .))) (ROOT (S (NP (DT The) (NN teleconference)) (VP (VBD lasted) (PP (IN for) (NP (CD 13) (JJ straight) (NNS hours)))) (. .))) At about 9:20, security personnel at FAA headquarters set up a hijacking teleconference with several agencies, including the Defense Department. The teleconference lasted for 13 straight hours.
4 105266 105266 nineeleven ( So ( we ( ( 've ( ( got ( ( a couple ) ( of aircraft ) ) ) ( ( up there ) ( that ( ( have ( those instructions ) ) ( at ( this ( present time ) ) ) ) ) ) ) ) ? ) ) ) ( ( At ( the ( present time ) ) ) ( , ( there ( ( ( ( ( were n't ) ( ( any aircraft ) ( in ( the air ) ) ) ) , ) right ) ? ) ) ) ) (ROOT (S (IN So) (NP (PRP we)) (VP (VBP 've) (VP (VBD got) (NP (NP (DT a) (NN couple)) (PP (IN of) (NP (NN aircraft)))) (ADVP (ADVP (RB up) (RB there)) (SBAR (WHNP (WDT that)) (S (VP (VBP have) (NP (DT those) (NNS instructions)) (PP (IN at) (NP (DT this) (JJ present) (NN time))))))))) (. ?))) (ROOT (S (PP (IN At) (NP (DT the) (JJ present) (NN time))) (, ,) (NP (EX there)) (VP (VBD were) (RB n't) (NP (NP (DT any) (NN aircraft)) (PP (IN in) (NP (DT the) (NN air)))) (, ,) (ADJP (JJ right))) (. ?))) So we've got a couple of aircraft up there that have those instructions at this present time? At the present time, there weren't any aircraft in the air, right?

+ 7
- 0
tests/data_for_tests/io/MNLI/train.tsv View File

@@ -0,0 +1,7 @@
index promptID pairID genre sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2 label1 gold_label
0 31193 31193n government ( ( Conceptually ( cream skimming ) ) ( ( has ( ( ( two ( basic dimensions ) ) - ) ( ( product and ) geography ) ) ) . ) ) ( ( ( Product and ) geography ) ( ( are ( what ( make ( cream ( skimming work ) ) ) ) ) . ) ) (ROOT (S (NP (JJ Conceptually) (NN cream) (NN skimming)) (VP (VBZ has) (NP (NP (CD two) (JJ basic) (NNS dimensions)) (: -) (NP (NN product) (CC and) (NN geography)))) (. .))) (ROOT (S (NP (NN Product) (CC and) (NN geography)) (VP (VBP are) (SBAR (WHNP (WP what)) (S (VP (VBP make) (NP (NP (NN cream)) (VP (VBG skimming) (NP (NN work)))))))) (. .))) Conceptually cream skimming has two basic dimensions - product and geography. Product and geography are what make cream skimming work. neutral neutral
1 101457 101457e telephone ( you ( ( know ( during ( ( ( the season ) and ) ( i guess ) ) ) ) ( at ( at ( ( your level ) ( uh ( you ( ( ( lose them ) ( to ( the ( next level ) ) ) ) ( if ( ( if ( they ( decide ( to ( recall ( the ( the ( parent team ) ) ) ) ) ) ) ) ( ( the Braves ) ( decide ( to ( call ( to ( ( recall ( a guy ) ) ( from ( ( triple A ) ( ( ( then ( ( a ( double ( A guy ) ) ) ( ( goes up ) ( to ( replace him ) ) ) ) ) and ) ( ( a ( single ( A guy ) ) ) ( ( goes up ) ( to ( replace him ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ( You ( ( ( ( lose ( the things ) ) ( to ( the ( following level ) ) ) ) ( if ( ( the people ) recall ) ) ) . ) ) (ROOT (S (NP (PRP you)) (VP (VBP know) (PP (IN during) (NP (NP (DT the) (NN season)) (CC and) (NP (FW i) (FW guess)))) (PP (IN at) (IN at) (NP (NP (PRP$ your) (NN level)) (SBAR (S (INTJ (UH uh)) (NP (PRP you)) (VP (VBP lose) (NP (PRP them)) (PP (TO to) (NP (DT the) (JJ next) (NN level))) (SBAR (IN if) (S (SBAR (IN if) (S (NP (PRP they)) (VP (VBP decide) (S (VP (TO to) (VP (VB recall) (NP (DT the) (DT the) (NN parent) (NN team)))))))) (NP (DT the) (NNPS Braves)) (VP (VBP decide) (S (VP (TO to) (VP (VB call) (S (VP (TO to) (VP (VB recall) (NP (DT a) (NN guy)) (PP (IN from) (NP (NP (RB triple) (DT A)) (SBAR (S (S (ADVP (RB then)) (NP (DT a) (JJ double) (NNP A) (NN guy)) (VP (VBZ goes) (PRT (RP up)) (S (VP (TO to) (VP (VB replace) (NP (PRP him))))))) (CC and) (S (NP (DT a) (JJ single) (NNP A) (NN guy)) (VP (VBZ goes) (PRT (RP up)) (S (VP (TO to) (VP (VB replace) (NP (PRP him)))))))))))))))))))))))))))) (ROOT (S (NP (PRP You)) (VP (VBP lose) (NP (DT the) (NNS things)) (PP (TO to) (NP (DT the) (JJ following) (NN level))) (SBAR (IN if) (S (NP (DT the) (NNS people)) (VP (VBP recall))))) (. 
.))) you know during the season and i guess at at your level uh you lose them to the next level if if they decide to recall the the parent team the Braves decide to call to recall a guy from triple A then a double A guy goes up to replace him and a single A guy goes up to replace him You lose the things to the following level if the people recall. entailment entailment
2 134793 134793e fiction ( ( One ( of ( our number ) ) ) ( ( will ( ( ( carry out ) ( your instructions ) ) minutely ) ) . ) ) ( ( ( A member ) ( of ( my team ) ) ) ( ( will ( ( execute ( your orders ) ) ( with ( immense precision ) ) ) ) . ) ) (ROOT (S (NP (NP (CD One)) (PP (IN of) (NP (PRP$ our) (NN number)))) (VP (MD will) (VP (VB carry) (PRT (RP out)) (NP (PRP$ your) (NNS instructions)) (ADVP (RB minutely)))) (. .))) (ROOT (S (NP (NP (DT A) (NN member)) (PP (IN of) (NP (PRP$ my) (NN team)))) (VP (MD will) (VP (VB execute) (NP (PRP$ your) (NNS orders)) (PP (IN with) (NP (JJ immense) (NN precision))))) (. .))) One of our number will carry out your instructions minutely. A member of my team will execute your orders with immense precision. entailment entailment
3 37397 37397e fiction ( ( How ( ( ( do you ) know ) ? ) ) ( ( All this ) ( ( ( is ( their information ) ) again ) . ) ) ) ( ( This information ) ( ( belongs ( to them ) ) . ) ) (ROOT (S (SBARQ (WHADVP (WRB How)) (SQ (VBP do) (NP (PRP you)) (VP (VB know))) (. ?)) (NP (PDT All) (DT this)) (VP (VBZ is) (NP (PRP$ their) (NN information)) (ADVP (RB again))) (. .))) (ROOT (S (NP (DT This) (NN information)) (VP (VBZ belongs) (PP (TO to) (NP (PRP them)))) (. .))) How do you know? All this is their information again. This information belongs to them. entailment entailment
4 50563 50563n telephone ( yeah ( i ( ( tell you ) ( what ( ( though ( if ( you ( go ( price ( some ( of ( those ( tennis shoes ) ) ) ) ) ) ) ) ) ( i ( can ( see ( why ( now ( you ( know ( they ( 're ( ( getting up ) ( in ( the ( hundred ( dollar range ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ( ( The ( tennis shoes ) ) ( ( have ( ( a range ) ( of prices ) ) ) . ) ) (ROOT (S (VP (VB yeah) (S (NP (FW i)) (VP (VB tell) (NP (PRP you)) (SBAR (WHNP (WP what)) (S (SBAR (RB though) (IN if) (S (NP (PRP you)) (VP (VBP go) (VP (VB price) (NP (NP (DT some)) (PP (IN of) (NP (DT those) (NN tennis) (NNS shoes)))))))) (NP (FW i)) (VP (MD can) (VP (VB see) (SBAR (WHADVP (WRB why)) (S (ADVP (RB now)) (NP (PRP you)) (VP (VBP know) (SBAR (S (NP (PRP they)) (VP (VBP 're) (VP (VBG getting) (PRT (RP up)) (PP (IN in) (NP (DT the) (CD hundred) (NN dollar) (NN range))))))))))))))))))) (ROOT (S (NP (DT The) (NN tennis) (NNS shoes)) (VP (VBP have) (NP (NP (DT a) (NN range)) (PP (IN of) (NP (NNS prices))))) (. .))) yeah i tell you what though if you go price some of those tennis shoes i can see why now you know they're getting up in the hundred dollar range The tennis shoes have a range of prices. neutral neutral
11 11877 11877c travel ( ( Fun ( for ( ( adults and ) children ) ) ) . ) ( ( Fun ( for ( only children ) ) ) . ) (ROOT (S (VP (VB Fun) (PP (IN for) (NP (NNS adults) (CC and) (NNS children)))) (. .))) (ROOT (S (VP (VB Fun) (PP (IN for) (NP (JJ only) (NNS children)))) (. .))) Fun for adults and children. Fun for only children. contradiction contradiction

+ 38
- 0
tests/data_for_tests/io/MSRA_NER/dev.conll View File

@@ -0,0 +1,38 @@
把 O
欧 B-LOC

美 B-LOC
、 O

港 B-LOC
台 B-LOC

流 O
行 O

的 O
食 O

品 O
类 O

图 O
谱 O

马 B-PER
列 B-PER

主 O
义 O

在 O
中 B-LOC

国 I-LOC
传 O

播 O
的 O

历 O
史 O

+ 31
- 0
tests/data_for_tests/io/MSRA_NER/test.conll View File

@@ -0,0 +1,31 @@
中 B-ORG
共 I-ORG

中 I-ORG
央 I-ORG

致 O
中 B-ORG

国 I-ORG
致 I-ORG

公 I-ORG
党 I-ORG

十 I-ORG
一 I-ORG

大 I-ORG
的 O

贺 O
词 O


各 O

位 O
代 O

表 O

+ 60
- 0
tests/data_for_tests/io/MSRA_NER/train.conll View File

@@ -0,0 +1,60 @@
是 O
我 O

们 O
收 O

藏 O
北 B-LOC

京 I-LOC
史 O

料 O

调 O
查 O

范 O
围 O

涉 O
及 O

故 B-LOC
宫 I-LOC

、 O
历 B-LOC

博 I-LOC
、 O

古 B-ORG
研 I-ORG

所 I-ORG
、 O

北 B-LOC
大 I-LOC

清 I-LOC
华 I-LOC

图 I-LOC
书 I-LOC

馆 I-LOC
. O

夏 B-PER
财 I-PER

兴 I-PER
家 O

分 O
到 O

田 O

+ 10
- 0
tests/data_for_tests/io/OntoNotes/dev.txt View File

@@ -0,0 +1,10 @@

bc/msnbc/00/msnbc_0000 0 0 Hi UH (TOP(FRAG(INTJ*) - - - Dan_Abrams * -
bc/msnbc/00/msnbc_0000 0 1 everyone NN (NP*) - - - Dan_Abrams * -
bc/msnbc/00/msnbc_0000 0 2 /. . *)) - - - Dan_Abrams * -

bc/msnbc/00/msnbc_0000 0 0 first RB (TOP(S(ADVP* - - - Dan_Abrams * (ARGM-TMP* * * * -
bc/msnbc/00/msnbc_0000 0 1 up RB * - - - Dan_Abrams * * * * * -
bc/msnbc/00/msnbc_0000 0 2 on IN (PP* - - - Dan_Abrams * * * * * -
bc/msnbc/00/msnbc_0000 0 3 the DT (NP* - - - Dan_Abrams * * * * * -
bc/msnbc/00/msnbc_0000 0 4 docket NN *)) docket - - Dan_Abrams * * * * * -

+ 10
- 0
tests/data_for_tests/io/OntoNotes/test.txt View File

@@ -0,0 +1,10 @@

bc/msnbc/00/msnbc_0007 0 0 Dealing VBG (TOP(VP* deal 01 - speaker_1 * (V*) -
bc/msnbc/00/msnbc_0007 0 1 with IN (PP* - - - speaker_1 * (ARG1* -
bc/msnbc/00/msnbc_0007 0 2 serial JJ (NP(NP* - - - speaker_1 * * (156
bc/msnbc/00/msnbc_0007 0 3 crimes NNS *) crime - 1 speaker_1 * * 156)
bc/msnbc/00/msnbc_0007 0 4 per FW (ADVP* - - - speaker_1 * * -
bc/msnbc/00/msnbc_0007 0 5 se FW *))) - - - speaker_1 * *) -
bc/msnbc/00/msnbc_0007 0 6 /. . *)) - - - speaker_1 * * -

bc/msnbc/00/msnbc_0007 0 0 We PRP (TOP(S(NP*) - - - speaker_1 * (ARG0*) * (90)

+ 50
- 0
tests/data_for_tests/io/OntoNotes/train.txt View File

@@ -0,0 +1,50 @@

bc/msnbc/00/msnbc_0003 0 0 The DT (TOP(S(NP* - - - Chris_Matthews * * (ARG1* * * * * -
bc/msnbc/00/msnbc_0003 0 1 move NN *) move 02 2 Chris_Matthews * (V*) *) * * * * -
bc/msnbc/00/msnbc_0003 0 2 comes VBZ (VP* come 03 2 Chris_Matthews * * (V*) * * * * -
bc/msnbc/00/msnbc_0003 0 3 a DT (SBAR(NP* - - - Chris_Matthews (DATE* * (ARGM-TMP* * * * * -
bc/msnbc/00/msnbc_0003 0 4 month NN *) month - 2 Chris_Matthews *) * * * * * * -
bc/msnbc/00/msnbc_0003 0 5 before IN * - - - Chris_Matthews * * * * * * * -
bc/msnbc/00/msnbc_0003 0 6 the DT (S(NP* - - - Chris_Matthews * * * * (ARG1* (ARG0* * -
bc/msnbc/00/msnbc_0003 0 7 Senate NNP *) - - - Chris_Matthews (ORG) * * * *) *) * -
bc/msnbc/00/msnbc_0003 0 8 is VBZ (VP* be 03 - Chris_Matthews * * * (V*) * * * -
bc/msnbc/00/msnbc_0003 0 9 scheduled VBN (VP* schedule 01 - Chris_Matthews * * * * (V*) * * -
bc/msnbc/00/msnbc_0003 0 10 to TO (S(VP* - - - Chris_Matthews * * * * (ARG2* * * -
bc/msnbc/00/msnbc_0003 0 11 hold VB (VP* hold 04 8 Chris_Matthews * * * * * (V*) * -
bc/msnbc/00/msnbc_0003 0 12 confirmation NN (NP(NP* - - - Chris_Matthews * * * * * (ARG1* (ARG2*) -
bc/msnbc/00/msnbc_0003 0 13 hearings NNS *) hearing 01 1 Chris_Matthews * * * * * * (V*) -
bc/msnbc/00/msnbc_0003 0 14 on IN (PP* - - - Chris_Matthews * * * * * * (ARG1* -
bc/msnbc/00/msnbc_0003 0 15 President NNP (NP(NP(NP* - - - Chris_Matthews * * * * * * * (194
bc/msnbc/00/msnbc_0003 0 16 Bush NNP * - - - Chris_Matthews (PERSON) * * * * * * -
bc/msnbc/00/msnbc_0003 0 17 's POS *) - - - Chris_Matthews * * * * * * * 194)
bc/msnbc/00/msnbc_0003 0 18 Supreme NNP (NML* - - - Chris_Matthews (ORG* * * * * * * -
bc/msnbc/00/msnbc_0003 0 19 Court NNP *) - - - Chris_Matthews *) * * * * * * -
bc/msnbc/00/msnbc_0003 0 20 nominee NN *) - - - Chris_Matthews * * * * * * * -
bc/msnbc/00/msnbc_0003 0 21 John NNP (NP* - - - Chris_Matthews (PERSON* * * * * * * -
bc/msnbc/00/msnbc_0003 0 22 Roberts NNP *)))))))))))) - - - Chris_Matthews *) * *) * *) *) *) -
bc/msnbc/00/msnbc_0003 0 23 /. . *)) - - - Chris_Matthews * * * * * * * -

bc/msnbc/00/msnbc_0003 0 0 Senator NNP (TOP(S(NP(NP* - - - Chris_Matthews * (ARG1* * * (162
bc/msnbc/00/msnbc_0003 0 1 Chris NNP * - - - Chris_Matthews (PERSON* * * * -
bc/msnbc/00/msnbc_0003 0 2 Dodd NNP *) - - - Chris_Matthews *) * * * -
bc/msnbc/00/msnbc_0003 0 3 of IN (PP* - - - Chris_Matthews * * * * -
bc/msnbc/00/msnbc_0003 0 4 Connecticut NNP (NP*))) - - - Chris_Matthews (GPE) *) * * 162)
bc/msnbc/00/msnbc_0003 0 5 was VBD (VP* be 01 1 Chris_Matthews * (V*) * * -
bc/msnbc/00/msnbc_0003 0 6 among IN (PP* - - - Chris_Matthews * (ARG2* * * -
bc/msnbc/00/msnbc_0003 0 7 those DT (NP(NP* - - - Chris_Matthews * * (ARG0* * -
bc/msnbc/00/msnbc_0003 0 8 Democrats NNPS *) - - - Chris_Matthews (NORP) * *) * -
bc/msnbc/00/msnbc_0003 0 9 who WP (SBAR(WHNP*) - - - Chris_Matthews * * (R-ARG0*) * -
bc/msnbc/00/msnbc_0003 0 10 spoke VBD (S(VP* speak 03 5 Chris_Matthews * * (V*) * -
bc/msnbc/00/msnbc_0003 0 11 out RP (PRT*) - - - Chris_Matthews * * * * -
bc/msnbc/00/msnbc_0003 0 12 against IN (PP* - - - Chris_Matthews * * (ARG1* * -
bc/msnbc/00/msnbc_0003 0 13 Bolton NNP (NP(NP* - - - Chris_Matthews (PERSON) * * (ARG1* (31|(130
bc/msnbc/00/msnbc_0003 0 14 's POS *) - - - Chris_Matthews * * * *) 31)
bc/msnbc/00/msnbc_0003 0 15 appointment NN *)) appointment 01 1 Chris_Matthews * * *) (V*) 130)
bc/msnbc/00/msnbc_0003 0 16 today NN (NP*))))))) today - 2 Chris_Matthews (DATE) *) (ARGM-TMP*) * (121)
bc/msnbc/00/msnbc_0003 0 17 /. . *)) - - - Chris_Matthews * * * * -

bc/msnbc/00/msnbc_0003 0 0 I PRP (TOP(S(NP*) - - - Christopher_Dodd * * (ARG0*) * (162)
bc/msnbc/00/msnbc_0003 0 1 just RB (ADVP*) - - - Christopher_Dodd * * (ARGM-ADV*) * -
bc/msnbc/00/msnbc_0003 0 2 do VBP (VP* do 01 - Christopher_Dodd * (V*) * * -
bc/msnbc/00/msnbc_0003 0 3 n't RB * - - - Christopher_Dodd * * (ARGM-NEG*) * -
bc/msnbc/00/msnbc_0003 0 4 think VB (VP* think 01 1 Christopher_Dodd * * (V*) * -

+ 6
- 0
tests/data_for_tests/io/QNLI/dev.tsv View File

@@ -0,0 +1,6 @@
index question sentence label
0 What came into force after the new constitution was herald? As of that day, the new constitution heralding the Second Republic came into force. entailment
1 What is the first major city in the stream of the Rhine? The most important tributaries in this area are the Ill below of Strasbourg, the Neckar in Mannheim and the Main across from Mainz. not_entailment
2 What is the minimum required if you want to teach in Canada? In most provinces a second Bachelor's Degree such as a Bachelor of Education is required to become a qualified teacher. not_entailment
3 How was Temüjin kept imprisoned by the Tayichi'ud? The Tayichi'ud enslaved Temüjin (reportedly with a cangue, a sort of portable stocks), but with the help of a sympathetic guard, the father of Chilaun (who later became a general of Genghis Khan), he was able to escape from the ger (yurt) in the middle of the night by hiding in a river crevice.[citation needed] entailment
4 What did Herr Gott, dich loben wir become known as ? He paraphrased the Te Deum as "Herr Gott, dich loben wir" with a simplified form of the melody. not_entailment

+ 6
- 0
tests/data_for_tests/io/QNLI/test.tsv View File

@@ -0,0 +1,6 @@
index question sentence
0 What organization is devoted to Jihad against Israel? For some decades prior to the First Palestine Intifada in 1987, the Muslim Brotherhood in Palestine took a "quiescent" stance towards Israel, focusing on preaching, education and social services, and benefiting from Israel's "indulgence" to build up a network of mosques and charitable organizations.
1 In what century was the Yarrow-Schlick-Tweedy balancing system used? In the late 19th century, the Yarrow-Schlick-Tweedy balancing 'system' was used on some marine triple expansion engines.
2 The largest brand of what store in the UK is located in Kingston Park? Close to Newcastle, the largest indoor shopping centre in Europe, the MetroCentre, is located in Gateshead.
3 What does the IPCC rely on for research? In principle, this means that any significant new evidence or events that change our understanding of climate science between this deadline and publication of an IPCC report cannot be included.
4 What is the principle about relating spin and space variables? Thus in the case of two fermions there is a strictly negative correlation between spatial and spin variables, whereas for two bosons (e.g. quanta of electromagnetic waves, photons) the correlation is strictly positive.

+ 6
- 0
tests/data_for_tests/io/QNLI/train.tsv View File

@@ -0,0 +1,6 @@
index question sentence label
0 When did the third Digimon series begin? Unlike the two seasons before it and most of the seasons that followed, Digimon Tamers takes a darker and more realistic approach to its story featuring Digimon who do not reincarnate after their deaths and more complex character development in the original Japanese. not_entailment
1 Which missile batteries often have individual launchers several kilometres from one another? When MANPADS is operated by specialists, batteries may have several dozen teams deploying separately in small sections; self-propelled air defence guns may deploy in pairs. not_entailment
2 What two things does Popper argue Tarski's theory involves in an evaluation of truth? He bases this interpretation on the fact that examples such as the one described above refer to two things: assertions and the facts to which they refer. entailment
3 What is the name of the village 9 miles north of Calafat where the Ottoman forces attacked the Russians? On 31 December 1853, the Ottoman forces at Calafat moved against the Russian force at Chetatea or Cetate, a small village nine miles north of Calafat, and engaged them on 6 January 1854. entailment
4 What famous palace is located in London? London contains four World Heritage Sites: the Tower of London; Kew Gardens; the site comprising the Palace of Westminster, Westminster Abbey, and St Margaret's Church; and the historic settlement of Greenwich (in which the Royal Observatory, Greenwich marks the Prime Meridian, 0° longitude, and GMT). not_entailment

+ 2
- 0
tests/data_for_tests/io/Quora/dev.tsv View File

@@ -0,0 +1,2 @@
1 How do I get funding for my web based startup idea ? How do I get seed funding pre product ? 327970
0 Is honey a viable alternative to sugar for diabetics ? How would you compare the United States ' euthanasia laws to Denmark ? 90348

+ 2
- 0
tests/data_for_tests/io/Quora/test.tsv View File

@@ -0,0 +1,2 @@
1 What should I do to avoid sleeping in class ? How do I not sleep in a boring class ? 50018
0 Do women support each other more than men do ? Do women need more compliments than men ? 126924

+ 2
- 0
tests/data_for_tests/io/Quora/train.tsv View File

@@ -0,0 +1,2 @@
1 What is your review of Hidden Figures -LRB- 2016 movie -RRB- ? What are your impressions of Hidden Figures -LRB- 2017 movie -RRB- ? 11877
0 Currently , all Supreme Court Justices come from very elite law schools , is it similar for the best lawyers in private practice ? What 's your type of jungle -LRB- concrete or nature -RRB- and why ? 221489

+ 6
- 0
tests/data_for_tests/io/R52/dev.csv View File

@@ -0,0 +1,6 @@
trade,canadians urge exemption u trade bill group canadian lawmakers ontario today asked u counterparts exempt canada mandatory trade retaliation provisions major trade bill considered u congress meeting northeast midwest coalition organization u legislators david cooke chairman ontario parliament select committee economic affairs said exemption would help trade relations trade legislation considered full house late april would require president reagan retaliate foreign unfair trade practices unless trade actions would harm u economy currently reagan reject trade sanctions grounds cooke member liberal party told u congressmen understand trade bill think concerns parts world would suggest best concerns canada consider country bill added canada united states largest trading partner two way trade billion dlrs according coalition u ran billion dlr deficit manufactured goods year compared billion dlr surplus services trade reuter
earn,american corp nd qtr feb shr profit one cts vs loss three cts net profit vs loss revs mln vs mln six months shr profit six cts vs loss six cts net profit mln vs loss mln revs mln vs mln note six months includes gain four cts change accounting principle reuter
earn,meyers co increases dividend qtly div eight cts vs seven cts prior payable may record april reuter
earn,meyers co year feb shr dlrs vs dlrs net mln dlrs vs mln revs mln vs mln note results reflect year month period company changed fiscal year end february march reuter
earn,kelly oil gas partners year dec shr cts vs cts net mln vs mln revs mln vs mln reuter
money-fx,japan seeks strengthen paris currency accord japan seek strengthen paris accord currency stability meeting group seven leading industrial nations tomorrow japanese officials said however officials japanese finance minister kiichi miyazawa asked identified would provide details wanted accord signed six leading industrial democracies february strengthened currency target zones reference ranges discussed g meeting scheduled tomorrow japanese officials said meeting held conjunction week international monetary fund world bank sessions currency pact need changing language used paris accord officials said miyazawa met u treasury secretary james baker early afternoon discussed dollar yen exchange rates officials said declined disclosed details discussion japanese officials also declined detail miyazawa baker discussed subject greater joint intervention currency markets stabilize dollar independent american intervention officials said money market action stabilize dollar benefit japan suffering sharp appreciation currency also benefit united states well u japan take steps boost domestic demand reduce trade surplus japan explain economic measures g officials said however miyazawa failed outline size japanese economic package meeting baker today japanese budget authorized parliament despite new fiscal year started april one officials said japan ruling liberal democratic party revealed economic package today calling billion yen additional spending reuter

+ 6
- 0
tests/data_for_tests/io/R52/test.csv View File

@@ -0,0 +1,6 @@
pet-chem,italy eni invest venezuelan projects italy state owned ente nazionale idrocarburi eni invest mln dlrs two joint ventures coal petroleos de venezuela eni president franco said speaking news conference said two projects eventually bring mln dlrs annually foreign exchange venezuela help diversify country export base joint ventures principal instrument allowing resources industrialized countries developing world lead future growth said eni subsidiary join petrochemical subsidiary pdvsa building mln dlr plant produce gasoline additive used increase octane levels mt per year plant jose eastern venezuela fed butane produced pdvsa eastern complex eni owns pct joint venture company super c pct remaining three pct sold private investors production set begin third quarter officials said plant one saudi arabia another eni subsidiary agip sign letter intent caracas tomorrow enter partnership pdvsa mine coal deposits western state said feasibility studies still done project definitive accord slated august added agip atlantic richfield coal arco subsidiary formed consortium pct project whose total cost estimated mln dlrs company said agip invest pct mln dlrs project said reuter
earn,republicbank rpt brazil loans republicbank corp said placed mln dlrs intermediate term loans brazil non accrual basis march said reclassification reduce first quarter earnings mln dlrs taxes mln dlrs taxes brazil change position moratorium interest payments republicbank also said net income first quarter expected mln dlrs cts share fully diluted basis year ago first quarter company earned mln dlrs cts share company also said first quarter results expected include provision loan losses mln dlrs mln dlrs net loan charge offs mln dlrs said provision increase loan losses mln dlrs pct loans republicbank total assets billion dlrs announced december agreement interfirst corp form first republicbank corp merger approved regulatory agencies stockholders would create th largest bank holding company united states reuter
acq,amoskeag bank seek rehearing amoskeag bank shares inc portsmouth savings bank said file rehearing new hampshire supreme court march ruling state regulatory approval amoskeag acquisition portsmouth decision believe go well beyond affiliation amoskeag portsmouth savings bank said amoskeag chairman william transaction opposed group portsmouth investors wanted bank remain independent according press reports reuter
strategic-metal,doe recommends special unit uranium energy secretary john herrington told congress federally chartered corporation would best way manage operate government uranium program said letter congressmen unless program run energy department improved sales worth five billion dlrs could lost program annual commercial sales one billion dlrs holds pct free world market services department official said world market uranium power utilities increasingly competitive private entity could better tap administration plan spin department uranium operation line effort reduce federal government role areas feels private enterprise could efficient reuter
earn,declares stock dividend financial corp said declared stock dividend one class share two class shares held payable may shareholders record april reuter
acq,allegheny ag shareholders file suit allegheny international inc agreed merge jointly formed first boston inc affiliate deal worth mn dlrs said shareholders preferred stock filed class action complaint company complaint alleges among things company board agreed pay first boston illegal seven mln dlr fee received higher offer company prior buyout suit fee allegheny ability attract offers take actions would benefit holders preferred stock complaint also alleges federal securities laws violations breach fiduciary duty suit requests injunction proceeding pending offer made sunter acquisition acquire allegheny sunter acquisition corp sunter holdings corp formed first boston allegheny allegheny said sunter concerns intend vigorously defend complaint charges complaints filed robert parties believed shares allegheny preferred stock reuter

+ 6
- 0
tests/data_for_tests/io/R52/train.csv View File

@@ -0,0 +1,6 @@
earn,convertible securities sets dividend convertible securities fund inc said board declared initial quarterly dividend three cents per share payable april shareholders record april said anticipates paying regular quarterly dividend company made initial public stock offering march five reuter
jobs,n z unemployment rate pct december quarter new zealand unemployment rate pct workforce quarter ended december unchanged revised pct preliminary pct previous quarter slightly pct year earlier quarter statistics department said department citing household labour force survey said statement number unemployed october december september quarter year earlier reuter
rubber,japan rubber stocks fall march japan rubber stocks fell tonnes march february march japan rubber trade association said stocks tonnes february year earlier comparisons march feb march crude rubber synthetic latex reuter
money-fx,south korean fixed month high bank korea said fixed dollar highest level since february set yesterday risen pct dollar far year rising pct reuter
copper,nippon mining lowers copper price nippon mining co ltd said lowered selling price electrolytic copper yen per tonne effective immediately reuter
ship,australian unions launch new south wales strikes australian trade unions said launched week long strikes industrial action new south wales nsw protest new laws would reduce injury compensation payments union sources said talks state government broke last night two sides scheduled meet later today attempt find compromise rail freight shipping cargo movements country state first affected union officials said almost every business sector hit unless quick settlement state government recently introduced new workers compensation act would cut cash benefits injured workers third act awaiting parliamentary ratification nsw state premier said workers compensation risen recent years proposed cuts would save hundreds mlns dollars year union officials said industrial action could spread states federal government also plans make sharp cuts workers compensation reuter

+ 6
- 0
tests/data_for_tests/io/R8/dev.csv View File

@@ -0,0 +1,6 @@
acq,amoskeag bank seek amoskeag bank shares inc portsmouth savings bank said file new hampshire supreme court march ruling state regulatory approval amoskeag acquisition portsmouth decision believe go well beyond affiliation amoskeag portsmouth savings bank said amoskeag chairman william transaction opposed group portsmouth investors wanted bank remain independent according press reports reuter
earn,declares stock dividend financial corp said declared stock dividend one class share two class shares held payable may shareholders record april reuter
acq,allegheny ag shareholders file suit allegheny international inc agreed merge jointly formed first boston inc affiliate deal worth mn dlrs said shareholders preferred stock filed class action complaint company complaint alleges among things company board agreed pay first boston illegal seven mln dlr fee received higher offer company prior buyout suit fee allegheny ability attract offers take actions would benefit holders preferred stock complaint also alleges federal securities laws violations fiduciary duty suit requests injunction proceeding pending offer made sunter acquisition acquire allegheny sunter acquisition corp sunter holdings corp formed first boston allegheny allegheny said sunter concerns intend vigorously defend complaint charges complaints filed robert parties believed shares allegheny preferred stock reuter
trade,canadians urge exemption u trade bill group canadian lawmakers ontario today asked u exempt canada mandatory trade retaliation provisions major trade bill considered u congress meeting northeast midwest coalition organization u legislators david cooke chairman ontario parliament select committee economic affairs said exemption would help trade relations trade legislation considered full house late april would require president reagan retaliate foreign unfair trade practices unless trade actions would harm u economy currently reagan reject trade sanctions grounds cooke member liberal party told u congressmen understand trade bill think concerns parts world would suggest best concerns canada consider country bill added canada united states largest trading partner two way trade billion dlrs according coalition u ran billion dlr deficit manufactured goods year compared billion dlr surplus services trade reuter
earn,american corp nd qtr feb shr profit one cts vs loss three cts net profit vs loss revs mln vs mln six months shr profit six cts vs loss six cts net profit mln vs loss mln revs mln vs mln note six months includes gain four cts change accounting principle reuter
earn,meyers co increases dividend qtly div eight cts vs seven cts prior payable may record april reuter

+ 6
- 0
tests/data_for_tests/io/R8/test.csv View File

@@ -0,0 +1,6 @@
earn,technology inc nd qtr march shr profit eight cts vs loss dlrs net profit vs loss revs mln vs avg shrs vs six mths shr loss nine cts vs loss dlrs net loss vs loss revs mln vs mln avg shrs vs reuter
earn,nacco industries report nd qtr gain nacco industries inc said report gain second quarter mln dlrs dlrs share sale stock subsidiary nacco said north american coal corp unit received notice consolidation coal co unit du pont co dd exercise option buy stock mining co subsidiary north american coal stock north american coal receive mln dlrs mln paid closing april rest company said addition pay dividend north american coal mln dlrs retained earnings closing funds previously used finance mining operations consolidation coal got option group utilities received option nacco nacco reported earnings mln dlrs dlrs share last year second quarter generated mln dlrs net income equal cts share nacco total earnings dlrs share produced mln short tons mln tons produced north american coal nacco said reuter
earn,buffton post investigation charge buffton corp said conduct investigation plant designated site result charge six cts per share second quarter year ago second quarter buffton reported net income cts share dlrs sales mln dlrs study completed nine months determine action may required inc plant former owner split cost buffton said share cost dlrs reuter
acq,american dynamics sell pct stake american dynamics corp meridian reserve inc said signed definitive agreement meridian buy mln shares pct american dynamics common stock terms agreement santa calif based meridian said pay based american dynamics one mln dlrs cash notes five years shares common stock meridian said option issue additional shares common next two years payment certain notes meridian oil gas company whose operations primarily oklahoma said acquisition increase consolidated assets mln dlrs committed gas reserves mln dlrs discounted present value american dynamics engaged gas gathering transmission liquids also oklahoma companies said five plants miles transmission lines five oklahoma counties reuter
money-fx,ussr exchange rates soviet state bank effective april roubles per hundred unless stated u stg unch fin unch yen aus aus dlr unch pak unch ind unch unch one unch unch
earn,republicbank rpt brazil loans republicbank corp said placed mln dlrs intermediate term loans brazil non accrual basis march said reclassification reduce first quarter earnings mln dlrs taxes mln dlrs taxes brazil change position moratorium interest payments republicbank also said net income first quarter expected mln dlrs cts share fully diluted basis year ago first quarter company earned mln dlrs cts share company also said first quarter results expected include provision loan losses mln dlrs mln dlrs net loan charge offs mln dlrs said provision increase loan losses mln dlrs pct loans republicbank total assets billion dlrs announced december agreement interfirst corp form first republicbank corp merger approved regulatory agencies stockholders would create th largest bank holding company united states reuter

+ 6
- 0
tests/data_for_tests/io/R8/train.csv View File

@@ -0,0 +1,6 @@
earn,meyers co year feb shr dlrs vs dlrs net mln dlrs vs mln revs mln vs mln note results reflect year month period company changed fiscal year end february march reuter
earn,kelly oil gas partners year dec shr cts vs cts net mln vs mln revs mln vs mln reuter
money-fx,japan seeks strengthen paris currency accord japan seek strengthen paris accord currency stability meeting group seven leading industrial nations tomorrow japanese officials said however officials japanese finance minister kiichi miyazawa asked identified would provide details wanted accord signed six leading industrial democracies february strengthened currency target zones reference ranges discussed g meeting scheduled tomorrow japanese officials said meeting held conjunction week international monetary fund world bank sessions currency pact need changing language used paris accord officials said miyazawa met u treasury secretary james baker early afternoon discussed dollar yen exchange rates officials said declined disclosed details discussion japanese officials also declined detail miyazawa baker discussed subject greater joint intervention currency markets stabilize dollar independent american intervention officials said money market action stabilize dollar benefit japan suffering sharp appreciation currency also benefit united states well u japan take steps boost domestic demand reduce trade surplus japan explain economic measures g officials said however miyazawa failed outline size japanese economic package meeting baker today japanese budget authorized parliament despite new fiscal year started april one officials said japan ruling liberal democratic party revealed economic package today calling billion yen additional spending reuter
earn,convertible securities sets dividend convertible securities fund inc said board declared initial quarterly dividend three cents per share payable april shareholders record april said anticipates paying regular quarterly dividend company made initial public stock offering march five reuter
money-fx,south korean fixed month high bank korea said fixed dollar highest level since february set yesterday risen pct dollar far year rising pct reuter
ship,australian unions launch new south wales strikes australian trade unions said launched week long strikes industrial action new south wales nsw protest new laws would reduce injury compensation payments union sources said talks state government broke last night two sides scheduled meet later today attempt find compromise rail freight shipping cargo movements country state first affected union officials said almost every business sector hit unless quick settlement state government recently introduced new workers compensation act would cut cash benefits injured workers third act awaiting parliamentary nsw state premier said workers compensation risen recent years proposed cuts would save hundreds dollars year union officials said industrial action could spread states federal government also plans make sharp cuts workers compensation reuter

+ 6
- 0
tests/data_for_tests/io/RTE/dev.tsv View File

@@ -0,0 +1,6 @@
index sentence1 sentence2 label
0 Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation. Christopher Reeve had an accident. not_entailment
1 Yet, we now are discovering that antibiotics are losing their effectiveness against illness. Disease-causing bacteria are mutating faster than we can come up with new antibiotics to fight the new variations. Bacteria is winning the war against antibiotics. entailment
2 Cairo is now home to some 15 million people - a burgeoning population that produces approximately 10,000 tonnes of rubbish per day, putting an enormous strain on public services. In the past 10 years, the government has tried hard to encourage private investment in the refuse sector, but some estimate 4,000 tonnes of waste is left behind every day, festering in the heat as it waits for someone to clear it up. It is often the people in the poorest neighbourhoods that are worst affected. But in some areas they are fighting back. In Shubra, one of the northern districts of the city, the residents have taken to the streets armed with dustpans and brushes to clean up public areas which have been used as public dumps. 15 million tonnes of rubbish are produced daily in Cairo. not_entailment
3 The Amish community in Pennsylvania, which numbers about 55,000, lives an agrarian lifestyle, shunning technological advances like electricity and automobiles. And many say their insular lifestyle gives them a sense that they are protected from the violence of American society. But as residents gathered near the school, some wearing traditional garb and arriving in horse-drawn buggies, they said that sense of safety had been shattered. "If someone snaps and wants to do something stupid, there's no distance that's going to stop them," said Jake King, 56, an Amish lantern maker who knew several families whose children had been shot. Pennsylvania has the biggest Amish community in the U.S. not_entailment
4 Security forces were on high alert after an election campaign in which more than 1,000 people, including seven election candidates, have been killed. Security forces were on high alert after a campaign marred by violence. entailment

+ 6
- 0
tests/data_for_tests/io/RTE/test.tsv View File

@@ -0,0 +1,6 @@
index sentence1 sentence2
0 Mangla was summoned after Madhumita's sister Nidhi Shukla, who was the first witness in the case. Shukla is related to Mangla.
1 Authorities in Brazil say that more than 200 people are being held hostage in a prison in the country's remote, Amazonian-jungle state of Rondonia. Authorities in Brazil hold 200 people as hostage.
2 A mercenary group faithful to the warmongering policy of former Somozist colonel Enrique Bermudez attacked an IFA truck belonging to the interior ministry at 0900 on 26 March in El Jicote, wounded and killed an interior ministry worker and wounded five others. An interior ministry worker was killed by a mercenary group.
3 The British ambassador to Egypt, Derek Plumbly, told Reuters on Monday that authorities had compiled the list of 10 based on lists from tour companies and from families whose relatives have not been in contact since the bombings. Derek Plumbly resides in Egypt.
4 Tibone estimated diamond production at four mines operated by Debswana -- Botswana's 50-50 joint venture with De Beers -- could reach 33 million carats this year. Botswana is a business partner of De Beers.

+ 6
- 0
tests/data_for_tests/io/RTE/train.tsv View File

@@ -0,0 +1,6 @@
index sentence1 sentence2 label
0 No Weapons of Mass Destruction Found in Iraq Yet. Weapons of Mass Destruction Found in Iraq. not_entailment
1 A place of sorrow, after Pope John Paul II died, became a place of celebration, as Roman Catholic faithful gathered in downtown Chicago to mark the installation of new Pope Benedict XVI. Pope Benedict XVI is the new leader of the Roman Catholic Church. entailment
2 Herceptin was already approved to treat the sickest breast cancer patients, and the company said, Monday, it will discuss with federal regulators the possibility of prescribing the drug for more breast cancer patients. Herceptin can be used to treat breast cancer. entailment
3 Judie Vivian, chief executive at ProMedica, a medical service company that helps sustain the 2-year-old Vietnam Heart Institute in Ho Chi Minh City (formerly Saigon), said that so far about 1,500 children have received treatment. The previous name of Ho Chi Minh City was Saigon. entailment
4 A man is due in court later charged with the murder 26 years ago of a teenager whose case was the first to be featured on BBC One's Crimewatch. Colette Aram, 16, was walking to her boyfriend's house in Keyworth, Nottinghamshire, on 30 October 1983 when she disappeared. Her body was later found in a field close to her home. Paul Stewart Hutchinson, 50, has been charged with murder and is due before Nottingham magistrates later. Paul Stewart Hutchinson is accused of having stabbed a girl. not_entailment

+ 5
- 0
tests/data_for_tests/io/SNLI/snli_1.0_dev.jsonl View File

@@ -0,0 +1,5 @@
{"annotator_labels": ["neutral", "entailment", "neutral", "neutral", "neutral"], "captionID": "4705552913.jpg#2", "gold_label": "neutral", "pairID": "4705552913.jpg#2r1n", "sentence1": "Two women are embracing while holding to go packages.", "sentence1_binary_parse": "( ( Two women ) ( ( are ( embracing ( while ( holding ( to ( go packages ) ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP are) (VP (VBG embracing) (SBAR (IN while) (S (NP (VBG holding)) (VP (TO to) (VP (VB go) (NP (NNS packages)))))))) (. .)))", "sentence2": "The sisters are hugging goodbye while holding to go packages after just eating lunch.", "sentence2_binary_parse": "( ( The sisters ) ( ( are ( ( hugging goodbye ) ( while ( holding ( to ( ( go packages ) ( after ( just ( eating lunch ) ) ) ) ) ) ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT The) (NNS sisters)) (VP (VBP are) (VP (VBG hugging) (NP (UH goodbye)) (PP (IN while) (S (VP (VBG holding) (S (VP (TO to) (VP (VB go) (NP (NNS packages)) (PP (IN after) (S (ADVP (RB just)) (VP (VBG eating) (NP (NN lunch))))))))))))) (. .)))"}
{"annotator_labels": ["entailment", "entailment", "entailment", "entailment", "entailment"], "captionID": "4705552913.jpg#2", "gold_label": "entailment", "pairID": "4705552913.jpg#2r1e", "sentence1": "Two women are embracing while holding to go packages.", "sentence1_binary_parse": "( ( Two women ) ( ( are ( embracing ( while ( holding ( to ( go packages ) ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP are) (VP (VBG embracing) (SBAR (IN while) (S (NP (VBG holding)) (VP (TO to) (VP (VB go) (NP (NNS packages)))))))) (. .)))", "sentence2": "Two woman are holding packages.", "sentence2_binary_parse": "( ( Two woman ) ( ( are ( holding packages ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (CD Two) (NN woman)) (VP (VBP are) (VP (VBG holding) (NP (NNS packages)))) (. .)))"}
{"annotator_labels": ["contradiction", "contradiction", "contradiction", "contradiction", "contradiction"], "captionID": "4705552913.jpg#2", "gold_label": "contradiction", "pairID": "4705552913.jpg#2r1c", "sentence1": "Two women are embracing while holding to go packages.", "sentence1_binary_parse": "( ( Two women ) ( ( are ( embracing ( while ( holding ( to ( go packages ) ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (CD Two) (NNS women)) (VP (VBP are) (VP (VBG embracing) (SBAR (IN while) (S (NP (VBG holding)) (VP (TO to) (VP (VB go) (NP (NNS packages)))))))) (. .)))", "sentence2": "The men are fighting outside a deli.", "sentence2_binary_parse": "( ( The men ) ( ( are ( fighting ( outside ( a deli ) ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT The) (NNS men)) (VP (VBP are) (VP (VBG fighting) (PP (IN outside) (NP (DT a) (NNS deli))))) (. .)))"}
{"annotator_labels": ["entailment", "entailment", "entailment", "entailment", "entailment"], "captionID": "2407214681.jpg#0", "gold_label": "entailment", "pairID": "2407214681.jpg#0r1e", "sentence1": "Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink.", "sentence1_binary_parse": "( ( ( Two ( young children ) ) ( in ( ( ( ( ( blue jerseys ) , ) ( one ( with ( the ( number 9 ) ) ) ) ) and ) ( one ( with ( the ( number 2 ) ) ) ) ) ) ) ( ( are ( ( ( standing ( on ( ( wooden steps ) ( in ( a bathroom ) ) ) ) ) and ) ( ( washing ( their hands ) ) ( in ( a sink ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (NP (CD Two) (JJ young) (NNS children)) (PP (IN in) (NP (NP (JJ blue) (NNS jerseys)) (, ,) (NP (NP (CD one)) (PP (IN with) (NP (DT the) (NN number) (CD 9)))) (CC and) (NP (NP (CD one)) (PP (IN with) (NP (DT the) (NN number) (CD 2))))))) (VP (VBP are) (VP (VP (VBG standing) (PP (IN on) (NP (NP (JJ wooden) (NNS steps)) (PP (IN in) (NP (DT a) (NN bathroom)))))) (CC and) (VP (VBG washing) (NP (PRP$ their) (NNS hands)) (PP (IN in) (NP (DT a) (NN sink)))))) (. .)))", "sentence2": "Two kids in numbered jerseys wash their hands.", "sentence2_binary_parse": "( ( ( Two kids ) ( in ( numbered jerseys ) ) ) ( ( wash ( their hands ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN in) (NP (JJ numbered) (NNS jerseys)))) (VP (VBP wash) (NP (PRP$ their) (NNS hands))) (. .)))"}
{"annotator_labels": ["neutral", "neutral", "neutral", "entailment", "entailment"], "captionID": "2407214681.jpg#0", "gold_label": "neutral", "pairID": "2407214681.jpg#0r1n", "sentence1": "Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink.", "sentence1_binary_parse": "( ( ( Two ( young children ) ) ( in ( ( ( ( ( blue jerseys ) , ) ( one ( with ( the ( number 9 ) ) ) ) ) and ) ( one ( with ( the ( number 2 ) ) ) ) ) ) ) ( ( are ( ( ( standing ( on ( ( wooden steps ) ( in ( a bathroom ) ) ) ) ) and ) ( ( washing ( their hands ) ) ( in ( a sink ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (NP (CD Two) (JJ young) (NNS children)) (PP (IN in) (NP (NP (JJ blue) (NNS jerseys)) (, ,) (NP (NP (CD one)) (PP (IN with) (NP (DT the) (NN number) (CD 9)))) (CC and) (NP (NP (CD one)) (PP (IN with) (NP (DT the) (NN number) (CD 2))))))) (VP (VBP are) (VP (VP (VBG standing) (PP (IN on) (NP (NP (JJ wooden) (NNS steps)) (PP (IN in) (NP (DT a) (NN bathroom)))))) (CC and) (VP (VBG washing) (NP (PRP$ their) (NNS hands)) (PP (IN in) (NP (DT a) (NN sink)))))) (. .)))", "sentence2": "Two kids at a ballgame wash their hands.", "sentence2_binary_parse": "( ( ( Two kids ) ( at ( a ballgame ) ) ) ( ( wash ( their hands ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (NP (CD Two) (NNS kids)) (PP (IN at) (NP (DT a) (NN ballgame)))) (VP (VBP wash) (NP (PRP$ their) (NNS hands))) (. .)))"}

+ 5
- 0
tests/data_for_tests/io/SNLI/snli_1.0_test.jsonl View File

@@ -0,0 +1,5 @@
{"annotator_labels": ["neutral", "contradiction", "contradiction", "neutral", "neutral"], "captionID": "2677109430.jpg#1", "gold_label": "neutral", "pairID": "2677109430.jpg#1r1n", "sentence1": "This church choir sings to the masses as they sing joyous songs from the book at a church.", "sentence1_binary_parse": "( ( This ( church choir ) ) ( ( ( sings ( to ( the masses ) ) ) ( as ( they ( ( sing ( joyous songs ) ) ( from ( ( the book ) ( at ( a church ) ) ) ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (DT This) (NN church) (NN choir)) (VP (VBZ sings) (PP (TO to) (NP (DT the) (NNS masses))) (SBAR (IN as) (S (NP (PRP they)) (VP (VBP sing) (NP (JJ joyous) (NNS songs)) (PP (IN from) (NP (NP (DT the) (NN book)) (PP (IN at) (NP (DT a) (NN church))))))))) (. .)))", "sentence2": "The church has cracks in the ceiling.", "sentence2_binary_parse": "( ( The church ) ( ( has ( cracks ( in ( the ceiling ) ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT The) (NN church)) (VP (VBZ has) (NP (NP (NNS cracks)) (PP (IN in) (NP (DT the) (NN ceiling))))) (. .)))"}
{"annotator_labels": ["entailment", "entailment", "entailment", "neutral", "entailment"], "captionID": "2677109430.jpg#1", "gold_label": "entailment", "pairID": "2677109430.jpg#1r1e", "sentence1": "This church choir sings to the masses as they sing joyous songs from the book at a church.", "sentence1_binary_parse": "( ( This ( church choir ) ) ( ( ( sings ( to ( the masses ) ) ) ( as ( they ( ( sing ( joyous songs ) ) ( from ( ( the book ) ( at ( a church ) ) ) ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (DT This) (NN church) (NN choir)) (VP (VBZ sings) (PP (TO to) (NP (DT the) (NNS masses))) (SBAR (IN as) (S (NP (PRP they)) (VP (VBP sing) (NP (JJ joyous) (NNS songs)) (PP (IN from) (NP (NP (DT the) (NN book)) (PP (IN at) (NP (DT a) (NN church))))))))) (. .)))", "sentence2": "The church is filled with song.", "sentence2_binary_parse": "( ( The church ) ( ( is ( filled ( with song ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT The) (NN church)) (VP (VBZ is) (VP (VBN filled) (PP (IN with) (NP (NN song))))) (. .)))"}
{"annotator_labels": ["contradiction", "contradiction", "contradiction", "contradiction", "contradiction"], "captionID": "2677109430.jpg#1", "gold_label": "contradiction", "pairID": "2677109430.jpg#1r1c", "sentence1": "This church choir sings to the masses as they sing joyous songs from the book at a church.", "sentence1_binary_parse": "( ( This ( church choir ) ) ( ( ( sings ( to ( the masses ) ) ) ( as ( they ( ( sing ( joyous songs ) ) ( from ( ( the book ) ( at ( a church ) ) ) ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (DT This) (NN church) (NN choir)) (VP (VBZ sings) (PP (TO to) (NP (DT the) (NNS masses))) (SBAR (IN as) (S (NP (PRP they)) (VP (VBP sing) (NP (JJ joyous) (NNS songs)) (PP (IN from) (NP (NP (DT the) (NN book)) (PP (IN at) (NP (DT a) (NN church))))))))) (. .)))", "sentence2": "A choir singing at a baseball game.", "sentence2_binary_parse": "( ( ( A choir ) ( singing ( at ( a ( baseball game ) ) ) ) ) . )", "sentence2_parse": "(ROOT (NP (NP (DT A) (NN choir)) (VP (VBG singing) (PP (IN at) (NP (DT a) (NN baseball) (NN game)))) (. .)))"}
{"annotator_labels": ["neutral", "neutral", "neutral", "neutral", "neutral"], "captionID": "6160193920.jpg#4", "gold_label": "neutral", "pairID": "6160193920.jpg#4r1n", "sentence1": "A woman with a green headscarf, blue shirt and a very big grin.", "sentence1_binary_parse": "( ( ( A woman ) ( with ( ( ( ( ( a ( green headscarf ) ) , ) ( blue shirt ) ) and ) ( a ( ( very big ) grin ) ) ) ) ) . )", "sentence1_parse": "(ROOT (NP (NP (DT A) (NN woman)) (PP (IN with) (NP (NP (DT a) (JJ green) (NN headscarf)) (, ,) (NP (JJ blue) (NN shirt)) (CC and) (NP (DT a) (ADJP (RB very) (JJ big)) (NN grin)))) (. .)))", "sentence2": "The woman is young.", "sentence2_binary_parse": "( ( The woman ) ( ( is young ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT The) (NN woman)) (VP (VBZ is) (ADJP (JJ young))) (. .)))"}
{"annotator_labels": ["entailment", "entailment", "contradiction", "entailment", "neutral"], "captionID": "6160193920.jpg#4", "gold_label": "entailment", "pairID": "6160193920.jpg#4r1e", "sentence1": "A woman with a green headscarf, blue shirt and a very big grin.", "sentence1_binary_parse": "( ( ( A woman ) ( with ( ( ( ( ( a ( green headscarf ) ) , ) ( blue shirt ) ) and ) ( a ( ( very big ) grin ) ) ) ) ) . )", "sentence1_parse": "(ROOT (NP (NP (DT A) (NN woman)) (PP (IN with) (NP (NP (DT a) (JJ green) (NN headscarf)) (, ,) (NP (JJ blue) (NN shirt)) (CC and) (NP (DT a) (ADJP (RB very) (JJ big)) (NN grin)))) (. .)))", "sentence2": "The woman is very happy.", "sentence2_binary_parse": "( ( The woman ) ( ( is ( very happy ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT The) (NN woman)) (VP (VBZ is) (ADJP (RB very) (JJ happy))) (. .)))"}

+ 5
- 0
tests/data_for_tests/io/SNLI/snli_1.0_train.jsonl View File

@@ -0,0 +1,5 @@
{"annotator_labels": ["neutral"], "captionID": "3416050480.jpg#4", "gold_label": "neutral", "pairID": "3416050480.jpg#4r1n", "sentence1": "A person on a horse jumps over a broken down airplane.", "sentence1_binary_parse": "( ( ( A person ) ( on ( a horse ) ) ) ( ( jumps ( over ( a ( broken ( down airplane ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN on) (NP (DT a) (NN horse)))) (VP (VBZ jumps) (PP (IN over) (NP (DT a) (JJ broken) (JJ down) (NN airplane)))) (. .)))", "sentence2": "A person is training his horse for a competition.", "sentence2_binary_parse": "( ( A person ) ( ( is ( ( training ( his horse ) ) ( for ( a competition ) ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) (VP (VBG training) (NP (PRP$ his) (NN horse)) (PP (IN for) (NP (DT a) (NN competition))))) (. .)))"}
{"annotator_labels": ["contradiction"], "captionID": "3416050480.jpg#4", "gold_label": "contradiction", "pairID": "3416050480.jpg#4r1c", "sentence1": "A person on a horse jumps over a broken down airplane.", "sentence1_binary_parse": "( ( ( A person ) ( on ( a horse ) ) ) ( ( jumps ( over ( a ( broken ( down airplane ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN on) (NP (DT a) (NN horse)))) (VP (VBZ jumps) (PP (IN over) (NP (DT a) (JJ broken) (JJ down) (NN airplane)))) (. .)))", "sentence2": "A person is at a diner, ordering an omelette.", "sentence2_binary_parse": "( ( A person ) ( ( ( ( is ( at ( a diner ) ) ) , ) ( ordering ( an omelette ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) (PP (IN at) (NP (DT a) (NN diner))) (, ,) (S (VP (VBG ordering) (NP (DT an) (NN omelette))))) (. .)))"}
{"annotator_labels": ["entailment"], "captionID": "3416050480.jpg#4", "gold_label": "entailment", "pairID": "3416050480.jpg#4r1e", "sentence1": "A person on a horse jumps over a broken down airplane.", "sentence1_binary_parse": "( ( ( A person ) ( on ( a horse ) ) ) ( ( jumps ( over ( a ( broken ( down airplane ) ) ) ) ) . ) )", "sentence1_parse": "(ROOT (S (NP (NP (DT A) (NN person)) (PP (IN on) (NP (DT a) (NN horse)))) (VP (VBZ jumps) (PP (IN over) (NP (DT a) (JJ broken) (JJ down) (NN airplane)))) (. .)))", "sentence2": "A person is outdoors, on a horse.", "sentence2_binary_parse": "( ( A person ) ( ( ( ( is outdoors ) , ) ( on ( a horse ) ) ) . ) )", "sentence2_parse": "(ROOT (S (NP (DT A) (NN person)) (VP (VBZ is) (ADVP (RB outdoors)) (, ,) (PP (IN on) (NP (DT a) (NN horse)))) (. .)))"}
{"annotator_labels": ["neutral"], "captionID": "2267923837.jpg#2", "gold_label": "neutral", "pairID": "2267923837.jpg#2r1n", "sentence1": "Children smiling and waving at camera", "sentence1_binary_parse": "( Children ( ( ( smiling and ) waving ) ( at camera ) ) )", "sentence1_parse": "(ROOT (NP (S (NP (NNP Children)) (VP (VBG smiling) (CC and) (VBG waving) (PP (IN at) (NP (NN camera)))))))", "sentence2": "They are smiling at their parents", "sentence2_binary_parse": "( They ( are ( smiling ( at ( their parents ) ) ) ) )", "sentence2_parse": "(ROOT (S (NP (PRP They)) (VP (VBP are) (VP (VBG smiling) (PP (IN at) (NP (PRP$ their) (NNS parents)))))))"}
{"annotator_labels": ["entailment"], "captionID": "2267923837.jpg#2", "gold_label": "entailment", "pairID": "2267923837.jpg#2r1e", "sentence1": "Children smiling and waving at camera", "sentence1_binary_parse": "( Children ( ( ( smiling and ) waving ) ( at camera ) ) )", "sentence1_parse": "(ROOT (NP (S (NP (NNP Children)) (VP (VBG smiling) (CC and) (VBG waving) (PP (IN at) (NP (NN camera)))))))", "sentence2": "There are children present", "sentence2_binary_parse": "( There ( ( are children ) present ) )", "sentence2_parse": "(ROOT (S (NP (EX There)) (VP (VBP are) (NP (NNS children)) (ADVP (RB present)))))"}

+ 6
- 0
tests/data_for_tests/io/SST-2/dev.tsv View File

@@ -0,0 +1,6 @@
sentence label
it 's a charming and often affecting journey . 1
unflinchingly bleak and desperate 0
allows us to hope that nolan is poised to embark a major career as a commercial yet inventive filmmaker . 1
the acting , costumes , music , cinematography and sound are all astounding given the production 's austere locales . 1
it 's slow -- very , very slow . 0

+ 6
- 0
tests/data_for_tests/io/SST-2/test.tsv View File

@@ -0,0 +1,6 @@
index sentence
0 uneasy mishmash of styles and genres .
1 this film 's relationship to actual tension is the same as what christmas-tree flocking in a spray can is to actual snow : a poor -- if durable -- imitation .
2 by the end of no such thing the audience , like beatrice , has a watchful affection for the monster .
3 director rob marshall went out gunning to make a great one .
4 lathan and diggs have considerable personal charm , and their screen rapport makes the old story seem new .

+ 6
- 0
tests/data_for_tests/io/SST-2/train.tsv View File

@@ -0,0 +1,6 @@
sentence label
hide new secretions from the parental units 0
contains no wit , only labored gags 0
that loves its characters and communicates something rather beautiful about human nature 1
remains utterly satisfied to remain the same throughout 0
on the worst revenge-of-the-nerds clichés the filmmakers could dredge up 0

+ 6
- 0
tests/data_for_tests/io/SST/dev.txt View File

@@ -0,0 +1,6 @@
(3 (2 It) (4 (4 (2 's) (4 (3 (2 a) (4 (3 lovely) (2 film))) (3 (2 with) (4 (3 (3 lovely) (2 performances)) (2 (2 by) (2 (2 (2 Buy) (2 and)) (2 Accorsi))))))) (2 .)))
(2 (2 (1 No) (2 one)) (1 (1 (2 goes) (2 (1 (2 (2 unindicted) (2 here)) (2 ,)) (2 (2 which) (3 (2 (2 is) (2 probably)) (3 (2 for) (4 (2 the) (4 best))))))) (2 .)))
(3 (2 And) (4 (3 (2 if) (1 (2 you) (1 (2 (2 (2 're) (1 not)) (2 nearly)) (4 (3 (3 moved) (2 (2 to) (1 tears))) (2 (2 by) (2 (2 (2 a) (2 couple)) (2 (2 of) (2 scenes)))))))) (2 (2 ,) (2 (2 you) (2 (2 (2 've) (1 (2 got) (2 (3 (2 ice) (2 water)) (2 (2 in) (2 (2 your) (2 veins)))))) (2 .))))))
(4 (4 (2 A) (4 (3 (3 warm) (2 ,)) (3 funny))) (3 (2 ,) (3 (4 (4 engaging) (2 film)) (2 .))))
(4 (3 (2 Uses) (3 (3 (4 (3 sharp) (4 (3 (4 humor) (2 and)) (2 insight))) (2 (2 into) (3 (2 human) (2 nature)))) (2 (2 to) (2 (2 examine) (2 (2 class) (1 conflict)))))) (2 (2 ,) (2 (2 (2 adolescent) (2 (2 (2 yearning) (2 ,)) (3 (2 (2 the) (2 roots)) (3 (2 of) (2 (2 friendship) (2 (2 and) (2 (2 sexual) (2 identity)))))))) (2 .))))
(2 (2 (2 Half) (1 (2 (2 (2 (2 (2 Submarine) (2 flick)) (2 ,)) (2 (2 Half) (2 (2 Ghost) (2 Story)))) (2 ,)) (2 (2 All) (2 (2 in) (2 (2 one) (2 criminally)))))) (1 (1 neglected) (2 film)))

+ 6
- 0
tests/data_for_tests/io/SST/test.txt View File

@@ -0,0 +1,6 @@
(2 (3 (3 Effective) (2 but)) (1 (1 too-tepid) (2 biopic)))
(3 (3 (2 If) (3 (2 you) (3 (2 sometimes) (2 (2 like) (3 (2 to) (3 (3 (2 go) (2 (2 to) (2 (2 the) (2 movies)))) (3 (2 to) (3 (2 have) (4 fun))))))))) (2 (2 ,) (2 (2 Wasabi) (3 (3 (2 is) (2 (2 a) (2 (3 good) (2 (2 place) (2 (2 to) (2 start)))))) (2 .)))))
(4 (4 (4 (3 (2 Emerges) (3 (2 as) (3 (2 something) (3 rare)))) (2 ,)) (4 (2 (2 an) (2 (2 issue) (2 movie))) (3 (2 that) (3 (3 (2 's) (4 (3 (3 (2 so) (4 honest)) (2 and)) (3 (2 keenly) (2 observed)))) (2 (2 that) (2 (2 it) (2 (1 (2 does) (2 n't)) (2 (2 feel) (2 (2 like) (2 one)))))))))) (2 .))
(2 (2 (2 The) (2 film)) (3 (3 (3 (3 provides) (2 (2 some) (3 (4 great) (2 insight)))) (3 (2 into) (3 (2 (2 the) (2 (2 neurotic) (2 mindset))) (3 (2 of) (2 (2 (2 (2 (2 all) (2 comics)) (2 --)) (2 even)) (3 (2 those) (4 (2 who) (4 (2 have) (4 (2 reached) (4 (4 (2 the) (3 (2 absolute) (2 top))) (2 (2 of) (2 (2 the) (2 game))))))))))))) (2 .)))
(4 (4 (2 Offers) (3 (3 (2 that) (3 (3 rare) (2 combination))) (2 (2 of) (3 (3 (3 entertainment) (2 and)) (2 education))))) (2 .))
(3 (2 Perhaps) (4 (2 (1 (1 no) (2 picture)) (2 (2 ever) (2 made))) (3 (3 (2 (2 has) (2 (2 more) (3 literally))) (3 (2 showed) (2 (2 that) (2 (1 (2 (2 the) (1 road)) (1 (2 to) (0 hell))) (3 (2 is) (3 (2 paved) (3 (2 with) (3 (3 good) (2 intentions))))))))) (2 .))))

+ 6
- 0
tests/data_for_tests/io/SST/train.txt View File

@@ -0,0 +1,6 @@
(3 (2 (2 The) (2 Rock)) (4 (3 (2 is) (4 (2 destined) (2 (2 (2 (2 (2 to) (2 (2 be) (2 (2 the) (2 (2 21st) (2 (2 (2 Century) (2 's)) (2 (3 new) (2 (2 ``) (2 Conan)))))))) (2 '')) (2 and)) (3 (2 that) (3 (2 he) (3 (2 's) (3 (2 going) (3 (2 to) (4 (3 (2 make) (3 (3 (2 a) (3 splash)) (2 (2 even) (3 greater)))) (2 (2 than) (2 (2 (2 (2 (1 (2 Arnold) (2 Schwarzenegger)) (2 ,)) (2 (2 Jean-Claud) (2 (2 Van) (2 Damme)))) (2 or)) (2 (2 Steven) (2 Segal))))))))))))) (2 .)))
(4 (4 (4 (2 The) (4 (3 gorgeously) (3 (2 elaborate) (2 continuation)))) (2 (2 (2 of) (2 ``)) (2 (2 The) (2 (2 (2 Lord) (2 (2 of) (2 (2 the) (2 Rings)))) (2 (2 '') (2 trilogy)))))) (2 (3 (2 (2 is) (2 (2 so) (2 huge))) (2 (2 that) (3 (2 (2 (2 a) (2 column)) (2 (2 of) (2 words))) (2 (2 (2 (2 can) (1 not)) (3 adequately)) (2 (2 describe) (2 (3 (2 (2 co-writer\/director) (2 (2 Peter) (3 (2 Jackson) (2 's)))) (3 (2 expanded) (2 vision))) (2 (2 of) (2 (2 (2 J.R.R.) (2 (2 Tolkien) (2 's))) (2 Middle-earth))))))))) (2 .)))
(3 (3 (2 (2 (2 (2 (2 Singer\/composer) (2 (2 Bryan) (2 Adams))) (2 (2 contributes) (2 (2 (2 a) (2 slew)) (2 (2 of) (2 songs))))) (2 (2 --) (2 (2 (2 (2 a) (2 (2 few) (3 potential))) (2 (2 (2 hits) (2 ,)) (2 (2 (2 a) (2 few)) (1 (1 (2 more) (1 (2 simply) (2 intrusive))) (2 (2 to) (2 (2 the) (2 story))))))) (2 --)))) (2 but)) (3 (4 (2 the) (3 (2 whole) (2 package))) (2 (3 certainly) (3 (2 captures) (2 (1 (2 the) (2 (2 (2 intended) (2 (2 ,) (2 (2 er) (2 ,)))) (3 spirit))) (2 (2 of) (2 (2 the) (2 piece)))))))) (2 .))
(2 (2 (2 You) (2 (2 'd) (2 (2 think) (2 (2 by) (2 now))))) (2 (2 America) (2 (2 (2 would) (1 (2 have) (2 (2 (2 had) (1 (2 enough) (2 (2 of) (2 (2 plucky) (2 (2 British) (1 eccentrics)))))) (4 (2 with) (4 (3 hearts) (3 (2 of) (3 gold))))))) (2 .))))
(3 (2 ``) (3 (2 Frailty) (4 (2 '') (3 (4 (3 (2 has) (3 (2 been) (3 (4 (3 (3 written) (3 (2 so) (3 well))) (2 ,)) (2 (2 (2 that) (2 even)) (1 (2 (2 a) (2 simple)) (1 (2 ``) (0 Goddammit))))))) (2 !)) (2 '')))))
(4 (2 (2 Whether) (2 (2 (2 (2 or) (1 not)) (3 (2 you) (2 (2 're) (3 (3 enlightened) (2 (2 by) (2 (2 any) (2 (2 of) (2 (2 Derrida) (2 's))))))))) (2 (2 lectures) (2 (2 on) (2 (2 ``) (2 (2 (2 (2 (2 (2 the) (2 other)) (2 '')) (2 and)) (2 ``)) (2 (2 the) (2 self)))))))) (3 (2 ,) (3 (2 '') (3 (2 Derrida) (3 (3 (2 is) (4 (2 an) (4 (4 (2 undeniably) (3 (4 (3 fascinating) (2 and)) (4 playful))) (2 fellow)))) (2 .))))))

+ 9
- 0
tests/data_for_tests/io/THUCNews/dev.txt View File

@@ -0,0 +1,9 @@
体育 调查-您如何评价热火客场胜绿军总分3-1夺赛点?新浪体育讯四年了,终于赢球了,热火在凯尔特人的主场经过加时98-90艰难战胜对手,总比分3-1领先,詹姆斯拿下35分14个篮板,韦德28分9篮板,波什20分12个篮板。您如何评价这场比赛?
娱乐 盘点好莱坞明星新年目标 布兰妮迪亚兹在列(图)新年伊始,又是制定新一年目标的时候了。大到关注环保、寻找真爱,小到改掉坏毛病、改变生活习惯,这些都是美国演艺明星在2009年中的目标。●告别烟圈好莱坞女星卡梅隆·迪亚兹计划在新的一年戒烟,和她目标相同者还有《实习医生格蕾》中的凯瑟琳·海格尔及《飞跃贝弗利》中的布莱恩·奥斯汀·格林。格林说:“每年我似乎都说要戒烟,看看今年行不行吧。”●不咬指甲女歌手布兰妮( 听歌)希望自己“改掉咬手指甲的毛病”。此外,她还表示:“我希望自己不再焦虑,以前的我无时无刻不在焦虑中,我要学会让自己幸福。”●寻觅真爱凭借《灵魂歌王》一片夺得2005年奥斯卡()奖的杰米·福克斯希望自己能在2009年找到真爱。●回归平静去年刚刚与男友分手的影星安妮·海瑟薇则希望过上平静的生活。●享受滑雪因出演《灵异第六感》而一举成名的影星黑利·乔尔·奥斯门特的最大愿望就是重拾自己滑雪的爱好,并从美国犹他州的某座高山上直冲而下。●致力环保曾主演《异形》和《冰风暴》等片的女演员西戈尼·威弗表示要为环保事业贡献力量。她说:“我不再使用塑料袋,手头现有的这些我也要循环使用。”●亲近素食《绝望主妇》中的伊娃·朗格利亚的目标是努力尝试吃素。●活络筋骨热门电视剧《汉娜·蒙塔娜》的主角麦莉·赛勒斯关心的问题则是“多做运动”。●回馈世界要说计划最为抽象的当数帕丽斯·希尔顿,她说:“我已经长大了,成熟了,我要怀着一颗感恩的心,开始回馈世界。”●计划“计划”1983年出演《战争游戏》的马修·布罗德里克的新年计划最别具一格,他的计划就是在2009年“拟订计划”。○据新华社
家居 蓝景丽家尹勃乐居思路清晰 创新开拓(图)     新浪家居谢娟讯  10月16日,易居中国与新浪合资公司中国房产信息集团(简称CRIC)在美国纳斯达克成功上市。此消息一出,家居业界大腕在分享喜悦的同时,纷纷来电来函,向中国房产信息集团成功登陆纳斯达克表示祝贺,同时对CRIC在未来发展提出了中肯的建议和期待。新浪家居电话连线业内数位大腕,倾听他们对此事的看法,以及对中国房产信息集团上市寄语。【CRIC(中国房产信息集团)纳斯达克挂牌上市】       采访嘉宾:蓝景丽家总经理 尹勃         新浪家居:您好,尹总,我是新浪乐居家居频道编辑谢娟,感谢您接受本次访谈。   尹勃:您好。       新浪家居:北京时间2009年10月16日,易居中国与新浪合资公司中国房产信息集团在美国纳斯达克成功上市融资2亿美元。您是否知道此事?您对此有怎样的看法?       尹勃:刚刚知道!对家居很好的促进作用,希望能够加大北京市场支持力度,给予北京市场更高的重视。   新浪家居:感谢您的肯定。同时也希望您能给予建设性的意见。       尹勃:在罗总的带领下做的比较有声势,目前的思路更清晰。希望乐居做到较其他媒体更有高度,活动更有所创新。   新浪家居:您有怎样的祝语?             尹勃:祝新浪乐居越办越好,带动北京家居市场更上一层楼!      【嘉宾简介】       尹勃:(蓝景丽家总经理 北京市建筑装饰协会家装委员会副会长 北京市场协会家居分会副会长 北京家具协会常务理事 中国建材市场协会理事会副理事长)家居流通卖场一路走来,从昔日倒爷式的地摊、棚户到今天品牌型的综合、主题式购物广场,经历了多少时代的洗礼。尹勃作为这个行业中翘楚企业的负责人,见证了整个家具行业的变迁。名字后面这一连串的职务介绍足以说明他在整个行业中举足轻重的影响力,也更加肯定了他对“蓝景丽家”这个行业航母的巨大贡献。      【推荐阅读】        蓝景丽家十一精彩促销撼京城       百城万店无假货蓝景丽家启动       乐居装修日首战告捷 蓝景丽家销售额逆势暴涨       【媒体声音】      中国证券报:新浪易居合资公司CRIC登陆纳市       上证报:新浪易居合资公司CRIC逆市登陆纳市       第一财经日报:CRIC上市首日市值20亿美元       新华网:新浪与易居合资公司CRIC登陆纳斯达克       专访丁祖昱:CRIC在做前人没有做过的事情       专访罗军:CRIC具有巨大的商业潜力       专访曹国伟:在某些垂直领域会做更多尝试 【更多】     上市背景资料:      美国东部时间10月16日(北京时间10月16日)消息,易居中国与新浪合资公司中国房产信息集团(以下简称CRIC)在美国纳斯达克挂牌上市,首日开盘价12.28美元,超出发行价0.28美元。CRIC为易居中国与新浪的合资公司,股票代码为CRIC,发行价12美元,共发行美国存托股票(ADS)1800万股,同时承销商有权在未来30天内,行使总额达到270万股的超额配售权,此次IPO共计募集资金约2.16亿美元。作为中国在美国的地产科技第一股,CRIC是中国最大的专业房地产信息服务公司,并且拥有同时覆盖线上线下的房地产综合信息和服务平台。CRIC的成功上市,也创造了两家在美国上市的中国公司,分拆各自极具成长力的业务后进行合并,并进行二次上市的先河。CRIC联席董事长、CEO周忻表示;“我们很高兴看到CRIC成功上市,此次IPO将确立CRIC作为中国房地产信息服务第一品牌的地位,并有利于CRIC继续推进国内最大和最先进的房地产信息系统建设,使CRIC成为同时覆盖线上和线下的强大中国房地产网络信息服务平台,为房地产开发商、供应商、专业机构以及个人用户提供多元化房地产信息服务。CRIC联席董事长、新浪CEO曹国伟表示:“CRIC的成功上市,是易居中国和新浪合作的重要一步,也是我们在垂直领域商业模式探索的有益尝试,我们很高兴有机会发挥双方的协同效应。而进一步拓展和深化互联网垂直领域的商机,建立公司在细分市场的核心竞争力并做大做强,这也是新浪未来长远战略的重要组成部分。    
房产 弘阳大厦骏馆开盘 首日热销1亿昨天,位于南京大桥北路69号的红太阳销售中心人头攒动,当天开盘的弘阳大厦·骏馆取得了开门红,由于产品品质高端、户型精致总价又低,吸引了一拨又一拨看房者,当天销售额突破了一个亿。弘阳大厦·骏馆位于南京市浦口区大桥北路西侧,紧邻已建成的旭日华庭金棕榈园区旁,用地总面积6万多平米,包括一个包含酒店公寓、商业及办公的综合楼,一个酒店式公寓以及8万平方米的居住建筑和15000平方米的商业。弘阳大厦作为这块地块中的综合楼,主楼高99.65米,共28层,是集办公、商业、餐饮、公寓为一体的泛配套复合多功能商住楼。此次推出的弘阳大厦·骏馆,是弘阳大厦其中5-22层的酒店式公寓,主力户型为41-75平米商住先锋小户型。由于项目地处桥北新城的核心位置,离市区仅一桥之隔,规划中的地铁与过江隧道近在咫尺,兼具成熟配套资源优势。公共交通也非常方便,131、132、鼓珍、鼓扬、汉江、中六、汉六等多条公交线路可以直达该项目。除了地处桥北核心地段,具备传统的生活多方面配套以外,弘阳大厦·骏馆还拥有同属弘阳集团旗下的华东MALL完美商业配套。 我要评论
教育 名师解析标准读音在四级考试中的重要性对于中国学生而言,都知道口语和听力很重要,但就是怎么也不好过关,究其原因就是他们英语发音不标准。一、口语。一口标准而流利的口语可以立即提升你的形象,给人以很好的第一印象。举例1:汤姆汉克斯主演的电影《幸福终点站》中有一个情节,大家应该很熟悉:他将a man of mystery“一个神秘的人”读成了a man of misery“一个痛苦的人”,意思相差了十万八千里,自然造成理解障碍。举例2:中文中v和w没有任何区别,说“我wo”的时候,如果上齿咬着下唇的话,也无所谓,因为不会产生任何歧义。但是英文中不一样,这两个音区别很大。vine表示“葡萄藤”;而wine则表示“葡萄酒”。green wine表示“新酒”;而green vine则表示“绿色的葡萄藤”。读错了音意思差别可就大了去了。举例3:一位外国人在中国马路上迷了路,见到一位姑娘,立即冲上前去,说道:“我想吻(问)你...”吓得姑娘连忙跑掉,就是因为读音的问题,外国人在中国也会遭遇理解障碍。二、听力。听力在四级考试中占35%的份额,如果听力不如意的话,考试想要及格真的是很难。听力过程中学生可能会有以下几种体会:1. 根本听不清楚读音——因为不熟悉英文的读音规则;2. 听清了读音,但对应不出是哪个单词——词汇量不够,没有好好记单词;3. 听清了读音,也知道是哪个单词,但忘了啥意思了——还是词汇量不够,对于单词不熟悉;4. 对于spot dictation题型而言,听清了,知道是哪个单词,但就是—写就出现拼写错误——还是词汇没记好。第一,注意单词的读音,英式的和美式的。如:It's very hot today. 中hot美语中几乎就读成了hut这个词的读音了。第二,句子一连读、失去爆破等,连单词的影子都找不到了。如:This-is-an ol(d) pi(c)ture-of-a bi(g) car。横线表示连读,连读起来都不知道到底是一个词还是几个词了,括号里是不发音的,所以这个句子一旦读出来就完全走了样了。但听力中这种现象确是很常见的。要想练习好听力,首先要练习好英文的读音,包括词和句的读音规则。尤其对于外地孩子来说,就更重要了。如湖南的孩子说“我来自湖南”,由于方言影响就成了“我来自弗兰”。而这些人都不认为自己的读音是错误的,所以他听别人这样说的时候也认为是正确的。总之,如果我们平时的读音是错误的话,当听到正确读音时反而会不知道是哪个词,所以要想加强听力,首先要加强自己的读音。(党敏)
时尚 组图:10款艳丽泳装熟女穿出少女情怀导语:时下的泳装注重层次和线条感的悠闲设计,流露出自然的气质。 简洁的色彩搭配,甜美感觉凸显少女情怀,抽象概念化的异域花卉,颜色和谐、明快,印花纱裙,感觉轻盈,细致有女人味。
时政 台“中选会”称12月5日选举时程不变新华网消息 据台联合晚报报道,台“中选会”上午如期召开幕僚选务会议,仍按原定12月5日举办“三合一”选举时程进行相关作业规划。“中选会”将在9月4日发布选举公告。基于考量莫拉克风灾灾后重建,以及H1N1疫情发烧,有部分蓝绿政治人物倡议延后年底“三合一”选举。据了解,到目前为止,年底“三合一”选举的相关选务作业仍如期进行。“中选会”表示,“中选会”是选务机关,是否延选,仍须由政策决定,在政策未改变前,“中选会”将依既定时程,规划年底“三合一”选举的相关选务作业。
游戏 《天问》国家系统神秘美丽女儿国初探传说在遥远的西域,有一个神秘美丽的国家,上至国王,下至百姓,全国居民都是美丽温婉的女性。唐僧四师徒一路西行,就是来到了这个风光如画的女性之国。粉色帷幔随风飘扬,阳光照耀着的粉色砖墙闪闪发亮;清澈的泉水边,风情万种的女子们悠闲地编制精美的地毯,蝴蝶在花香中起舞……西梁女国就是一位端坐西域的温柔而美丽的少女,带着神秘的微笑注视来来往往的游客。解阳山是全新的练级场景, 山上微风吹拂,仙鹤悠闲地梳理着翎羽,处处透露平和安逸的气氛。但是山顶一座简陋的道观,竟藏着不少金银财宝?西梁女国百姓最珍视的一口泉水,也隐藏在道观山之上,这里到底隐藏着什么秘密?在解阳山上有一个神秘的副本波月洞,里面溶岩密布,石柱高耸,组成了各种美妙的景观。然而,波月洞盘踞着以毒蝎精领导的一群女妖,这帮妖精已与女儿国争战多年。当群侠得知毒蝎精近来甚至企图绑架女儿国太子,以要挟国王就范时,不论是出于怜香惜玉,还是英雄救美,一场的激烈的战争终将不可避免的开始了……
科技 五彩时尚MP3 三星U5仅售299元 三星YP-U5(2GB)共有蓝、粉、白、红、黑五种时尚漂亮颜色可供选择。色彩感很浓烈。三星YP-U5(2GB)的背面还提供了一个背夹,再加上五颜六色的款式,使它看上去很像一个美发卡。机身很小巧,三围尺寸只有25×88×11.8mm,重量也只有23g,完全可以随身携带。在机身正面可以看到一个OLED冷光屏,显示的字体比较清晰。三星YP-U5(2GB)可以支持mp3、wma、ogg、Flac音频格式文件播放,此外,它支持三星最新的DNSe代3代音效,5种音效,提供自动、正常、工作室、摇滚、节奏及布鲁斯、舞厅、音乐厅7种选择,也可以进行自定义,对EQ和3D进行调节,效果非常好。除了出色的音乐播放功能以外,三星YP-U5(2GB)还支持FM收音机、歌词显示、MIC录音等功能。编辑点评:U系列是三星主打平价市场的产品,主要针对学生、办公室一族。相信这款音质出众、色彩绚丽的时尚MP3,也将为学生和年轻白领一族的个性生活增添亮丽色彩。    三星YP-U5(2GB)      [参考价格] 299元    [联系方式] 13434155009     

+ 9
- 0
tests/data_for_tests/io/THUCNews/test.txt View File

@@ -0,0 +1,9 @@
体育 凯尔特人vs尼克斯前瞻III纽约背水战 甜瓜必杀令新浪体育讯北京时间4月23日上午7点,凯尔特人将迎移师纽约城,挑战尼克斯,这是两队首轮的第三次交锋。前两场比赛中,小斯和安东尼轮番打出现象级的表现,可惜都无法为尼克斯带来一场胜利。目前凯尔特人总比分2-0领先,对尼克斯而言,他们没有退路。“第三场在主场击败,这是一场必胜的战争,我们根本输不起,这是本赛季为止将要面临的最艰难的一场比赛。”安东尼说。然而运气却不在纽约这边,他们接连以小分差输掉两场,与此同时,比卢普斯和小斯又接连出现伤病,第三场比赛两人的状态仍旧未知,小斯缺席了球队的训练,他在第二场下半场因为背部痉挛休战,但小斯仍希望能够在第三场出战,比卢普斯则有膝伤在身,能否复出还要等赛前决定。第二场比赛中,比卢普斯休战,小斯下半场未打,比尔-沃克全场11投0中,但是尼克斯凭借安东尼的42分17个篮板6次助攻,顽强的将比赛拖到最后一秒,直到最后时刻杰弗里斯的传球被KG抢断,才遗憾落败。德安东尼说:“很遗憾他们两不能上场,但从积极方面看,下半场球队打出的顽强表现,让我们信心满满。”小斯在第一场拿到28分11个篮板,但是安东尼在那场饱受犯规困扰,18投5中只拿到15分,下半场11投1中,尼克斯最终85-87落败,纽约人相信,如果安东尼和小斯同时发挥,他们有很大机会扳倒绿巨人。“我想这是一种精神折磨,你知道自己打得有多努力,有多棒,但两次我们都距离胜利差之毫厘。”安东尼说。第三战将是尼克斯自从2004年4月25日以来,首次在麦迪逊广场花园首次举办季后赛,这座举世闻名的篮球麦加殿堂已有七年未曾染指季后赛。对凯尔特人而言,他们的进攻出现了不少问题,季后赛前两场分别是靠雷-阿伦和凯文-加内特的关键球才勉强击败对手。里弗斯表示,球队表现需要提高,奥尼尔第三场能否出战还是谜,雷-阿伦连续两场打出不俗表现,隆多则在第二场砍下30分7次助攻,他们将尼克斯的命中率限制到35.6%,但与此同时,他们也丢失了大量的防守篮板,上场比赛尼克斯抢下了20个进攻篮板,而凯尔特人只有9个。小斯曾在这轮系列赛中和格伦-戴维斯大打口水仗,此战重回纽约,尼克斯急需他的发挥,接下来就看小斯带伤出战,能为尼克斯提供多少支援了。两队预计首发:凯尔特人:隆多、阿伦、皮尔斯、加内特、小奥尼尔尼克斯:道格拉斯、菲尔德斯、图里亚夫、安东尼、小斯(木瓜丁)
娱乐 独家探班李康生蔡明亮短片《自转》(组图)新浪娱乐讯蔡明亮(阿亮)导演、李康生(小康)演出的银幕组合让两人在国际影坛挣出一席地位,如今两人“角色互换”!李康生执导台湾公视《台北异想》影片中的短片──《自转》,请出已20年没站在镜头前的蔡明亮当演员,阿亮为了爱徒再次“下海”演戏,没想到自称对演员施以爱的教育的小康,拍第一场戏就让阿亮吃了18次NG,现场更放催泪音乐,让感情丰富的阿亮流下真情的眼泪。台湾公视的《台北异想》影片,概念将一天从清晨六点起分为八个时段,邀来李康生、郑芬芬、钮承泽、林靖杰等八位导演,拍摄八部十分钟短片,接力诠释24小时的台北故事。小康选了凌晨四时至六时的时段发挥,他说:“2006年,舞蹈家伍国柱、罗曼菲相继过世让我感触很深,蔡明亮拍摄电影《洞》时,罗曼菲担任舞蹈编排,她直率、认真的性格留给大家很深的印象。因此特别选择她凌晨四点多辞世的时段,拍摄《自转》,也希望将这部短片献给她。”蔡明亮自从20年前曾在电视单元剧中饰演乐团主唱后,即不再以演员身分现身萤光幕前,为了挺爱徒再站镜头前,阿亮坦言,剧中虽只需扮演自己,但被拍仍令他紧张,要不是近几年常受访,被媒体训练出减少对镜头的恐惧,不然他不会让自己名列演员名单中。被阿亮指导演戏惯了的小康,如何回过头来对恩师教戏?他虽说:“我让演员自由发挥,采取『爱的教育』!”但光是陆奕静炒咖啡豆,阿亮静坐咖啡厅一隅,这全剧第一个镜头就磨了十八次,现场播放雷光夏广播录音和林怀民舞作《挽歌》音乐,更催出阿亮的男儿泪,阿亮说:“我就是想到了罗曼菲,更感受到美好的事物都会消失,真想再看一次罗曼菲跳舞。”《自转》的最后一场戏,陆奕静衬着音乐转圈跳舞,阿亮也即兴起舞,但连两天熬夜赶戏体力透支,加上不停转圈,她拍到呕吐、阿亮则晕眩不止,小康却满意称赞:“这两人跳得不错嘛!”小康当导演,从第一场戏折腾演员到末场戏,堪称“有始有终”,蔡明亮笑说:“未来我还是选择继续当导演吧。”台湾特派记者郑伟柏/台北报导 声明:新浪网独家稿件,转载请注明出处。
家居 打好算盘最省钱瓷砖选购法面对导购小姐的微笑更是心中打鼓:人家说的好像挺有道理,但会觉得说得越好,会不会上当啊,是不是有猫腻呢?本文从建筑卫生陶瓷角度来分析,其它建材选购原理也与之相差无几。瓷砖的选购很讲究,要知道瓷砖这玩意儿一旦铺上了要是再发现有问题,后果是很严重的!下面列出的几点问题是在装修前一定要想清楚的,这些问题往往决定了以后选择瓷砖的种类、规格、价位甚至家居的整体风格。1、到底铺什么?这个问题好像问得很白痴,但这却是最基本的,首先你得充分了解哪些空间适合用哪些瓷砖啊!其实这个问题的关键不是用什么铺地,而是各种材料该怎么搭配。比如:有些业主希望在客厅铺瓷砖,同时在卧室选择木地板,这样问题就产生了:如果客厅铺普通玻化砖,卧室铺强化复合地板,那么卧室与客厅就会存在3cm左右的高度差,这主要是由于强化地板下没有打龙骨造成的。那么是不是在卧室选择实木地板就行了呢?当然不是。通常实木地板由厂家安装都会使用3×2cm的龙骨,如果为了和客厅的瓷砖找平最好使用5×4cm规格的龙骨,但是各个地板厂商对于更换龙骨的服务条款可是不同的。所以要充分与业主沟通,毕竟我们的目的是要让业主满意,了解业主的最基本的要求,然后根据业主的原始思路,找出最合适的方案。如果业主希望选择地板与地砖混铺的方式,就一定要规划好,避免不必要的麻烦。下面介绍两种基本搭配方式:瓷砖+强化地板=铺地板的房间用水泥灰浆垫高3cm,瓷砖+实木地板=地板下采用5×4cm规格的龙骨。2、选择什么规格的地砖?是铺600的?800的?还是1000的或是其它规格的?这是一个问题!现在的地砖,尤其是客厅使用的地砖主要是500mm、600mm、 800mm和1000mm(即1米)等规格,其中使用最多的是600mm和800mm两种。那么该如何选择呢?建议根据铺贴的面积及家具的摆放进行选择。由于单位面积中600mm的砖比800mm的砖铺贴数量要多,所以视觉上能产生空间的扩张感,同时在铺贴边角时的废料率要低于800mm的砖,而空间大时铺800甚至1米规格的砖就显得大气。因此建议小于40平米的空间选择600mm规格的地砖;而大于40平米的空间则可以选择800mm或一米的地砖。值得注意的是,如果在房间中家具过多(如卧室),盖住大块地面时,最好也采用600mm的地砖。3、该铺怎样的砖?到底是选择铺怎样的砖呢?是仿古砖还是抛光砖?仿古砖自然、柔务,在复古风格、尤其是拼花上有着玻化砖无法比拟的优势。同时,由于表面釉层的保护,对于茶水、墨水甚至热烟头的抗污能力也优于玻化砖。但是玻化砖也并非一无是处。随着技术的发展,现在玻化砖表面玻化层的密实度、光洁度已经相当的高,不仅能够使居室显得更加亮堂,还决不会像釉面砖由于外力碰撞、摩擦产生釉面破损的现象。所以选择什么样的砖要根据你要体现的风格,要明亮、大气就选抛光砖,要自然、温馨就选仿古砖。建议居室空间、客厅如果采光相对有限选择玻化砖,而光线充足的客厅和和需防滑的厨房和卫生间地面,及阳台等可选择仿古砖或其它釉面砖。4、“微晶玉”、“微晶石”、“微晶钻”都是什么意思?很多人逛建材城最头疼的恐怕就是记录瓷砖的名字了。什么“微晶玉”、“微晶石”、“微晶钻”、“超炫石”、“聚晶玉”等等。其实大家根本没必要记住这些拗口的名字,它们描述的都是同一种东西——玻化砖,这些名字只是厂商为了区分产品的档次,进一步细化市场而使用的代号罢了。在选择时大家只要坚持自己的预算,尽量选择适合自己的产品就行了。微晶石表面很炫,但其硬度只有莫氏五度左右,不耐磨,不适于用在地面,比较适于用在外墙干挂。
房产 迪拜危机启示录:空中楼阁迟早要倒塌美国拉斯维加斯,又一家奢侈至极的酒店在这个“罪恶之城”绽放。但此次,相较酒店豪华的各种天价服务和开幕典礼上的好莱坞群星璀璨外,似乎其幕后的主人更吸引人们的眼球--迪拜世界。仅仅一周前,迪拜世界这个名词牵动了世界每个角落的神经。11月25日,迪拜主权财富基金迪拜世界宣布,暂缓偿还债务。根据评级机构穆迪的估算,迪拜的债务预计接近1000亿美元。巨大的数额勾起了人们对去年雷曼兄弟倒闭以来那波汹涌澎湃的国际金融危机的回忆。汇丰、渣打、巴克莱、苏格兰皇家银行等在内的多家银行涉及在内。人们开始担心,我国是否也会因此受到波及。庆幸的是,国内几大商业银行随即申明表示,没有涉及迪拜世界、迪拜政府和其他相关迪拜主权基金及机构发行的债权。有所涉及的,比例也相当的小。记者致电多家研究所银行业分析师,均表示认为此事对国内银行业影响不大,目前没有特别关注。因此,公众的目光从银行投向了导致其债务根源的房地产业。迪拜世界的房产项目,现在已经成为了全世界最大的烂尾楼。而就在这债务问题凸显的时刻,其旗下的“重型”项目却闪亮登场。“城市中心”酒店的开幕,似乎使得地产行业最尴尬的一面展现在了公众眼中。反观我国的地产行业,近期拍卖地王频现,房屋交易价格再次飙升,种种迹象也让人们对其产生了许多担忧。有专家对记者表示,在高速成长时期,楼价和地价互相推动的背后,是资金的不断流入。在那些光鲜的大楼后被后默默支撑的是债券、贷款等各种负债工具。一个原本是沙漠中人口只有十几万的小城,在几乎没有任何实业的基础上,居然吸引了世界上各方的资金,建成了一个人口上百万的豪华都市。房地产市场的巨大利益诱惑在其中占据了重大的因素。不断高涨的楼市,加上免税的便利,使得国际游资疯狂涌入。在聚集了巨大资金后,其所投资的项目遍布世界,美国这次的拉斯维加斯“城市中心”项目,迪拜世界就砸了近50亿美元。这种推动与反推动作用,给予了人们一个璀璨的迪拜,但当问题暴露,留下的却是满目疮痍。“迪拜危机对我们而言更多的是警示作用。”中国社科院金融研究所中国经济评价中心主任刘煜辉在接受《证券日报》记者采访时如此表示。他认为,目前为止迪拜危机对我国银行业的影响不多,但由于有过全球金融危机的影响,心理上的波动是会有的。此外,刘煜辉还告诉记者,任何以过度负债支撑起来的价格上涨或资产泡沫都是需要高度警惕。因为一旦泡沫破裂,就会带来破坏性较强的连锁反应。相信通过这次迪拜危机的警示,国内更多的行业会关注本行业内的负债和泡沫,对于投机性行为和高风险项目将会更加冷静。我要评论
教育 知名美国私立寄宿中学来华招生行程序号 学校 时间 地点 学校情况 1、北野山中学Northfield Mount Hermon School10月26日 星期三PM1:00 美丽园龙都美国教育部认可的示范型学校2、Cranbrook school10月27日 星期四AM8:40-10:20美丽园龙都每年本校学生的AP考试成绩都位列于全国成绩最好的学校之中3、The Storm King School10月29日 星期六PM4:30上海南京西路1515号嘉里中心1809室纽约州一所私立男女混合精英寄宿中学4、Villanova Preparatory School10月30日 星期日PM1:00-4:00虹桥万豪酒店美国唯一一所的男女混合寄宿制天主教教会学校5、Wyoming Seminary Upper School11月1日 星期二AM10:00香格里拉美国著名的百年贵族名校,也是美国东北部最古老的中学及大学预科学校6、胡桃山音乐学校Walnut Hill School11月2日 星期三PM1:00浦东香格里拉美国最古老的艺术高中7、弗莱堡学校Fryeburg Academy11月3日 星期四PM2:00-6:00上海南京西路1515号嘉里中心1809室一所独特的提供寄宿和走读学习的学校8、St.Johnsbury Academy11月8日 星期二AM9:00-12:00上海南京西路1515号嘉里中心1809室美国中学中拥有最棒校园的男女合校寄宿学校9、波特茅斯教会学校Portsmouth Abbey School11月8日 星期二PM1:00-3:00北京朝阳区建外SOHO,A座9层全国首屈一指的天主教混合住宿学校10、波特茅斯教会学校Portsmouth Abbey School11月15日 星期三PM1:00-4:00上海南京西路1515号嘉里中心1809室全国首屈一指的天主教混合住宿学校11、库欣高中Cushing Academy11月第三周待定美国最悠久男女合校寄宿中学之一12、West NottinghamAcademy11月19日 星期六PM2:00上海南京西路1515号嘉里中心1809室美国最早的学校,245年历史13、格瑞尔女子中学The Grier School11月26日 星期六PM9:45明天广场万豪历史悠久的著名女子寄宿学校14、萨菲尔德学院Suffield Academy11月30日 星期三 待定有170多年历史,是一所男女同校的私立中学15、威利斯顿 • 诺塞普顿中学The Williston Northampton School12月1日 星期四PM2:00-4:00上海南京西路1515号嘉里中心1809室学校以其优质的教学质量而闻名16、菲利普斯埃克塞特Philips Exeter Academy12月2日星期五PM6:30-8:30北京建国饭店牡丹厅(北京建国门外大街5号)“美国高中的哈佛” 、全美国最好的私立寄宿制高中17、菲利普斯埃克塞特Philips Exeter Academy12月3日星期六PM2:30-4:30上海浦东香格里拉浦江楼2层青岛厅“美国高中的哈佛” 、全美国最好的私立寄宿制高中18、菲利普斯埃克塞特Philips Exeter Academy12月5日星期一PM6:30-8:30浙江图书馆1楼文澜厅(杭州西湖区曙光路73号)“美国高中的哈佛” 、全美国最好的私立寄宿制高中19、坎特伯雷中学Canterbury School12月5日  星期一AM9:00-12:00 待定走读与寄宿都有的男女合校20、西城中学/威斯顿中学Westtown School12月5日 星期一AM9:00待定一所拥有205年悠远传统的中学21菲利普斯埃克塞特Philips Exeter Academy12月6日 星期二PM6:30-8:30广州天河区林和中路6号海肮威斯汀酒店5楼蓝厅“美国高中的哈佛” 、全美国最好的私立寄宿制高中22菲利普斯埃克塞特Philips Exeter Academy12月7日 星期三PM6:30-8:30深圳格兰云天酒店26楼云河厅(福田区深南中路3024号)“美国高中的哈佛” 、全美国最好的私立寄宿制高中23Cheshire Academy12月18日 星期日待定美国最早的传统寄宿中学24The Governor’s Academy待定待定美国最古老的寄宿高中之一25Peddie School待定待定著名的具有悠久历史的男女混合寄宿学校26Westover School待定待定美国著名的大学预备女子私立寄宿中学27Rabun Gap-Nacoochee School待定待定一所6-12年级的大学预备住宿走读中学28Ben Lippen 
School待定待定一所为学生提供大学准备课程的教会学院29George Stevens Academy待定待定一所拥有200多年历史的学校
时尚 组图:纽约2011时装周 博主编辑街拍自成风景导语:纽约2011春夏时装秀正在如火如荼地进行着,打开任何时尚网站,你都可以看到这RUNWAY秀的图片,所以我不想在这里赘述了,反而我觉得秀场外这些赶赴现场的模特们和时尚博主以及时尚编辑的街拍更有意思。
时政 台当局开放大陆银联卡在台刷卡消费中国台湾网7月16日消息 据台湾《联合报》报道,台当局“金管会”昨天发布修正“两岸金融业务往来许可办法”,开放大陆银联卡在台刷卡消费。最快9月初大陆民众就可以持银联卡在台刷卡消费,将可提高大陆游客赴台观光、消费意愿,并为台湾每年新增1000亿元(新台币,下同)刷卡商机。岛内银行也将可办理相关收单业务,对收单银行的手续费年收益至少可多出20亿元的贡献。报道称,台当局“金管会银行局副局长”萧长瑞表示,办法发布生效后,“金管会”就可开始受理岛内收单银行、联合信用卡中心等申请,台湾的联合信用卡中心也要跟大陆银联公司签约,估计最快9月初银联卡就可进入台湾。大陆银联卡赴台使用研议多时,消算等技术层面问题一直待克服,昨天“金管会”正式发布相关规定开放银联卡赴台,也代表技术面问题都已解决。根据“金管会”昨天发布的两岸金融业务往来许可办法第二条及第七条之一修正案,明定岛内信用卡业务机构经主管机关许可者,可以与银联公司从事信用卡或转帐卡的业务往来。主要包括银联卡在岛内刷卡消费的收单业务,以及交易授权与清算业务等两项。至于岛内银行发行银联卡的发卡业务则未开放。(高大林)
游戏 腾讯手游在线 《幻想西游》勇创新高根据腾讯QQ游戏中心2009年11月26日显示的在线数据,由腾讯和广州银汉联合运营的《幻想西游》再创新高,同时在线达到54336!54336同时在线一举打破之前的在线记录,创造手机游戏在线新高,这是《幻想西游》的光荣,也是手机游戏界的光荣!罗马不是一天建成的,《幻想西游》运营三年以前,开发组一直注重提升游戏品质和馈玩家,做属于玩家自己的游戏。这次创造在线人数新高,就是对开发组最高的褒奖。11月期间,《幻想西游》举行了“美在西游”系列活动吸引了数千美女玩家报名,6万多玩家参与了本次活动,掀起了11月的活动高潮。11月25日感恩节,开发组成员更是身怀感恩之心,化身GM来到游戏中倾听玩家的心声,并且心甘情愿地被玩家击败后奉上了感恩节礼物。12月将进入“美在西游”决赛阶段,广州银汉笑迎八方客,热情地邀请来自全国各地的美女玩家和跨服帮战优秀代表共聚羊城,共叙三年幻想情,畅谈西游未来路。《幻想西游》是根据名著《西游记》改编的手机网络游戏,具有操作简洁,界面美观,互动性好,娱乐性强的特点,营造出一个充满梦幻的西游世界。进入游戏:手机访问 http://3g.qq.com,选择游戏-网游-幻想手机官网 http://wap.01234.com.cn,选择快速进入
科技 配18-135mm镜头 佳能7D国庆带票促销中(中关村在线数码影像行情报道)佳能EOS-7D是一款拥有1800万像素成像能力,每秒钟8张连怕性能,并具备高清摄像功能的单反相机。这款单反相机于上周登陆中关村市场,是目前APS-C规格单反中的旗舰机型。今天笔者在市场上了解到,配备有18-135mm防抖镜头的7D套机,价格为13800元带发票。EOS 7D实现了在约1800万有效像素的高画质下,高达约8张/秒的连拍速度。并搭载了高速智能的自动对焦系统等众多新功能。EOS 7D不仅达到了约1800万的有效像素,还实现了低噪点的精细图像表现。其搭载的CMOS图像感应器是佳能自行研发生产的产品。在提高像素感光度的同时,对像素内的晶体管进行了改良实现了更高的S/N(信噪)比。7D的常用ISO感光度为100-6400,扩展ISO感光度最高为12800。图像信号传输是在将单通道序列读取高速化的同时,采用8通道进行高速读取。与EOS 50D相比要快约1.3倍,实现了约8张/秒的高速连拍。另外,对更换镜头时以及反光镜、快门等动作时产生的感应器灰尘也采用了相应的综合除尘措施;同时还搭载了可从相机硬件和附带软件两方面进行除尘的“EOS综合除尘系统”,在除尘功能上考虑得十分周到。快门单元和机身盖采用了不易产生碎屑的特殊材料;即便是不小心进入了灰尘,也可以通过超声波使图像感应器最前面的低通滤镜产生振动将灰尘抖落。低通滤镜表面进行了氟涂层处理,不论是对难以脱落的具有较高粘度的灰尘还是潮湿的灰尘都有着很好的除尘效果。双DIGIC 4数字影像处理器实现了对通过8个通道从图像感应器中高速读取出的,具有约1800万像素的庞大数据的迅速且高精度处理。搭载了2个高性能数字影像处理器DIGIC 4,能够对各种数据进行并行处理,即使是约1800万有效像素也可以实现最高约8张/秒连拍的高速图像处理。EOS 7D搭载了多达19个的自动对焦点,并且提高了每个对焦点的对焦精度。19个对焦点全部采用对应F5.6光束的十字型自动对焦感应器。将用于检测纵向线条的横向线型自动对焦感应器与用于检测横向线条的纵向线型自动对焦感应器呈十字型排列,从而实现了很高的被摄体捕捉能力。中央对焦点在相对于F5.6光束十字型自动对焦感应器的斜方向上配置了对应F2.8光束精度更高的十字型自动对焦感应器。通过中央八向双十字自动对焦感应器的协同工作,实现了高速且高精度的合焦。追踪被摄体的人工智能伺服自动对焦功能也在EOS 7D上得到了大幅的进化。EOS 7D的光学取景器具有约100%的视野率和约1倍(100%)的放大倍率,同时具有29.4°的视角和22毫米的眼点,其光学性能在历代EOS单反相机中也名列前茅。通过视野率约100%的光学取景器观察到的范围与实际拍摄的范围基本一致,因此能够得到非常精确的构图。此外,EOS 7D还在光学取景器内搭载了具有背透型液晶面板的“智能信息显示光学取景器”,它能够在对焦屏上显示网格线和三维电子水准仪等内容。EOS 7D的机身外壳采用了重量轻,刚性高且具有电磁屏蔽效果的镁合金材料。表面涂层采用了与EOS数码单反相机中顶级的EOS-1D系列相同的涂层材料及工艺。此外,EOS 7D还具有防水滴防尘构造,镁合金的外部部件变为高精度接缝构造,电池仓、存储卡插槽盖以及各操作按钮周围等都采用了密封部件,来保护相机的内部。EOS 7D背面的液晶监视器采用了具有160°的广视角(上下左右方向)及高清晰的92万点新型液晶监视器——“3.0"清晰显示液晶监视器II型”,其内部构造也经过重新研发,采用了新技术。7D机身上分别设置了专用的“实时显示/短片拍摄开关 ”和相应的“开始/停止按钮 ”,并且短片拍摄时能够在手动模式下对曝光进行控制。此外,可实现每秒30/25/24帧,分辨率1920×1080像素的全高清短片拍摄,在使用高清画质(分辨率1280×720像素)及标清画质(分辨率640×480像素)时,能够以每秒60/50帧进行拍摄。编辑观点:佳能7D的出现,再一次丰富了E0S产品系列中APS-C规格单反的阵营。佳能也终于有了可以和尼康D300级别单反正面对抗的产品。而出色的性能表现,不论是摄影爱好者还是专业人士,都会对其青睐有加。而上市价格也比较合理,只是希望7D不要重蹈5D II缺货涨价的覆辙。

+ 9
- 0
tests/data_for_tests/io/THUCNews/train.txt
File diff suppressed because it is too large
View File


+ 7
- 0
tests/data_for_tests/io/WeiboSenti100k/dev.txt View File

@@ -0,0 +1,7 @@
label text
1 多谢小莲,好运满满[爱你]
1 能在他乡遇老友真不赖,哈哈,珠儿,我也要用这个拼图软件!BTW,小飞人儿终于要飞回家啦,深圳行,谢谢每位工作人员的照顾![爱你]
0 [衰]补鞋的说鞋子是进口的,质量太好,刀子都切不进去!所以说大家以后别买进口,到时补都没的补![爱你]
0 第五季都没看了[泪]要补起来
1 美图好诗![鼓掌] //@言家楼:回复@俺叫老鬼:【七律。感时】 叶随风舞身何处, 鸟逆风行觅树梢。 岁月风来无退路, 激流风助有波涛。 寒微风动曾言志, 富贵风骚似不牢。 雪竹风梅诗未尽, 休云风雨剪春刀。//鸢肩格:藏珠“风”。
0 没敢问,她男朋友在旁边呢。。[泪]//@好饭换坏饭: 你问问她能不能调成静音模式

+ 8
- 0
tests/data_for_tests/io/WeiboSenti100k/test.txt View File

@@ -0,0 +1,8 @@
label text
1 钟爱大粉的亲们,这一茬我们又种大粉了,座果也不错,能吃上了[嘻嘻]
0 //@北京全攻略: 我擦。。。牛逼~果断收藏[衰]
1 都有我都有我~~~我的2012注定是美美的精彩的不得了啊~哈哈哈[太开心]//@哆啦胖兔梦: 转发微博。
1 这周的成果就是这样 刻的好累但是很喜欢[嘻嘻]#我的橡皮章#
1 你把我整?了。[抓狂] //@窦智耀:开 往大稿艺术区店开 带上祝贺的花篮。。。昨夜 杨家火锅 你把我灌醉。。。今夜 我要学会排队等位。再贺开业大吉![鼓掌][鼓掌][鼓掌]
1 [爱你]亲们,我刚刚发表了一篇文章,有图有真相,速来围观![围观]||#蚂蜂窝游记#《新疆,雨中的野核桃沟》,查看更多精彩>>> http://t.cn/zR4BMN3 (分享自 @蚂蜂窝旅游攻略)
0 [泪]//@平安北京: 珍爱生命,小心驾驶,驾车时请勿接打电话!

+ 7
- 0
tests/data_for_tests/io/WeiboSenti100k/train.txt View File

@@ -0,0 +1,7 @@
label text
1 //@实用小百科:这才是吃货本色[哈哈]
0 回复@邋遢大王诗文:好的[ok] //@邋遢大王诗文:回复@静冈叔叔:[ok]木有问题!回来了和我联系 //@静冈叔叔:回复@西瓜叫高荔蜒啊:在富士山静冈机场有很多小丸子的土产啊[嘻嘻] //@西瓜叫高荔蜒啊:祝你一路顺风~ 想要小丸子的お土?~[泪]
1 我花了两年最后被抢的只剩下一枚,情何以堪! //@自由橙的小窝:@程诗然 同学集卡速度最快,我花了两年时间才集全 //@怯弱的狮子Susan: 回复@阮导:@墙墙-墙根俱乐部 看你多抢手!快给我们各发一套吧![嘻嘻] //@阮导:回复@怯弱的狮子Susan:所以。。。。你要给我找一套撒。。哈哈哈哈哈!!!
1 KIMSCLOSET的年会,海鲜自助餐,太丰盛了!大家吃的HIGH,喝的HIGH,聊的HIGH!太开心了![哈哈][爱你]
1 在iPhone的便携鱼眼镜头之下,扣肉蝴蝶饱子显得多诱人呀![围观][馋嘴][嘻嘻]
0 英织,你知道不知道,他是我最最最爱的大叔,你跟他靠这么近,我的心都碎了!!!你说你说你说,你有没有他的签名![泪]

+ 7
- 0
tests/data_for_tests/io/XNLI/dev.txt View File

@@ -0,0 +1,7 @@
language gold_label sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2 promptID pairID genre label1 label2 label3 label4 label5 sentence1_tokenized sentence2_tokenized match
zh neutral 他说,妈妈,我回来了。 校车把他放下后,他立即给他妈妈打了电话。 1 1 facetoface neutral contradiction neutral neutral neutral 他 说 , 妈妈 , 我 回来 了 。 校车 把 他 放下 后 , 他 立即 给 他 妈妈 打 了 电话 。 True
zh contradiction 他说,妈妈,我回来了。 他没说一句话。 1 2 facetoface contradiction contradiction contradiction contradiction contradiction 他 说 , 妈妈 , 我 回来 了 。 他 没 说 一 句 话 。 True
zh entailment 他说,妈妈,我回来了。 他告诉他的妈妈他已经回到家了。 1 3 facetoface entailment entailment neutral entailment entailment 他 说 , 妈妈 , 我 回来 了 。 他 告诉 他 的 妈妈 他 已经 回到家 了 。 True
zh neutral 他们停止了跟这家交朋友,因为他们决定了当白人。 种族紧张局势开始时,他们不再探望这家人。 13 39 facetoface neutral entailment entailment entailment entailment 他们 停止 了 跟 这家 交朋友 , 因为 他们 决定 了 当 白人 。 种族 紧张 局势 开始 时 , 他们 不再 探望 这家 人 。 False
zh contradiction 老太太以前常说她姐姐和姐丈是如何决定要搬到奥古斯塔城里去,并且被当做白人看待。 奶奶的妹妹是白人,搬到了德克萨斯州。 17 49 facetoface contradiction contradiction contradiction contradiction neutral 老太太 以前 常 说 她 姐姐 和 姐丈 是 如何 决定 要 搬 到 奥古斯塔 城里 去 , 并且 被 当做 白人 看待 。 奶奶 的 妹妹 是 白人 , 搬 到 了 德克萨斯州 。 True
zh entailment 老太太以前常说她姐姐和姐丈是如何决定要搬到奥古斯塔城里去,并且被当做白人看待。 奶奶的姐姐不是白人。 17 50 facetoface entailment entailment contradiction neutral entailment 老太太 以前 常 说 她 姐姐 和 姐丈 是 如何 决定 要 搬 到 奥古斯塔 城里 去 , 并且 被 当做 白人 看待 。 奶奶 的 姐姐 不 是 白人 。 True

+ 7
- 0
tests/data_for_tests/io/XNLI/test.txt View File

@@ -0,0 +1,7 @@
language gold_label sentence1_binary_parse sentence2_binary_parse sentence1_parse sentence2_parse sentence1 sentence2 promptID pairID genre label1 label2 label3 label4 label5 sentence1_tokenized sentence2_tokenized match
zh contradiction 嗯,我根本没想过,但是我很沮丧,最后我又和他说话了。 我还没有和他再次谈论。 2 4 facetoface contradiction contradiction contradiction contradiction contradiction 嗯 , 我 根本 没 想 过 , 但是 我 很 沮丧 , 最后 我 又 和 他 说话 了 。 我 还 没有 和 他 再次 谈论 。 True
zh entailment 嗯,我根本没想过,但是我很沮丧,最后我又和他说话了。 我非常沮丧,我刚刚开始跟他说话。 2 5 facetoface entailment entailment entailment entailment entailment 嗯 , 我 根本 没 想 过 , 但是 我 很 沮丧 , 最后 我 又 和 他 说话 了 。 我 非常 沮丧 , 我 刚刚 开始 跟 他 说话 。 True
zh neutral 嗯,我根本没想过,但是我很沮丧,最后我又和他说话了。 我们谈得很好。 2 6 facetoface neutral neutral neutral neutral neutral 嗯 , 我 根本 没 想 过 , 但是 我 很 沮丧 , 最后 我 又 和 他 说话 了 。 我们 谈 得 很 好 。 True
zh neutral 而我当初认为这是一个特权,我现在仍然这样想,我是唯一的922 Ex-O,也是我的AFFC空军职业生涯。 我不知道那天我不是唯一一个在场的人。 3 7 facetoface neutral contradiction contradiction contradiction contradiction 而 我 当初 认为 这 是 一个 特权 , 我 现在 仍然 这样 想 , 我 是 唯一 的 922 Ex-O , 也 是 我 的 AFFC 空军 职业生涯 。 我 不 知道 那天 我 不 是 唯一 一个 在场 的 人 。 False
zh contradiction 而我当初认为这是一个特权,我现在仍然这样想,我是唯一的922 Ex-O,也是我的AFFC空军职业生涯。 我们都被赋予了相同的确切数字,无论我们被许诺了何种特权,都是谎言。 3 9 facetoface contradiction contradiction entailment contradiction contradiction 而 我 当初 认为 这 是 一个 特权 , 我 现在 仍然 这样 想 , 我 是 唯一 的 922 Ex-O , 也 是 我 的 AFFC 空军 职业生涯 。 我们 都 被 赋予 了 相同 的 确切 数字 , 无论 我们 被 许诺 了 何种 特权 , 都 是 谎言 。 True
zh entailment 这是Fannie Flono,她在佐治亚州奥古斯塔长大,她会讲述她童年时的一些故事。 Fannie Flono就在这里,她将与我们分享她在奥古斯塔成长的童年故事。 12 35 facetoface entailment entailment entailment entailment entailment 这 是 Fannie Flono , 她 在 佐治亚州 奥古斯塔 长大 , 她 会讲 述 她 童年 时 的 一些 故事 。 Fannie Flono 就 在 这里 , 她 将 与 我们 分享 她 在 奥古斯塔 成 长 的 童年 故事 。 True

+ 9
- 0
tests/data_for_tests/io/XNLI/train.txt View File

@@ -0,0 +1,9 @@
premise hypo label
我们 家里 有 一个 但 我 没 找到 我 可以 用 的 时间 我们 家里 有 一个 但 我 从来 没有 时间 使用 它 . entailment
该镇 仍然 充满 雕塑家 , piazza alberica 是 一个 夏季 雕塑 比赛 的 现场 14 天 来 制作 一个 杰作 . 几乎 所有 的 雕塑家 都 离开 了 piazza alberica 为 其他 城市 . contradictory
土耳其 的 面包车 是 自己 坐 下 来 的 , 但 他们 喜欢 玩和呃 , 他们 喜欢 和 他们 一起 玩 , 他们 把 他们 的 社会 从 它 . neutral
好 吗 ? 我 问 benignantly , 因为 她 犹豫 了 . 我 抓住 她 的 胳膊 和 她 愤怒地 , 问 , 好 吗 ? contradictory
一 段 时间 来 看 , 这 一 运动 似乎 要 取得 成功 , 但 政治 事件 , 加 上 帕内尔 在 一个 令 人 愤慨 的 离婚案 中 被 称为 共同 答辩人 , 导致 许多 人 撤回 他们 的 支持 . 帕内尔 在 一个 令 人 愤慨 的 离婚 问题 上 的 法律 问题 使 这 场 运动 受到 了 影响 . entailment
看 在 这里 , 他 说 我们 不 希望 任何 律师 混在 这 一 点 . 他 说 看看 那 张 纸 neutral
Soderstrom 在 创伤 中心 进行 了 多次 筛选 测试 . 测试 必须 在 创伤 中心 进行 比较 , 否则 就 会 无效 . neutral
嗯 , 这 是 一 种 明显 的 我 的 意思 是 , 他们 甚至 把 它 带 到 现在 呢 , 他们 在 电视 上 做 广告 , 你 知道 如果 你 知道 , 如果 你 知道 这样 做 , 或者 如果 你 需要 这 个呃 , 我们 会 告 你 和 你 你 不用 给 我们 钱 , 但 他们 不 告诉 你 的 是 如果 他们 赢 了 你 给 他们 至少 三分之一 他们 赢 的 东西 , 所以 我 不 知道 它 是呃 , 它 得到 了 现在 做 更 多 的 生意 , 而 不 是呃 实际上 是 在 处理 犯罪 而 不 是 与 呃嗯 他们 的 律师 只 是 为了 钱 , 我 相信 , 我 知道 我 同意 你 , 我 认为 你 是 真实 的 你. 非常 正确 的 是 , 我 认为 他们 应该 有 同等 数量 的 你 知道 也许 他们 可以 有 几 个 , 但 我 认为 大多数 他们 应该 不 是 律师 在 事实 , 这 是 方式 他们 已经 进入 政治 , 这 是 因为 在 法律 上 , 你 知道 的 循环 和 一切 , 但 我 不 知道 我们 是 在 马里兰州 和呃 , 我们 有 同样 的 东西 人满为患 , 和呃 他们 让 他们 出来 我 的 意思 是 只 是 普通 的 监狱 判决 的 事情 , 他们 让. 他们 是 因为 他们 没有 任何 地方 可以 留住 他们 所以 你 可以 知道呃 , 除非 是 一个 重大 的 罪行 , 但呃 , 即使 是 小小的 东西 , 我 的 意思 是 那些 在 美国 失去 的 人 是 受害者 和 谁 可能 是 抢劫 或 毒品 , 或者 其他 什么 , 他们 是 谁 要 支付 , 他们 是 一个 会 受苦 , 另 一个 你 知道 的 人 , 如果 他们 被 逮捕 , 如果 他们 逮捕 他们嗯 , 然后 呢 , 你 知道 的 时间 法律 接管 了 一 半 时间 呃 他们 要么 让 他们 走 , 或者 他们 下 了 一个 句子 , 因为 他们 有 一个 律师 , 你 知道 的 感觉 他们 是 不 是 所有 在 一起 当 他们 做到 了 .它 我 不 知道 我们 怎么 到 这 一 点 , 虽然 . neutral

+ 5
- 0
tests/data_for_tests/io/ag/test.csv View File

@@ -0,0 +1,5 @@
"3","Fears for T N pension after talks","Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul."
"4","The Race is On: Second Private Team Sets Launch Date for Human Spaceflight (SPACE.com)","SPACE.com - TORONTO, Canada -- A second\team of rocketeers competing for the #36;10 million Ansari X Prize, a contest for\privately funded suborbital space flight, has officially announced the first\launch date for its manned rocket."
"4","Ky. Company Wins Grant to Study Peptides (AP)","AP - A company founded by a chemistry researcher at the University of Louisville won a grant to develop a method of producing better peptides, which are short chains of amino acids, the building blocks of proteins."
"4","Prediction Unit Helps Forecast Wildfires (AP)","AP - It's barely dawn when Mike Fitzpatrick starts his shift with a blur of colorful maps, figures and endless charts, but already he knows what the day will bring. Lightning will strike in places he expects. Winds will pick up, moist places will dry and flames will roar."
"4","Calif. Aims to Limit Farm-Related Smog (AP)","AP - Southern California's smog-fighting agency went after emissions of the bovine variety Friday, adopting the nation's first rules to reduce air pollution from dairy cow manure."

+ 4
- 0
tests/data_for_tests/io/ag/train.csv View File

@@ -0,0 +1,4 @@
"3","Wall St. Bears Claw Back Into the Black (Reuters)","Reuters - Short-sellers, Wall Street's dwindling\band of ultra-cynics, are seeing green again."
"4","Building Dedicated to Columbia Astronauts (AP)","AP - A former dormitory converted to classrooms at the Pensacola Naval Air Station was dedicated Friday to two Columbia astronauts who were among the seven who died in the shuttle disaster Feb. 1, 2003."
"2","Phelps On Relay Team","Michael Phelps is named to the 4x100-meter freestyle relay team that will compete in Sunday's final, keeping alive his quest for a possible eight Olympic gold medals."
"1","Venezuelans Vote Early in Referendum on Chavez Rule (Reuters)","Reuters - Venezuelans turned out early\and in large numbers on Sunday to vote in a historic referendum\that will either remove left-wing President Hugo Chavez from\office or give him a new mandate to govern for the next two\years."

+ 155
- 0
tests/data_for_tests/io/cmrc/dev.json View File

@@ -0,0 +1,155 @@
{
"version": "v1.0",
"data": [
{
"paragraphs": [
{
"id": "DEV_0",
"context": "《战国无双3》()是由光荣和ω-force开发的战国无双系列的正统第三续作。本作以三大故事为主轴,分别是以武田信玄等人为主的《关东三国志》,织田信长等人为主的《战国三杰》,石田三成等人为主的《关原的年轻武者》,丰富游戏内的剧情。此部份专门介绍角色,欲知武器情报、奥义字或擅长攻击类型等,请至战国无双系列1.由于乡里大辅先生因故去世,不得不寻找其他声优接手。从猛将传 and Z开始。2.战国无双 编年史的原创男女主角亦有专属声优。此模式是任天堂游戏谜之村雨城改编的新增模式。本作中共有20张战场地图(不含村雨城),后来发行的猛将传再新增3张战场地图。但游戏内战役数量繁多,部分地图会有兼用的状况,战役虚实则是以光荣发行的2本「战国无双3 人物真书」内容为主,以下是相关介绍。(注:前方加☆者为猛将传新增关卡及地图。)合并本篇和猛将传的内容,村雨城模式剔除,战国史模式可直接游玩。主打两大模式「战史演武」&「争霸演武」。系列作品外传作品",
"qas": [
{
"question": "《战国无双3》是由哪两个公司合作开发的?",
"id": "DEV_0_QUERY_0",
"answers": [
{
"text": "光荣和ω-force",
"answer_start": 11
},
{
"text": "光荣和ω-force",
"answer_start": 11
},
{
"text": "光荣和ω-force",
"answer_start": 11
}
]
},
{
"question": "男女主角亦有专属声优这一模式是由谁改编的?",
"id": "DEV_0_QUERY_1",
"answers": [
{
"text": "村雨城",
"answer_start": 226
},
{
"text": "村雨城",
"answer_start": 226
},
{
"text": "任天堂游戏谜之村雨城",
"answer_start": 219
}
]
},
{
"question": "战国史模式主打哪两个模式?",
"id": "DEV_0_QUERY_2",
"answers": [
{
"text": "「战史演武」&「争霸演武」",
"answer_start": 395
},
{
"text": "「战史演武」&「争霸演武」",
"answer_start": 395
},
{
"text": "「战史演武」&「争霸演武」",
"answer_start": 395
}
]
}
]
}
],
"id": "DEV_0",
"title": "战国无双3"
},
{
"paragraphs": [
{
"id": "DEV_1",
"context": "锣鼓经是大陆传统器乐及戏曲里面常用的打击乐记谱方法,以中文字的声音模拟敲击乐的声音,纪录打击乐的各种不同的演奏方法。常用的节奏型称为「锣鼓点」。而锣鼓是戏曲节奏的支柱,除了加强演员身段动作的节奏感,也作为音乐的引子和尾声,提示音乐的板式和速度,以及作为唱腔和念白的伴奏,令诗句的韵律更加抑扬顿锉,段落分明。锣鼓的运用有约定俗成的程式,依照角色行当的身份、性格、情绪以及环境,配合相应的锣鼓点。锣鼓亦可以模仿大自然的音响效果,如雷电、波浪等等。戏曲锣鼓所运用的敲击乐器主要分为鼓、锣、钹和板四类型:鼓类包括有单皮鼓(板鼓)、大鼓、大堂鼓(唐鼓)、小堂鼓、怀鼓、花盆鼓等;锣类有大锣、小锣(手锣)、钲锣、筛锣、马锣、镗锣、云锣;钹类有铙钹、大钹、小钹、水钹、齐钹、镲钹、铰子、碰钟等;打拍子用的檀板、木鱼、梆子等。因为京剧的锣鼓通常由四位乐师负责,又称为四大件,领奏的师傅称为:「鼓佬」,其职责有如西方乐队的指挥,负责控制速度以及利用各种手势提示乐师演奏不同的锣鼓点。粤剧吸收了部份京剧的锣鼓,但以木鱼和沙的代替了京剧的板和鼓,作为打拍子的主要乐器。以下是京剧、昆剧和粤剧锣鼓中乐器对应的口诀用字:",
"qas": [
{
"question": "锣鼓经是什么?",
"id": "DEV_1_QUERY_0",
"answers": [
{
"text": "大陆传统器乐及戏曲里面常用的打击乐记谱方法",
"answer_start": 4
},
{
"text": "大陆传统器乐及戏曲里面常用的打击乐记谱方法",
"answer_start": 4
},
{
"text": "大陆传统器乐及戏曲里面常用的打击乐记谱方法",
"answer_start": 4
}
]
},
{
"question": "锣鼓经常用的节奏型称为什么?",
"id": "DEV_1_QUERY_1",
"answers": [
{
"text": "锣鼓点",
"answer_start": 67
},
{
"text": "锣鼓点",
"answer_start": 67
},
{
"text": "锣鼓点",
"answer_start": 67
}
]
},
{
"question": "锣鼓经运用的程式是什么?",
"id": "DEV_1_QUERY_2",
"answers": [
{
"text": "依照角色行当的身份、性格、情绪以及环境,配合相应的锣鼓点。",
"answer_start": 167
},
{
"text": "依照角色行当的身份、性格、情绪以及环境,配合相应的锣鼓点。",
"answer_start": 167
},
{
"text": "依照角色行当的身份、性格、情绪以及环境,配合相应的锣鼓点",
"answer_start": 167
}
]
},
{
"question": "戏曲锣鼓所运用的敲击乐器主要有什么类型?",
"id": "DEV_1_QUERY_3",
"answers": [
{
"text": "鼓、锣、钹和板",
"answer_start": 237
},
{
"text": "鼓、锣、钹和板",
"answer_start": 237
},
{
"text": "鼓、锣、钹和板",
"answer_start": 237
}
]
}
]
}
],
"id": "DEV_1",
"title": "锣鼓经"
}
]
}

+ 161
- 0
tests/data_for_tests/io/cmrc/train.json View File

@@ -0,0 +1,161 @@
{
"version": "v1.0",
"data": [
{
"paragraphs": [
{
"id": "TRAIN_186",
"context": "范廷颂枢机(,),圣名保禄·若瑟(),是越南罗马天主教枢机。1963年被任为主教;1990年被擢升为天主教河内总教区宗座署理;1994年被擢升为总主教,同年年底被擢升为枢机;2009年2月离世。范廷颂于1919年6月15日在越南宁平省天主教发艳教区出生;童年时接受良好教育后,被一位越南神父带到河内继续其学业。范廷颂于1940年在河内大修道院完成神学学业。范廷颂于1949年6月6日在河内的主教座堂晋铎;及后被派到圣女小德兰孤儿院服务。1950年代,范廷颂在河内堂区创建移民接待中心以收容到河内避战的难民。1954年,法越战争结束,越南民主共和国建都河内,当时很多天主教神职人员逃至越南的南方,但范廷颂仍然留在河内。翌年管理圣若望小修院;惟在1960年因捍卫修院的自由、自治及拒绝政府在修院设政治课的要求而被捕。1963年4月5日,教宗任命范廷颂为天主教北宁教区主教,同年8月15日就任;其牧铭为「我信天主的爱」。由于范廷颂被越南政府软禁差不多30年,因此他无法到所属堂区进行牧灵工作而专注研读等工作。范廷颂除了面对战争、贫困、被当局迫害天主教会等问题外,也秘密恢复修院、创建女修会团体等。1990年,教宗若望保禄二世在同年6月18日擢升范廷颂为天主教河内总教区宗座署理以填补该教区总主教的空缺。1994年3月23日,范廷颂被教宗若望保禄二世擢升为天主教河内总教区总主教并兼天主教谅山教区宗座署理;同年11月26日,若望保禄二世擢升范廷颂为枢机。范廷颂在1995年至2001年期间出任天主教越南主教团主席。2003年4月26日,教宗若望保禄二世任命天主教谅山教区兼天主教高平教区吴光杰主教为天主教河内总教区署理主教;及至2005年2月19日,范廷颂因获批辞去总主教职务而荣休;吴光杰同日真除天主教河内总教区总主教职务。范廷颂于2009年2月22日清晨在河内离世,享年89岁;其葬礼于同月26日上午在天主教河内总教区总主教座堂举行。",
"qas": [
{
"question": "范廷颂是什么时候被任为主教的?",
"id": "TRAIN_186_QUERY_0",
"answers": [
{
"text": "1963年",
"answer_start": 30
}
]
},
{
"question": "1990年,范廷颂担任什么职务?",
"id": "TRAIN_186_QUERY_1",
"answers": [
{
"text": "1990年被擢升为天主教河内总教区宗座署理",
"answer_start": 41
}
]
},
{
"question": "范廷颂是于何时何地出生的?",
"id": "TRAIN_186_QUERY_2",
"answers": [
{
"text": "范廷颂于1919年6月15日在越南宁平省天主教发艳教区出生",
"answer_start": 97
}
]
},
{
"question": "1994年3月,范廷颂担任什么职务?",
"id": "TRAIN_186_QUERY_3",
"answers": [
{
"text": "1994年3月23日,范廷颂被教宗若望保禄二世擢升为天主教河内总教区总主教并兼天主教谅山教区宗座署理",
"answer_start": 548
}
]
},
{
"question": "范廷颂是何时去世的?",
"id": "TRAIN_186_QUERY_4",
"answers": [
{
"text": "范廷颂于2009年2月22日清晨在河内离世",
"answer_start": 759
}
]
}
]
}
],
"id": "TRAIN_186",
"title": "范廷颂"
},
{
"paragraphs": [
{
"id": "TRAIN_54",
"context": "安雅·罗素法(,),来自俄罗斯圣彼得堡的模特儿。她是《全美超级模特儿新秀大赛》第十季的亚军。2008年,安雅宣布改回出生时的名字:安雅·罗素法(Anya Rozova),在此之前是使用安雅·冈()。安雅于俄罗斯出生,后来被一个居住在美国夏威夷群岛欧胡岛檀香山的家庭领养。安雅十七岁时曾参与香奈儿、路易·威登及芬迪(Fendi)等品牌的非正式时装秀。2007年,她于瓦伊帕胡高级中学毕业。毕业后,她当了一名售货员。她曾为Russell Tanoue拍摄照片,Russell Tanoue称赞她是「有前途的新面孔」。安雅在半准决赛面试时说她对模特儿行业充满热诚,所以参加全美超级模特儿新秀大赛。她于比赛中表现出色,曾五次首名入围,平均入围顺序更拿下历届以来最优异的成绩(2.64),另外胜出三次小挑战,分别获得与评判尼祖·百克拍照、为柠檬味道的七喜拍摄广告的机会及十万美元、和盖马蒂洛(Gai Mattiolo)设计的晚装。在最后两强中,安雅与另一名参赛者惠妮·汤姆森为范思哲走秀,但评判认为她在台上不够惠妮突出,所以选了惠妮当冠军,安雅屈居亚军(但就整体表现来说,部份网友认为安雅才是第十季名副其实的冠军。)安雅在比赛拿五次第一,也胜出多次小挑战。安雅赛后再次与Russell Tanoue合作,为2008年4月30日出版的MidWeek杂志拍摄封面及内页照。其后她参加了V杂志与Supreme模特儿公司合办的模特儿选拔赛2008。她其后更与Elite签约。最近她与香港的模特儿公司 Style International Management 签约,并在香港发展其模特儿事业。她曾在很多香港的时装杂志中任模特儿,《Jet》、《东方日报》、《Elle》等。",
"qas": [
{
"question": "安雅·罗素法参加了什么比赛获得了亚军?",
"id": "TRAIN_54_QUERY_0",
"answers": [
{
"text": "《全美超级模特儿新秀大赛》第十季",
"answer_start": 26
}
]
},
{
"question": "Russell Tanoue对安雅·罗素法的评价是什么?",
"id": "TRAIN_54_QUERY_1",
"answers": [
{
"text": "有前途的新面孔",
"answer_start": 247
}
]
},
{
"question": "安雅·罗素法合作过的香港杂志有哪些?",
"id": "TRAIN_54_QUERY_2",
"answers": [
{
"text": "《Jet》、《东方日报》、《Elle》等",
"answer_start": 706
}
]
},
{
"question": "毕业后的安雅·罗素法职业是什么?",
"id": "TRAIN_54_QUERY_3",
"answers": [
{
"text": "售货员",
"answer_start": 202
}
]
}
]
}
],
"id": "TRAIN_54",
"title": "安雅·罗素法"
},
{
"paragraphs": [
{
"id": "TRAIN_756",
"context": "为日本漫画足球小将翼的一个角色,自小父母离异,与父亲一起四处为家,每个地方也是待一会便离开,但他仍然能够保持优秀的学业成绩。在第一次南葛市生活时,与同样就读于南葛小学的大空翼为黄金拍档,曾效力球队包括南葛小学、南葛高中、日本少年队、日本青年军、日本奥运队。效力日本青年军期间,因救同母异父的妹妹导致被车撞至断脚,在决赛周只在决赛的下半场十五分钟开始上场,成为日本队夺得世青冠军的其中一名功臣。基本资料绰号:球场上的艺术家出身地:日本南葛市诞生日:5月5日星座:金牛座球衣号码:11担任位置:中场、攻击中场、右中场擅长脚:右脚所属队伍:盘田山叶故事发展岬太郎在小学期间不断转换学校,在南葛小学就读时在全国大赛中夺得冠军;国中三年随父亲孤单地在法国留学;回国后三年的高中生涯一直输给日本王牌射手日向小次郎率领的东邦学院。在【Golden 23】年代,大空翼、日向小次郎等名将均转战海外,他与松山光、三杉淳组成了「3M」组合(松山光Hikaru Matsuyama、岬太郎Taro Misaki、三杉淳Jyun Misugi)。必杀技1. 回力刀射门2. S. S. S. 射门3. 双人射门(与大空翼合作)",
"qas": [
{
"question": "岬太郎在第一次南葛市生活时的搭档是谁?",
"id": "TRAIN_756_QUERY_0",
"answers": [
{
"text": "大空翼",
"answer_start": 84
}
]
},
{
"question": "日本队夺得世青冠军,岬太郎发挥了什么作用?",
"id": "TRAIN_756_QUERY_1",
"answers": [
{
"text": "在决赛周只在决赛的下半场十五分钟开始上场,成为日本队夺得世青冠军的其中一名功臣。",
"answer_start": 156
}
]
},
{
"question": "岬太郎与谁一起组成了「3M」组合?",
"id": "TRAIN_756_QUERY_2",
"answers": [
{
"text": "他与松山光、三杉淳组成了「3M」组合(松山光Hikaru Matsuyama、岬太郎Taro Misaki、三杉淳Jyun Misugi)。",
"answer_start": 391
}
]
}
]
}
],
"id": "TRAIN_756",
"title": "岬太郎"
}
]
}

+ 4
- 0
tests/data_for_tests/io/cnndm/dev.label.jsonl
File diff suppressed because it is too large
View File


+ 4
- 0
tests/data_for_tests/io/cnndm/test.label.jsonl
File diff suppressed because it is too large
View File


+ 10
- 0
tests/data_for_tests/io/cnndm/train.cnndm.jsonl
File diff suppressed because it is too large
View File


+ 100
- 0
tests/data_for_tests/io/cnndm/vocab View File

@@ -0,0 +1,100 @@
. 12172211
the 11896296
, 9609022
to 5751102
a 5100569
and 4892246
of 4867879
in 4431149
's 2202754
was 2086001
for 1995054
that 1944328
' 1880335
on 1858606
` 1821696
is 1797908
he 1678396
it 1603145
with 1497568
said 1348297
: 1344327
his 1302056
at 1260578
as 1230256
i 1089458
by 1064355
have 1016505
from 1015625
has 969042
her 935151
be 932950
'' 904149
`` 898933
but 884494
are 865728
she 850971
they 816011
an 766001
not 738121
had 725375
who 722127
this 721027
after 669231
were 655187
been 647432
their 645014
we 625684
will 577581
when 506811
-rrb- 501827
n't 499765
-lrb- 497508
one 490666
which 465040
you 461359
-- 460450
up 437177
more 433177
out 432343
about 428037
would 400420
- 399113
or 399001
there 389590
people 386121
new 380970
also 380041
all 350670
two 343787
can 341110
him 338345
do 330166
into 319067
last 315857
so 308507
than 306701
just 305759
time 302071
police 301341
could 298919
told 298384
over 297568
if 297292
what 293759
years 288999
first 283683
no 274488
my 273829
year 272392
them 270715
its 269566
now 262011
before 260991
mr 250970
other 247663
some 245191
being 243458
home 229570
like 229425
did 227833

+ 49
- 0
tests/data_for_tests/io/conll2003/dev.txt View File

@@ -0,0 +1,49 @@
-DOCSTART- -X- -X- O

CRICKET NNP B-NP O
- : O O
LEICESTERSHIRE NNP B-NP B-ORG
TAKE NNP I-NP O
OVER IN B-PP O
AT NNP B-NP O
TOP NNP I-NP O
AFTER NNP I-NP O
INNINGS NNP I-NP O
VICTORY NN I-NP O
. . O O

LONDON NNP B-NP B-LOC
1996-08-30 CD I-NP O

Phil NNP B-NP B-PER
Simmons NNP I-NP I-PER
took VBD B-VP O
four CD B-NP O
for IN B-PP O
38 CD B-NP O
on IN B-PP O
Friday NNP B-NP O
as IN B-PP O
Leicestershire NNP B-NP B-ORG
beat VBD B-VP O
Somerset NNP B-NP B-ORG
by IN B-PP O
an DT B-NP O
innings NN I-NP O
and CC O O
39 CD B-NP O
runs NNS I-NP O
in IN B-PP O
two CD B-NP O
days NNS I-NP O
to TO B-VP O
take VB I-VP O
over IN B-PP O
at IN B-PP O
the DT B-NP O
head NN I-NP O
of IN B-PP O
the DT B-NP O
county NN I-NP O
championship NN I-NP O
. . O O

+ 51
- 0
tests/data_for_tests/io/conll2003/test.txt View File

@@ -0,0 +1,51 @@
-DOCSTART- -X- -X- O

SOCCER NN B-NP O
- : O O
JAPAN NNP B-NP B-LOC
GET VB B-VP O
LUCKY NNP B-NP O
WIN NNP I-NP O
, , O O
THE NP B-NP B-PER
CHINA NNP I-NP I-PER
IN IN B-PP O
SURPRISE DT B-NP O
DEFEAT NN I-NP O
. . O O

Nadim NNP B-NP B-PER
Ladki NNP I-NP I-PER

AL-AIN NNP B-NP B-LOC
, , O O
United NNP B-NP B-LOC
Arab NNP I-NP I-LOC
Emirates NNPS I-NP I-LOC
1996-12-06 CD I-NP O

Japan NNP B-NP B-LOC
began VBD B-VP O
the DT B-NP O
defence NN I-NP O
of IN B-PP O
their PRP$ B-NP O
Asian JJ I-NP B-MISC
Cup NNP I-NP I-MISC
title NN I-NP O
with IN B-PP O
a DT B-NP O
lucky JJ I-NP O
2-1 CD I-NP O
win VBP B-VP O
against IN B-PP O
Syria NNP B-NP B-LOC
in IN B-PP O
a DT B-NP O
Group NNP I-NP O
C NNP I-NP O
championship NN I-NP O
match NN I-NP O
on IN B-PP O
Friday NNP B-NP O
. . O O

+ 48
- 0
tests/data_for_tests/io/conll2003/train.txt View File

@@ -0,0 +1,48 @@
-DOCSTART- -X- -X- O

EU NNP B-NP B-ORG
rejects VBZ B-VP O
German JJ B-NP B-MISC
call NN I-NP O
to TO B-VP O
boycott VB I-VP O
British JJ B-NP B-MISC
lamb NN I-NP O
. . O O

Peter NNP B-NP B-PER
Blackburn NNP I-NP I-PER

BRUSSELS NNP B-NP B-LOC
1996-08-22 CD I-NP O

The DT B-NP O
European NNP I-NP B-ORG
Commission NNP I-NP I-ORG
said VBD B-VP O
on IN B-PP O
Thursday NNP B-NP O
it PRP B-NP O
disagreed VBD B-VP O
with IN B-PP O
German JJ B-NP B-MISC
advice NN I-NP O
to TO B-PP O
consumers NNS B-NP O
to TO B-VP O
shun VB I-VP O
British JJ B-NP B-MISC
lamb NN I-NP O
until IN B-SBAR O
scientists NNS B-NP O
determine VBP B-VP O
whether IN B-SBAR O
mad JJ B-NP O
cow NN I-NP O
disease NN I-NP O
can MD B-VP O
be VB I-VP O
transmitted VBN I-VP O
to TO B-PP O
sheep NN B-NP O
. . O O

+ 6
- 0
tests/data_for_tests/io/cws_as/dev.txt View File

@@ -0,0 +1,6 @@
時間 :
三月 十日 ( 星期四 ) 上午 十時 。
並 辦理 加州 大學 退休 等 手續 。
包括 一九七八年 獲有 數學 諾貝爾 之 稱 的 費爾茲獎 ,
在 台大 的 四 年 裡 ,
他 語重心長 的 勉勵 同學 們 一 番 話 ,

+ 6
- 0
tests/data_for_tests/io/cws_as/test.txt View File

@@ -0,0 +1,6 @@
許多 社區 長青 學苑 多 開設 有 書法 、 插花 、 土風舞班 ,
文山區 長青 學苑 則 有 個 十分 特別 的 「 英文 歌唱班 」 ,
成員 年齡 均 超過 六十 歲 ,
這 群 白髮蒼蒼 ,
爺爺 、 奶奶級 的 學員 唱起 英文 歌 來 字正腔圓 ,
有模有樣 。

+ 6
- 0
tests/data_for_tests/io/cws_as/train.txt View File

@@ -0,0 +1,6 @@
地點 :
學術 活動 中心 一樓 簡報室 。
主講 :
民族所 所長 莊英章 先生 。
講題 :
閩 、 台 漢人 社會 研究 的 若干 考察 。

+ 6
- 0
tests/data_for_tests/io/cws_cityu/dev.txt View File

@@ -0,0 +1,6 @@
立會 選情 告一段落 民主 進程 還 看 明天
所謂 「 左 」 的 勢力 , 是 指 以 鄭經翰 、 梁國雄 ( 長毛 ) 為 代表 的 激進 民主 勢力 , 他們 尖銳 批評 中央 和 特區 政府 , 積極 為 基層 勞工 爭取 福利 , 可能 會 為 民主派 與 中央 和解 增加 困難 , 牽制 民主黨 走 中產 溫和 路線 。
特區 政府 應該 積極 與 民主派 改善 關係 , 尤其 要 爭取 中間 及 「 右 」 翼 的 民主 勢力 , 因為 這些 人 背後 反映 的 是 香港 的 主流 民意 , 除了 民主 步伐 和 涉及 中央 的 敏感 政治 議題 , 他們 和 建制派 的 溫和 力量 沒有 基本 不同 , 很 容易 達成 跨 黨派 的 共識 , 令 特區 政府 處於 不得不 從 的 被動 位置 , 23 條 立法 撤回 、 追究 SARS 責任 等 , 都 是 記憶猶新 的 例子 。
為 何秀蘭 喝彩 為 香港 人 神傷
單說 立法會 , 自 91 年 以來 , 經歷 5 次 類似 的 地區 直選 。
點票 過程 出現 的 笑話 更 多 。

+ 6
- 0
tests/data_for_tests/io/cws_cityu/test.txt View File

@@ -0,0 +1,6 @@
「 練 得 銅皮鐵骨 」 露宿 早 慣 蚊叮
本 港 約 有 450 至 600 名 露宿者 , 其中 近 四分之一 , 即 約 150 人 露宿 在 深水埗 。
有 外展 社工 稱 , 露宿者 日間 多 到 商場 等 冷氣 場所 避暑 , 流連 至 晚上 11 、 12 時 , 才 用 紙皮 在 公園 外 「 打地鋪 」 , 他們 早已 「 練 得 一 身 銅皮鐵骨 」 , 徹夜 被 蚊 叮 也 習以為常 , 但 社工 在 炎夏 仍 會 頻頻 給 他們 派發 蚊香 。
基督教 關懷 無家者 協會 的 外展 社工 , 過去 一直 有 探訪 李鄭屋 遊樂場 外 的 露宿者 , 該 會 總幹事 賴淑芬 說 , 該 處 的 露宿者 只 有 數 人 , 且 流動性 很 大 。
不管 被 多少 蚊 叮 也 沒 什 感覺
她 指 這些 露宿者 日間 都 會 流連 於 冷氣 場所 , 晚上 才 到 遊樂場 露宿 , 但 礙於 遊樂場 晚上 關門 , 他們 只 可 在 外圍 「 打地鋪 」 。

+ 6
- 0
tests/data_for_tests/io/cws_cityu/train.txt View File

@@ -0,0 +1,6 @@
立法會 選舉 出現 了 戲劇性 的 結果 , 儘管 投票率 創下 新高 , 而 過去 經驗 顯示 高 投票率 對 民主派 較 有利 , 但 由於 名單 協調 不當 及 配票 策略 失誤 , 加上 醜聞 影響 選情 , 民主黨 的 議席 比 上 一 屆 減少 , 由 第 一 大 黨 跌 至 第 三 ;
而 泛民主派 在 30 席 普選 中 亦 只能 取得 18 席 , 比 選前 預期 的 20 席 少 ;
但 在 功能 組別 選舉 卻 有 意外 收穫 , 除 保住 原有 的 5 個 議席 , 還 搶佔 了 醫學 和 會計 兩 個 專業 界別 , 令 議席 總數 達到 25 席 , 比 上 一 屆 多 了 3 席 。
更 值得 注意 的 是 , 泛民主派 候選人 在 普選 中 合共 取得 110萬 張 選票 , 佔 178萬 選票 總數 的 62 % , 顯示 多數 市民 認同 早日 實現 全面 普選 的 民主 訴求 , 這 一 點 應 為 政府 及 各 黨派 人士 所 尊重 。
須 為 2012 全面 普選 創造 條件
親 建制 陣營 方面 , 民建聯 和 自由黨 都 取得 佳績 , 分別 取得 12 席 和 11 席 , 成為 立法會 內 的 第 一 及 第 二 大 黨 。

+ 2
- 0
tests/data_for_tests/io/cws_msra/dev.txt View File

@@ -0,0 +1,2 @@
“ 人们 常 说 生活 是 一 部 教科书 , 而 血 与 火 的 战争 更 是 不可多得 的 教科书 , 她 确实 是 名副其实 的 ‘ 我 的 大学 ’ 。
他 “ 严格要求 自己 , 从 一个 科举 出身 的 进士 成为 一个 伟大 的 民主主义 者 , 进而 成为 一 位 杰出 的 党外 共产主义 战士 , 献身 于 崇高 的 共产主义 事业 。

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save