|
|
@@ -88,6 +88,7 @@ class DataSetLoader(BaseLoader): |
|
|
|
raise NotImplementedError |
|
|
|
|
|
|
|
|
|
|
|
@DataSet.set_reader('read_raw') |
|
|
|
class RawDataSetLoader(DataSetLoader): |
|
|
|
def __init__(self): |
|
|
|
super(RawDataSetLoader, self).__init__() |
|
|
@@ -103,6 +104,7 @@ class RawDataSetLoader(DataSetLoader): |
|
|
|
return convert_seq_dataset(data) |
|
|
|
|
|
|
|
|
|
|
|
@DataSet.set_reader('read_pos') |
|
|
|
class POSDataSetLoader(DataSetLoader): |
|
|
|
"""Dataset Loader for POS Tag datasets. |
|
|
|
|
|
|
@@ -172,6 +174,7 @@ class POSDataSetLoader(DataSetLoader): |
|
|
|
return convert_seq2seq_dataset(data) |
|
|
|
|
|
|
|
|
|
|
|
@DataSet.set_reader('read_tokenize') |
|
|
|
class TokenizeDataSetLoader(DataSetLoader): |
|
|
|
""" |
|
|
|
Data set loader for tokenization data sets |
|
|
@@ -231,6 +234,7 @@ class TokenizeDataSetLoader(DataSetLoader): |
|
|
|
return convert_seq2seq_dataset(data) |
|
|
|
|
|
|
|
|
|
|
|
@DataSet.set_reader('read_class') |
|
|
|
class ClassDataSetLoader(DataSetLoader): |
|
|
|
"""Loader for classification data sets""" |
|
|
|
|
|
|
@@ -269,6 +273,7 @@ class ClassDataSetLoader(DataSetLoader): |
|
|
|
return convert_seq2tag_dataset(data) |
|
|
|
|
|
|
|
|
|
|
|
@DataSet.set_reader('read_conll') |
|
|
|
class ConllLoader(DataSetLoader): |
|
|
|
"""loader for conll format files""" |
|
|
|
|
|
|
@@ -310,6 +315,7 @@ class ConllLoader(DataSetLoader): |
|
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
@DataSet.set_reader('read_lm') |
|
|
|
class LMDataSetLoader(DataSetLoader): |
|
|
|
"""Language Model Dataset Loader |
|
|
|
|
|
|
@@ -346,6 +352,7 @@ class LMDataSetLoader(DataSetLoader): |
|
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
@DataSet.set_reader('read_people_daily') |
|
|
|
class PeopleDailyCorpusLoader(DataSetLoader): |
|
|
|
""" |
|
|
|
People Daily Corpus: Chinese word segmentation, POS tag, NER |
|
|
|