@@ -72,8 +72,8 @@ class BertEmbedding(ContextualEmbedding):
         if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR:
             if 'cn' in model_dir_or_name.lower() and pool_method not in ('first', 'last'):
-                logger.warn("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
-                            " faster speed.")
+                logger.warning("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
+                               " faster speed.")
                 warnings.warn("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
                               " faster speed.")
@@ -111,7 +111,7 @@ def _read_conll(path, encoding='utf-8', indexes=None, dropna=True):
                     yield line_idx, res
                 except Exception as e:
                     if dropna:
-                        logger.warn('Invalid instance which ends at line: {} has been dropped.'.format(line_idx))
+                        logger.warning('Invalid instance which ends at line: {} has been dropped.'.format(line_idx))
                         continue
                     raise ValueError('Invalid instance which ends at line: {}'.format(line_idx))
         elif line.startswith('#'):
@@ -222,8 +222,8 @@ def _get_base_url(name):
             return url + '/'
     else:
         URLS = {
-            'embedding': "http://dbcloud.irocn.cn:8989/api/public/dl/",
-            "dataset": "http://dbcloud.irocn.cn:8989/api/public/dl/dataset/"
+            'embedding': "http://fudan.irocn.cn:8989/api/public/dl/",
+            "dataset": "http://fudan.irocn.cn:8989/api/public/dl/dataset/"
         }
         if name.lower() not in URLS:
             raise KeyError(f"{name} is not recognized.")
@@ -387,7 +387,7 @@ class SST2Pipe(_CLSPipe):
                        f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                        f"data set but not in train data set!."
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
         datasets = []
         for name, dataset in data_bundle.datasets.items():
             if dataset.has_field(Const.TARGET):
@@ -121,7 +121,7 @@ class MatchingBertPipe(Pipe):
                        f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                        f"data set but not in train data set!."
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
 
         has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                                dataset.has_field(Const.TARGET)]
@@ -258,7 +258,7 @@ class MatchingPipe(Pipe):
                        f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                        f"data set but not in train data set!."
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
 
         has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                                dataset.has_field(Const.TARGET)]
@@ -130,11 +130,12 @@ def _indexize(data_bundle, input_field_names=Const.INPUT, target_field_names=Con
                                                     if ('train' not in name) and (ds.has_field(target_field_name))]
                                )
         if len(tgt_vocab._no_create_word) > 0:
-            warn_msg = f"There are {len(tgt_vocab._no_create_word)} target labels" \
+            warn_msg = f"There are {len(tgt_vocab._no_create_word)} `{target_field_name}` labels" \
                        f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
-                       f"data set but not in train data set!."
+                       f"data set but not in train data set!.\n" \
+                       f"These label(s) are {tgt_vocab._no_create_word}"
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
         tgt_vocab.index_dataset(*data_bundle.datasets.values(), field_name=target_field_name)
         data_bundle.set_vocab(tgt_vocab, target_field_name)
@@ -65,7 +65,7 @@ class BertForSequenceClassification(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for sequence classification excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)
 
     def forward(self, words):
@@ -110,7 +110,7 @@ class BertForSentenceMatching(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for sentence matching excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)
 
     def forward(self, words):
@@ -156,7 +156,7 @@ class BertForMultipleChoice(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for multiple choice excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)
 
     def forward(self, words):
@@ -206,7 +206,7 @@ class BertForTokenClassification(BaseModel):
             self.bert.model.include_cls_sep = False
             warn_msg = "Bert for token classification excepts BertEmbedding `include_cls_sep` False, " \
                        "but got True. FastNLP has changed it to False."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)
 
     def forward(self, words):
@@ -250,7 +250,7 @@ class BertForQuestionAnswering(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for question answering excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)
 
     def forward(self, words):
@@ -488,10 +488,10 @@ class BertModel(nn.Module):
         load(model, prefix='' if hasattr(model, 'bert') else 'bert.')
         if len(missing_keys) > 0:
-            logger.warn("Weights of {} not initialized from pretrained model: {}".format(
+            logger.warning("Weights of {} not initialized from pretrained model: {}".format(
                 model.__class__.__name__, missing_keys))
         if len(unexpected_keys) > 0:
-            logger.warn("Weights from pretrained model not used in {}: {}".format(
+            logger.warning("Weights from pretrained model not used in {}: {}".format(
                 model.__class__.__name__, unexpected_keys))
         logger.info(f"Load pre-trained BERT parameters from file {weights_path}.")
@@ -800,7 +800,7 @@ class BertTokenizer(object):
         for token in tokens:
             ids.append(self.vocab[token])
         if len(ids) > self.max_len:
-            logger.warn(
+            logger.warning(
                 "Token indices sequence length is longer than the specified maximum "
                 " sequence length for this BERT model ({} > {}). Running this"
                 " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
@@ -824,8 +824,8 @@ class BertTokenizer(object):
         with open(vocab_file, "w", encoding="utf-8") as writer:
             for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                 if index != token_index:
-                    logger.warn("Saving vocabulary to {}: vocabulary indices are not consecutive."
-                                " Please check that the vocabulary is not corrupted!".format(vocab_file))
+                    logger.warning("Saving vocabulary to {}: vocabulary indices are not consecutive."
+                                   " Please check that the vocabulary is not corrupted!".format(vocab_file))
                     index = token_index
                 writer.write(token + u'\n')
                 index += 1
@@ -0,0 +1,10 @@
+bc/msnbc/00/msnbc_0000 0 0 Hi UH (TOP(FRAG(INTJ*) - - - Dan_Abrams * -
+bc/msnbc/00/msnbc_0000 0 1 everyone NN (NP*) - - - Dan_Abrams * -
+bc/msnbc/00/msnbc_0000 0 2 /. . *)) - - - Dan_Abrams * -
+
+bc/msnbc/00/msnbc_0000 0 0 first RB (TOP(S(ADVP* - - - Dan_Abrams * (ARGM-TMP* * * * -
+bc/msnbc/00/msnbc_0000 0 1 up RB * - - - Dan_Abrams * * * * * -
+bc/msnbc/00/msnbc_0000 0 2 on IN (PP* - - - Dan_Abrams * * * * * -
+bc/msnbc/00/msnbc_0000 0 3 the DT (NP* - - - Dan_Abrams * * * * * -
+bc/msnbc/00/msnbc_0000 0 4 docket NN *)) docket - - Dan_Abrams * * * * * -
@@ -0,0 +1,10 @@
+bc/msnbc/00/msnbc_0007 0 0 Dealing VBG (TOP(VP* deal 01 - speaker_1 * (V*) -
+bc/msnbc/00/msnbc_0007 0 1 with IN (PP* - - - speaker_1 * (ARG1* -
+bc/msnbc/00/msnbc_0007 0 2 serial JJ (NP(NP* - - - speaker_1 * * (156
+bc/msnbc/00/msnbc_0007 0 3 crimes NNS *) crime - 1 speaker_1 * * 156)
+bc/msnbc/00/msnbc_0007 0 4 per FW (ADVP* - - - speaker_1 * * -
+bc/msnbc/00/msnbc_0007 0 5 se FW *))) - - - speaker_1 * *) -
+bc/msnbc/00/msnbc_0007 0 6 /. . *)) - - - speaker_1 * * -
+
+bc/msnbc/00/msnbc_0007 0 0 We PRP (TOP(S(NP*) - - - speaker_1 * (ARG0*) * (90)
@@ -0,0 +1,50 @@
+bc/msnbc/00/msnbc_0003 0 0 The DT (TOP(S(NP* - - - Chris_Matthews * * (ARG1* * * * * -
+bc/msnbc/00/msnbc_0003 0 1 move NN *) move 02 2 Chris_Matthews * (V*) *) * * * * -
+bc/msnbc/00/msnbc_0003 0 2 comes VBZ (VP* come 03 2 Chris_Matthews * * (V*) * * * * -
+bc/msnbc/00/msnbc_0003 0 3 a DT (SBAR(NP* - - - Chris_Matthews (DATE* * (ARGM-TMP* * * * * -
+bc/msnbc/00/msnbc_0003 0 4 month NN *) month - 2 Chris_Matthews *) * * * * * * -
+bc/msnbc/00/msnbc_0003 0 5 before IN * - - - Chris_Matthews * * * * * * * -
+bc/msnbc/00/msnbc_0003 0 6 the DT (S(NP* - - - Chris_Matthews * * * * (ARG1* (ARG0* * -
+bc/msnbc/00/msnbc_0003 0 7 Senate NNP *) - - - Chris_Matthews (ORG) * * * *) *) * -
+bc/msnbc/00/msnbc_0003 0 8 is VBZ (VP* be 03 - Chris_Matthews * * * (V*) * * * -
+bc/msnbc/00/msnbc_0003 0 9 scheduled VBN (VP* schedule 01 - Chris_Matthews * * * * (V*) * * -
+bc/msnbc/00/msnbc_0003 0 10 to TO (S(VP* - - - Chris_Matthews * * * * (ARG2* * * -
+bc/msnbc/00/msnbc_0003 0 11 hold VB (VP* hold 04 8 Chris_Matthews * * * * * (V*) * -
+bc/msnbc/00/msnbc_0003 0 12 confirmation NN (NP(NP* - - - Chris_Matthews * * * * * (ARG1* (ARG2*) -
+bc/msnbc/00/msnbc_0003 0 13 hearings NNS *) hearing 01 1 Chris_Matthews * * * * * * (V*) -
+bc/msnbc/00/msnbc_0003 0 14 on IN (PP* - - - Chris_Matthews * * * * * * (ARG1* -
+bc/msnbc/00/msnbc_0003 0 15 President NNP (NP(NP(NP* - - - Chris_Matthews * * * * * * * (194
+bc/msnbc/00/msnbc_0003 0 16 Bush NNP * - - - Chris_Matthews (PERSON) * * * * * * -
+bc/msnbc/00/msnbc_0003 0 17 's POS *) - - - Chris_Matthews * * * * * * * 194)
+bc/msnbc/00/msnbc_0003 0 18 Supreme NNP (NML* - - - Chris_Matthews (ORG* * * * * * * -
+bc/msnbc/00/msnbc_0003 0 19 Court NNP *) - - - Chris_Matthews *) * * * * * * -
+bc/msnbc/00/msnbc_0003 0 20 nominee NN *) - - - Chris_Matthews * * * * * * * -
+bc/msnbc/00/msnbc_0003 0 21 John NNP (NP* - - - Chris_Matthews (PERSON* * * * * * * -
+bc/msnbc/00/msnbc_0003 0 22 Roberts NNP *)))))))))))) - - - Chris_Matthews *) * *) * *) *) *) -
+bc/msnbc/00/msnbc_0003 0 23 /. . *)) - - - Chris_Matthews * * * * * * * -
+
+bc/msnbc/00/msnbc_0003 0 0 Senator NNP (TOP(S(NP(NP* - - - Chris_Matthews * (ARG1* * * (162
+bc/msnbc/00/msnbc_0003 0 1 Chris NNP * - - - Chris_Matthews (PERSON* * * * -
+bc/msnbc/00/msnbc_0003 0 2 Dodd NNP *) - - - Chris_Matthews *) * * * -
+bc/msnbc/00/msnbc_0003 0 3 of IN (PP* - - - Chris_Matthews * * * * -
+bc/msnbc/00/msnbc_0003 0 4 Connecticut NNP (NP*))) - - - Chris_Matthews (GPE) *) * * 162)
+bc/msnbc/00/msnbc_0003 0 5 was VBD (VP* be 01 1 Chris_Matthews * (V*) * * -
+bc/msnbc/00/msnbc_0003 0 6 among IN (PP* - - - Chris_Matthews * (ARG2* * * -
+bc/msnbc/00/msnbc_0003 0 7 those DT (NP(NP* - - - Chris_Matthews * * (ARG0* * -
+bc/msnbc/00/msnbc_0003 0 8 Democrats NNPS *) - - - Chris_Matthews (NORP) * *) * -
+bc/msnbc/00/msnbc_0003 0 9 who WP (SBAR(WHNP*) - - - Chris_Matthews * * (R-ARG0*) * -
+bc/msnbc/00/msnbc_0003 0 10 spoke VBD (S(VP* speak 03 5 Chris_Matthews * * (V*) * -
+bc/msnbc/00/msnbc_0003 0 11 out RP (PRT*) - - - Chris_Matthews * * * * -
+bc/msnbc/00/msnbc_0003 0 12 against IN (PP* - - - Chris_Matthews * * (ARG1* * -
+bc/msnbc/00/msnbc_0003 0 13 Bolton NNP (NP(NP* - - - Chris_Matthews (PERSON) * * (ARG1* (31|(130
+bc/msnbc/00/msnbc_0003 0 14 's POS *) - - - Chris_Matthews * * * *) 31)
+bc/msnbc/00/msnbc_0003 0 15 appointment NN *)) appointment 01 1 Chris_Matthews * * *) (V*) 130)
+bc/msnbc/00/msnbc_0003 0 16 today NN (NP*))))))) today - 2 Chris_Matthews (DATE) *) (ARGM-TMP*) * (121)
+bc/msnbc/00/msnbc_0003 0 17 /. . *)) - - - Chris_Matthews * * * * -
+
+bc/msnbc/00/msnbc_0003 0 0 I PRP (TOP(S(NP*) - - - Christopher_Dodd * * (ARG0*) * (162)
+bc/msnbc/00/msnbc_0003 0 1 just RB (ADVP*) - - - Christopher_Dodd * * (ARGM-ADV*) * -
+bc/msnbc/00/msnbc_0003 0 2 do VBP (VP* do 01 - Christopher_Dodd * (V*) * * -
+bc/msnbc/00/msnbc_0003 0 3 n't RB * - - - Christopher_Dodd * * (ARGM-NEG*) * -
+bc/msnbc/00/msnbc_0003 0 4 think VB (VP* think 01 1 Christopher_Dodd * * (V*) * -
@@ -0,0 +1,49 @@
+-DOCSTART- -X- -X- O
+
+CRICKET NNP B-NP O
+- : O O
+LEICESTERSHIRE NNP B-NP B-ORG
+TAKE NNP I-NP O
+OVER IN B-PP O
+AT NNP B-NP O
+TOP NNP I-NP O
+AFTER NNP I-NP O
+INNINGS NNP I-NP O
+VICTORY NN I-NP O
+. . O O
+
+LONDON NNP B-NP B-LOC
+1996-08-30 CD I-NP O
+
+Phil NNP B-NP B-PER
+Simmons NNP I-NP I-PER
+took VBD B-VP O
+four CD B-NP O
+for IN B-PP O
+38 CD B-NP O
+on IN B-PP O
+Friday NNP B-NP O
+as IN B-PP O
+Leicestershire NNP B-NP B-ORG
+beat VBD B-VP O
+Somerset NNP B-NP B-ORG
+by IN B-PP O
+an DT B-NP O
+innings NN I-NP O
+and CC O O
+39 CD B-NP O
+runs NNS I-NP O
+in IN B-PP O
+two CD B-NP O
+days NNS I-NP O
+to TO B-VP O
+take VB I-VP O
+over IN B-PP O
+at IN B-PP O
+the DT B-NP O
+head NN I-NP O
+of IN B-PP O
+the DT B-NP O
+county NN I-NP O
+championship NN I-NP O
+. . O O
@@ -0,0 +1,51 @@
+-DOCSTART- -X- -X- O
+
+SOCCER NN B-NP O
+- : O O
+JAPAN NNP B-NP B-LOC
+GET VB B-VP O
+LUCKY NNP B-NP O
+WIN NNP I-NP O
+, , O O
+THE NP B-NP B-PER
+CHINA NNP I-NP I-PER
+IN IN B-PP O
+SURPRISE DT B-NP O
+DEFEAT NN I-NP O
+. . O O
+
+Nadim NNP B-NP B-PER
+Ladki NNP I-NP I-PER
+
+AL-AIN NNP B-NP B-LOC
+, , O O
+United NNP B-NP B-LOC
+Arab NNP I-NP I-LOC
+Emirates NNPS I-NP I-LOC
+1996-12-06 CD I-NP O
+
+Japan NNP B-NP B-LOC
+began VBD B-VP O
+the DT B-NP O
+defence NN I-NP O
+of IN B-PP O
+their PRP$ B-NP O
+Asian JJ I-NP B-MISC
+Cup NNP I-NP I-MISC
+title NN I-NP O
+with IN B-PP O
+a DT B-NP O
+lucky JJ I-NP O
+2-1 CD I-NP O
+win VBP B-VP O
+against IN B-PP O
+Syria NNP B-NP B-LOC
+in IN B-PP O
+a DT B-NP O
+Group NNP I-NP O
+C NNP I-NP O
+championship NN I-NP O
+match NN I-NP O
+on IN B-PP O
+Friday NNP B-NP O
+. . O O
@@ -0,0 +1,48 @@
+-DOCSTART- -X- -X- O
+
+EU NNP B-NP B-ORG
+rejects VBZ B-VP O
+German JJ B-NP B-MISC
+call NN I-NP O
+to TO B-VP O
+boycott VB I-VP O
+British JJ B-NP B-MISC
+lamb NN I-NP O
+. . O O
+
+Peter NNP B-NP B-PER
+Blackburn NNP I-NP I-PER
+
+BRUSSELS NNP B-NP B-LOC
+1996-08-22 CD I-NP O
+
+The DT B-NP O
+European NNP I-NP B-ORG
+Commission NNP I-NP I-ORG
+said VBD B-VP O
+on IN B-PP O
+Thursday NNP B-NP O
+it PRP B-NP O
+disagreed VBD B-VP O
+with IN B-PP O
+German JJ B-NP B-MISC
+advice NN I-NP O
+to TO B-PP O
+consumers NNS B-NP O
+to TO B-VP O
+shun VB I-VP O
+British JJ B-NP B-MISC
+lamb NN I-NP O
+until IN B-SBAR O
+scientists NNS B-NP O
+determine VBP B-VP O
+whether IN B-SBAR O
+mad JJ B-NP O
+cow NN I-NP O
+disease NN I-NP O
+can MD B-VP O
+be VB I-VP O
+transmitted VBN I-VP O
+to TO B-PP O
+sheep NN B-NP O
+. . O O
@@ -26,6 +26,12 @@ class TestWeiboNER(unittest.TestCase):
 
 class TestConll2003Loader(unittest.TestCase):
-    def test__load(self):
+    def test_load(self):
         Conll2003Loader()._load('test/data_for_tests/conll_2003_example.txt')
+
+
+class TestConllLoader(unittest.TestCase):
+    def test_conll(self):
+        db = Conll2003Loader().load('test/data_for_tests/io/conll2003')
+        print(db)
@@ -1,6 +1,7 @@
 import unittest
 import os
-from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe
+from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe, \
+    OntoNotesNERPipe
 
 
 @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis")
@@ -38,3 +39,14 @@ class TestNERPipe(unittest.TestCase):
             print(data_bundle)
             data_bundle = pipe(encoding_type='bioes').process_from_file(f'test/data_for_tests/io/{k}')
             print(data_bundle)
+
+
+class TestConll2003Pipe(unittest.TestCase):
+    def test_conll(self):
+        with self.assertWarns(Warning):
+            data_bundle = Conll2003Pipe().process_from_file('test/data_for_tests/io/conll2003')
+            print(data_bundle)
+
+    def test_OntoNotes(self):
+        data_bundle = OntoNotesNERPipe().process_from_file('test/data_for_tests/io/OntoNotes')
+        print(data_bundle)
@@ -19,5 +19,6 @@ class TestRunCWSPipe(unittest.TestCase):
         dataset_names = ['msra', 'cityu', 'as', 'pku']
         for dataset_name in dataset_names:
             with self.subTest(dataset_name=dataset_name):
-                data_bundle = CWSPipe().process_from_file(f'test/data_for_tests/io/cws_{dataset_name}')
+                data_bundle = CWSPipe(bigrams=True, trigrams=True).\
+                    process_from_file(f'test/data_for_tests/io/cws_{dataset_name}')
                 print(data_bundle)