diff --git a/fastNLP/embeddings/bert_embedding.py b/fastNLP/embeddings/bert_embedding.py
index 84105444..36670a0b 100644
--- a/fastNLP/embeddings/bert_embedding.py
+++ b/fastNLP/embeddings/bert_embedding.py
@@ -72,8 +72,8 @@ class BertEmbedding(ContextualEmbedding):
         if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR:
             if 'cn' in model_dir_or_name.lower() and pool_method not in ('first', 'last'):
-                logger.warn("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
-                            " faster speed.")
+                logger.warning("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
+                               " faster speed.")
                 warnings.warn("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
                               " faster speed.")
diff --git a/fastNLP/io/file_reader.py b/fastNLP/io/file_reader.py
index b64b115b..3370e660 100644
--- a/fastNLP/io/file_reader.py
+++ b/fastNLP/io/file_reader.py
@@ -111,7 +111,7 @@ def _read_conll(path, encoding='utf-8', indexes=None, dropna=True):
                     yield line_idx, res
                 except Exception as e:
                     if dropna:
-                        logger.warn('Invalid instance which ends at line: {} has been dropped.'.format(line_idx))
+                        logger.warning('Invalid instance which ends at line: {} has been dropped.'.format(line_idx))
                         continue
                     raise ValueError('Invalid instance which ends at line: {}'.format(line_idx))
             elif line.startswith('#'):
diff --git a/fastNLP/io/file_utils.py b/fastNLP/io/file_utils.py
index f76bcd26..6661397b 100644
--- a/fastNLP/io/file_utils.py
+++ b/fastNLP/io/file_utils.py
@@ -222,8 +222,8 @@ def _get_base_url(name):
             return url + '/'
     else:
         URLS = {
-            'embedding': "http://dbcloud.irocn.cn:8989/api/public/dl/",
-            "dataset": "http://dbcloud.irocn.cn:8989/api/public/dl/dataset/"
+            'embedding': "http://fudan.irocn.cn:8989/api/public/dl/",
+            "dataset": "http://fudan.irocn.cn:8989/api/public/dl/dataset/"
         }
         if name.lower() not in URLS:
             raise KeyError(f"{name} is not recognized.")
diff --git a/fastNLP/io/pipe/classification.py b/fastNLP/io/pipe/classification.py
index db791ae8..409cfe53 100644
--- a/fastNLP/io/pipe/classification.py
+++ b/fastNLP/io/pipe/classification.py
@@ -387,7 +387,7 @@ class SST2Pipe(_CLSPipe):
                        f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                        f"data set but not in train data set!."
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
         datasets = []
         for name, dataset in data_bundle.datasets.items():
             if dataset.has_field(Const.TARGET):
diff --git a/fastNLP/io/pipe/matching.py b/fastNLP/io/pipe/matching.py
index aa6db46f..def750c0 100644
--- a/fastNLP/io/pipe/matching.py
+++ b/fastNLP/io/pipe/matching.py
@@ -121,7 +121,7 @@ class MatchingBertPipe(Pipe):
                        f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                        f"data set but not in train data set!."
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)

         has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                                dataset.has_field(Const.TARGET)]
@@ -258,7 +258,7 @@ class MatchingPipe(Pipe):
                        f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                        f"data set but not in train data set!."
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)

         has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                                dataset.has_field(Const.TARGET)]
diff --git a/fastNLP/io/pipe/utils.py b/fastNLP/io/pipe/utils.py
index 4925853f..d05ffe96 100644
--- a/fastNLP/io/pipe/utils.py
+++ b/fastNLP/io/pipe/utils.py
@@ -130,11 +130,12 @@ def _indexize(data_bundle, input_field_names=Const.INPUT, target_field_names=Con
                                      if ('train' not in name) and (ds.has_field(target_field_name))]
                              )
         if len(tgt_vocab._no_create_word) > 0:
-            warn_msg = f"There are {len(tgt_vocab._no_create_word)} target labels" \
-                       f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
-                       f"data set but not in train data set!."
+            warn_msg = f"There are {len(tgt_vocab._no_create_word)} `{target_field_name}` labels" \
+                       f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
+                       f"data set but not in train data set!.\n" \
+                       f"These label(s) are {tgt_vocab._no_create_word}"
             warnings.warn(warn_msg)
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
         tgt_vocab.index_dataset(*data_bundle.datasets.values(), field_name=target_field_name)
         data_bundle.set_vocab(tgt_vocab, target_field_name)
diff --git a/fastNLP/models/bert.py b/fastNLP/models/bert.py
index 2bd15eb0..93a294ab 100644
--- a/fastNLP/models/bert.py
+++ b/fastNLP/models/bert.py
@@ -65,7 +65,7 @@ class BertForSequenceClassification(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for sequence classification excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)

     def forward(self, words):
@@ -110,7 +110,7 @@ class BertForSentenceMatching(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for sentence matching excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)

     def forward(self, words):
@@ -156,7 +156,7 @@ class BertForMultipleChoice(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for multiple choice excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)

     def forward(self, words):
@@ -206,7 +206,7 @@ class BertForTokenClassification(BaseModel):
             self.bert.model.include_cls_sep = False
             warn_msg = "Bert for token classification excepts BertEmbedding `include_cls_sep` False, " \
                        "but got True. FastNLP has changed it to False."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)

     def forward(self, words):
@@ -250,7 +250,7 @@ class BertForQuestionAnswering(BaseModel):
             self.bert.model.include_cls_sep = True
             warn_msg = "Bert for question answering excepts BertEmbedding `include_cls_sep` True, " \
                        "but got False. FastNLP has changed it to True."
-            logger.warn(warn_msg)
+            logger.warning(warn_msg)
             warnings.warn(warn_msg)

     def forward(self, words):
diff --git a/fastNLP/modules/encoder/bert.py b/fastNLP/modules/encoder/bert.py
index 12379718..16b456fb 100644
--- a/fastNLP/modules/encoder/bert.py
+++ b/fastNLP/modules/encoder/bert.py
@@ -488,10 +488,10 @@ class BertModel(nn.Module):
         load(model, prefix='' if hasattr(model, 'bert') else 'bert.')
         if len(missing_keys) > 0:
-            logger.warn("Weights of {} not initialized from pretrained model: {}".format(
+            logger.warning("Weights of {} not initialized from pretrained model: {}".format(
                 model.__class__.__name__, missing_keys))
         if len(unexpected_keys) > 0:
-            logger.warn("Weights from pretrained model not used in {}: {}".format(
+            logger.warning("Weights from pretrained model not used in {}: {}".format(
                 model.__class__.__name__, unexpected_keys))
         logger.info(f"Load pre-trained BERT parameters from file {weights_path}.")
@@ -800,7 +800,7 @@ class BertTokenizer(object):
         for token in tokens:
             ids.append(self.vocab[token])
         if len(ids) > self.max_len:
-            logger.warn(
+            logger.warning(
                 "Token indices sequence length is longer than the specified maximum "
                 " sequence length for this BERT model ({} > {}). Running this"
                 " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
             )
@@ -824,8 +824,8 @@ class BertTokenizer(object):
         with open(vocab_file, "w", encoding="utf-8") as writer:
             for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                 if index != token_index:
-                    logger.warn("Saving vocabulary to {}: vocabulary indices are not consecutive."
-                                " Please check that the vocabulary is not corrupted!".format(vocab_file))
+                    logger.warning("Saving vocabulary to {}: vocabulary indices are not consecutive."
+                                   " Please check that the vocabulary is not corrupted!".format(vocab_file))
                     index = token_index
                 writer.write(token + u'\n')
                 index += 1
diff --git a/test/data_for_tests/io/OntoNotes/dev.txt b/test/data_for_tests/io/OntoNotes/dev.txt
new file mode 100644
index 00000000..e99207a1
--- /dev/null
+++ b/test/data_for_tests/io/OntoNotes/dev.txt
@@ -0,0 +1,10 @@
+
+bc/msnbc/00/msnbc_0000 0 0 Hi UH (TOP(FRAG(INTJ*) - - - Dan_Abrams * -
+bc/msnbc/00/msnbc_0000 0 1 everyone NN (NP*) - - - Dan_Abrams * -
+bc/msnbc/00/msnbc_0000 0 2 /. . *)) - - - Dan_Abrams * -
+
+bc/msnbc/00/msnbc_0000 0 0 first RB (TOP(S(ADVP* - - - Dan_Abrams * (ARGM-TMP* * * * -
+bc/msnbc/00/msnbc_0000 0 1 up RB * - - - Dan_Abrams * * * * * -
+bc/msnbc/00/msnbc_0000 0 2 on IN (PP* - - - Dan_Abrams * * * * * -
+bc/msnbc/00/msnbc_0000 0 3 the DT (NP* - - - Dan_Abrams * * * * * -
+bc/msnbc/00/msnbc_0000 0 4 docket NN *)) docket - - Dan_Abrams * * * * * -
diff --git a/test/data_for_tests/io/OntoNotes/test.txt b/test/data_for_tests/io/OntoNotes/test.txt
new file mode 100644
index 00000000..c94069e0
--- /dev/null
+++ b/test/data_for_tests/io/OntoNotes/test.txt
@@ -0,0 +1,10 @@
+
+bc/msnbc/00/msnbc_0007 0 0 Dealing VBG (TOP(VP* deal 01 - speaker_1 * (V*) -
+bc/msnbc/00/msnbc_0007 0 1 with IN (PP* - - - speaker_1 * (ARG1* -
+bc/msnbc/00/msnbc_0007 0 2 serial JJ (NP(NP* - - - speaker_1 * * (156
+bc/msnbc/00/msnbc_0007 0 3 crimes NNS *) crime - 1 speaker_1 * * 156)
+bc/msnbc/00/msnbc_0007 0 4 per FW (ADVP* - - - speaker_1 * * -
+bc/msnbc/00/msnbc_0007 0 5 se FW *))) - - - speaker_1 * *) -
+bc/msnbc/00/msnbc_0007 0 6 /. . *)) - - - speaker_1 * * -
+
+bc/msnbc/00/msnbc_0007 0 0 We PRP (TOP(S(NP*) - - - speaker_1 * (ARG0*) * (90)
diff --git a/test/data_for_tests/io/OntoNotes/train.txt b/test/data_for_tests/io/OntoNotes/train.txt
new file mode 100644
index 00000000..36f14c73
--- /dev/null
+++ b/test/data_for_tests/io/OntoNotes/train.txt
@@ -0,0 +1,50 @@
+
+bc/msnbc/00/msnbc_0003 0 0 The DT (TOP(S(NP* - - - Chris_Matthews * * (ARG1* * * * * -
+bc/msnbc/00/msnbc_0003 0 1 move NN *) move 02 2 Chris_Matthews * (V*) *) * * * * -
+bc/msnbc/00/msnbc_0003 0 2 comes VBZ (VP* come 03 2 Chris_Matthews * * (V*) * * * * -
+bc/msnbc/00/msnbc_0003 0 3 a DT (SBAR(NP* - - - Chris_Matthews (DATE* * (ARGM-TMP* * * * * -
+bc/msnbc/00/msnbc_0003 0 4 month NN *) month - 2 Chris_Matthews *) * * * * * * -
+bc/msnbc/00/msnbc_0003 0 5 before IN * - - - Chris_Matthews * * * * * * * -
+bc/msnbc/00/msnbc_0003 0 6 the DT (S(NP* - - - Chris_Matthews * * * * (ARG1* (ARG0* * -
+bc/msnbc/00/msnbc_0003 0 7 Senate NNP *) - - - Chris_Matthews (ORG) * * * *) *) * -
+bc/msnbc/00/msnbc_0003 0 8 is VBZ (VP* be 03 - Chris_Matthews * * * (V*) * * * -
+bc/msnbc/00/msnbc_0003 0 9 scheduled VBN (VP* schedule 01 - Chris_Matthews * * * * (V*) * * -
+bc/msnbc/00/msnbc_0003 0 10 to TO (S(VP* - - - Chris_Matthews * * * * (ARG2* * * -
+bc/msnbc/00/msnbc_0003 0 11 hold VB (VP* hold 04 8 Chris_Matthews * * * * * (V*) * -
+bc/msnbc/00/msnbc_0003 0 12 confirmation NN (NP(NP* - - - Chris_Matthews * * * * * (ARG1* (ARG2*) -
+bc/msnbc/00/msnbc_0003 0 13 hearings NNS *) hearing 01 1 Chris_Matthews * * * * * * (V*) -
+bc/msnbc/00/msnbc_0003 0 14 on IN (PP* - - - Chris_Matthews * * * * * * (ARG1* -
+bc/msnbc/00/msnbc_0003 0 15 President NNP (NP(NP(NP* - - - Chris_Matthews * * * * * * * (194
+bc/msnbc/00/msnbc_0003 0 16 Bush NNP * - - - Chris_Matthews (PERSON) * * * * * * -
+bc/msnbc/00/msnbc_0003 0 17 's POS *) - - - Chris_Matthews * * * * * * * 194)
+bc/msnbc/00/msnbc_0003 0 18 Supreme NNP (NML* - - - Chris_Matthews (ORG* * * * * * * -
+bc/msnbc/00/msnbc_0003 0 19 Court NNP *) - - - Chris_Matthews *) * * * * * * -
+bc/msnbc/00/msnbc_0003 0 20 nominee NN *) - - - Chris_Matthews * * * * * * * -
+bc/msnbc/00/msnbc_0003 0 21 John NNP (NP* - - - Chris_Matthews (PERSON* * * * * * * -
+bc/msnbc/00/msnbc_0003 0 22 Roberts NNP *)))))))))))) - - - Chris_Matthews *) * *) * *) *) *) -
+bc/msnbc/00/msnbc_0003 0 23 /. . *)) - - - Chris_Matthews * * * * * * * -
+
+bc/msnbc/00/msnbc_0003 0 0 Senator NNP (TOP(S(NP(NP* - - - Chris_Matthews * (ARG1* * * (162
+bc/msnbc/00/msnbc_0003 0 1 Chris NNP * - - - Chris_Matthews (PERSON* * * * -
+bc/msnbc/00/msnbc_0003 0 2 Dodd NNP *) - - - Chris_Matthews *) * * * -
+bc/msnbc/00/msnbc_0003 0 3 of IN (PP* - - - Chris_Matthews * * * * -
+bc/msnbc/00/msnbc_0003 0 4 Connecticut NNP (NP*))) - - - Chris_Matthews (GPE) *) * * 162)
+bc/msnbc/00/msnbc_0003 0 5 was VBD (VP* be 01 1 Chris_Matthews * (V*) * * -
+bc/msnbc/00/msnbc_0003 0 6 among IN (PP* - - - Chris_Matthews * (ARG2* * * -
+bc/msnbc/00/msnbc_0003 0 7 those DT (NP(NP* - - - Chris_Matthews * * (ARG0* * -
+bc/msnbc/00/msnbc_0003 0 8 Democrats NNPS *) - - - Chris_Matthews (NORP) * *) * -
+bc/msnbc/00/msnbc_0003 0 9 who WP (SBAR(WHNP*) - - - Chris_Matthews * * (R-ARG0*) * -
+bc/msnbc/00/msnbc_0003 0 10 spoke VBD (S(VP* speak 03 5 Chris_Matthews * * (V*) * -
+bc/msnbc/00/msnbc_0003 0 11 out RP (PRT*) - - - Chris_Matthews * * * * -
+bc/msnbc/00/msnbc_0003 0 12 against IN (PP* - - - Chris_Matthews * * (ARG1* * -
+bc/msnbc/00/msnbc_0003 0 13 Bolton NNP (NP(NP* - - - Chris_Matthews (PERSON) * * (ARG1* (31|(130
+bc/msnbc/00/msnbc_0003 0 14 's POS *) - - - Chris_Matthews * * * *) 31)
+bc/msnbc/00/msnbc_0003 0 15 appointment NN *)) appointment 01 1 Chris_Matthews * * *) (V*) 130)
+bc/msnbc/00/msnbc_0003 0 16 today NN (NP*))))))) today - 2 Chris_Matthews (DATE) *) (ARGM-TMP*) * (121)
+bc/msnbc/00/msnbc_0003 0 17 /. . *)) - - - Chris_Matthews * * * * -
+
+bc/msnbc/00/msnbc_0003 0 0 I PRP (TOP(S(NP*) - - - Christopher_Dodd * * (ARG0*) * (162)
+bc/msnbc/00/msnbc_0003 0 1 just RB (ADVP*) - - - Christopher_Dodd * * (ARGM-ADV*) * -
+bc/msnbc/00/msnbc_0003 0 2 do VBP (VP* do 01 - Christopher_Dodd * (V*) * * -
+bc/msnbc/00/msnbc_0003 0 3 n't RB * - - - Christopher_Dodd * * (ARGM-NEG*) * -
+bc/msnbc/00/msnbc_0003 0 4 think VB (VP* think 01 1 Christopher_Dodd * * (V*) * -
diff --git a/test/data_for_tests/io/conll2003/dev.txt b/test/data_for_tests/io/conll2003/dev.txt
new file mode 100644
index 00000000..90834721
--- /dev/null
+++ b/test/data_for_tests/io/conll2003/dev.txt
@@ -0,0 +1,49 @@
+-DOCSTART- -X- -X- O
+
+CRICKET NNP B-NP O
+- : O O
+LEICESTERSHIRE NNP B-NP B-ORG
+TAKE NNP I-NP O
+OVER IN B-PP O
+AT NNP B-NP O
+TOP NNP I-NP O
+AFTER NNP I-NP O
+INNINGS NNP I-NP O
+VICTORY NN I-NP O
+. . O O
+
+LONDON NNP B-NP B-LOC
+1996-08-30 CD I-NP O
+
+Phil NNP B-NP B-PER
+Simmons NNP I-NP I-PER
+took VBD B-VP O
+four CD B-NP O
+for IN B-PP O
+38 CD B-NP O
+on IN B-PP O
+Friday NNP B-NP O
+as IN B-PP O
+Leicestershire NNP B-NP B-ORG
+beat VBD B-VP O
+Somerset NNP B-NP B-ORG
+by IN B-PP O
+an DT B-NP O
+innings NN I-NP O
+and CC O O
+39 CD B-NP O
+runs NNS I-NP O
+in IN B-PP O
+two CD B-NP O
+days NNS I-NP O
+to TO B-VP O
+take VB I-VP O
+over IN B-PP O
+at IN B-PP O
+the DT B-NP O
+head NN I-NP O
+of IN B-PP O
+the DT B-NP O
+county NN I-NP O
+championship NN I-NP O
+. . O O
diff --git a/test/data_for_tests/io/conll2003/test.txt b/test/data_for_tests/io/conll2003/test.txt
new file mode 100644
index 00000000..b5b3aef0
--- /dev/null
+++ b/test/data_for_tests/io/conll2003/test.txt
@@ -0,0 +1,51 @@
+-DOCSTART- -X- -X- O
+
+SOCCER NN B-NP O
+- : O O
+JAPAN NNP B-NP B-LOC
+GET VB B-VP O
+LUCKY NNP B-NP O
+WIN NNP I-NP O
+, , O O
+THE NP B-NP B-PER
+CHINA NNP I-NP I-PER
+IN IN B-PP O
+SURPRISE DT B-NP O
+DEFEAT NN I-NP O
+. . O O
+
+Nadim NNP B-NP B-PER
+Ladki NNP I-NP I-PER
+
+AL-AIN NNP B-NP B-LOC
+, , O O
+United NNP B-NP B-LOC
+Arab NNP I-NP I-LOC
+Emirates NNPS I-NP I-LOC
+1996-12-06 CD I-NP O
+
+Japan NNP B-NP B-LOC
+began VBD B-VP O
+the DT B-NP O
+defence NN I-NP O
+of IN B-PP O
+their PRP$ B-NP O
+Asian JJ I-NP B-MISC
+Cup NNP I-NP I-MISC
+title NN I-NP O
+with IN B-PP O
+a DT B-NP O
+lucky JJ I-NP O
+2-1 CD I-NP O
+win VBP B-VP O
+against IN B-PP O
+Syria NNP B-NP B-LOC
+in IN B-PP O
+a DT B-NP O
+Group NNP I-NP O
+C NNP I-NP O
+championship NN I-NP O
+match NN I-NP O
+on IN B-PP O
+Friday NNP B-NP O
+. . O O
diff --git a/test/data_for_tests/io/conll2003/train.txt b/test/data_for_tests/io/conll2003/train.txt
new file mode 100644
index 00000000..4f0c4bf2
--- /dev/null
+++ b/test/data_for_tests/io/conll2003/train.txt
@@ -0,0 +1,48 @@
+-DOCSTART- -X- -X- O
+
+EU NNP B-NP B-ORG
+rejects VBZ B-VP O
+German JJ B-NP B-MISC
+call NN I-NP O
+to TO B-VP O
+boycott VB I-VP O
+British JJ B-NP B-MISC
+lamb NN I-NP O
+. . O O
+
+Peter NNP B-NP B-PER
+Blackburn NNP I-NP I-PER
+
+BRUSSELS NNP B-NP B-LOC
+1996-08-22 CD I-NP O
+
+The DT B-NP O
+European NNP I-NP B-ORG
+Commission NNP I-NP I-ORG
+said VBD B-VP O
+on IN B-PP O
+Thursday NNP B-NP O
+it PRP B-NP O
+disagreed VBD B-VP O
+with IN B-PP O
+German JJ B-NP B-MISC
+advice NN I-NP O
+to TO B-PP O
+consumers NNS B-NP O
+to TO B-VP O
+shun VB I-VP O
+British JJ B-NP B-MISC
+lamb NN I-NP O
+until IN B-SBAR O
+scientists NNS B-NP O
+determine VBP B-VP O
+whether IN B-SBAR O
+mad JJ B-NP O
+cow NN I-NP O
+disease NN I-NP O
+can MD B-VP O
+be VB I-VP O
+transmitted VBN I-VP O
+to TO B-PP O
+sheep NN B-NP O
+. . O O
diff --git a/test/io/loader/test_conll_loader.py b/test/io/loader/test_conll_loader.py
index 31859a6b..6668cccf 100644
--- a/test/io/loader/test_conll_loader.py
+++ b/test/io/loader/test_conll_loader.py
@@ -26,6 +26,12 @@ class TestWeiboNER(unittest.TestCase):

 class TestConll2003Loader(unittest.TestCase):
-    def test__load(self):
+    def test_load(self):
         Conll2003Loader()._load('test/data_for_tests/conll_2003_example.txt')
+
+class TestConllLoader(unittest.TestCase):
+    def test_conll(self):
+        db = Conll2003Loader().load('test/data_for_tests/io/conll2003')
+        print(db)
+
diff --git a/test/io/pipe/test_conll.py b/test/io/pipe/test_conll.py
index d60094c2..ad41ae18 100644
--- a/test/io/pipe/test_conll.py
+++ b/test/io/pipe/test_conll.py
@@ -1,6 +1,7 @@
 import unittest
 import os
-from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe
+from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe, \
+    OntoNotesNERPipe


 @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis")
@@ -38,3 +39,14 @@ class TestNERPipe(unittest.TestCase):
             print(data_bundle)
             data_bundle = pipe(encoding_type='bioes').process_from_file(f'test/data_for_tests/io/{k}')
             print(data_bundle)
+
+
+class TestConll2003Pipe(unittest.TestCase):
+    def test_conll(self):
+        with self.assertWarns(Warning):
+            data_bundle = Conll2003Pipe().process_from_file('test/data_for_tests/io/conll2003')
+            print(data_bundle)
+
+    def test_OntoNotes(self):
+        data_bundle = OntoNotesNERPipe().process_from_file('test/data_for_tests/io/OntoNotes')
+        print(data_bundle)
diff --git a/test/io/pipe/test_cws.py b/test/io/pipe/test_cws.py
index 993c16c0..09fce3f0 100644
--- a/test/io/pipe/test_cws.py
+++ b/test/io/pipe/test_cws.py
@@ -19,5 +19,6 @@ class TestRunCWSPipe(unittest.TestCase):
         dataset_names = ['msra', 'cityu', 'as', 'pku']
         for dataset_name in dataset_names:
             with self.subTest(dataset_name=dataset_name):
-                data_bundle = CWSPipe().process_from_file(f'test/data_for_tests/io/cws_{dataset_name}')
+                data_bundle = CWSPipe(bigrams=True, trigrams=True).\
+                    process_from_file(f'test/data_for_tests/io/cws_{dataset_name}')
                 print(data_bundle)
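Note on the recurring logger.warn -> logger.warning change above: in Python's standard logging module, Logger.warn is a deprecated alias of Logger.warning, which is why the patch standardizes on the latter. Below is a minimal, self-contained sketch (not part of the patch) showing the difference; the logger name "fastNLP" is only illustrative.

    import logging
    import warnings

    logger = logging.getLogger("fastNLP")  # illustrative name; any Logger behaves the same

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        if hasattr(logger, "warn"):            # Logger.warn was removed in Python 3.13
            logger.warn("old spelling")        # deprecated alias -> DeprecationWarning
        logger.warning("preferred spelling")   # the call the patch standardizes on

    # Expect one DeprecationWarning, raised by the logger.warn call above
    print([f"{w.category.__name__}: {w.message}" for w in caught])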