Merge branch 'dev0.5.0' of https://github.com/fastnlp/fastNLP into dev0.5.0

5 years ago · bbb67e41ef
--- a/fastNLP/embeddings/bert_embedding.py
+++ b/fastNLP/embeddings/bert_embedding.py
@@ -72,8 +72,8 @@ class BertEmbedding(ContextualEmbedding):

        if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR:
            if 'cn' in model_dir_or_name.lower() and pool_method not in ('first', 'last'):
                logger.warn("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
                            " faster speed.")
                logger.warning("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
                               " faster speed.")
                warnings.warn("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve"
                              " faster speed.")
        
--- a/fastNLP/io/file_reader.py
+++ b/fastNLP/io/file_reader.py
@@ -111,7 +111,7 @@ def _read_conll(path, encoding='utf-8', indexes=None, dropna=True):
                        yield line_idx, res
                    except Exception as e:
                        if dropna:
                            logger.warn('Invalid instance which ends at line: {} has been dropped.'.format(line_idx))
                            logger.warning('Invalid instance which ends at line: {} has been dropped.'.format(line_idx))
                            continue
                        raise ValueError('Invalid instance which ends at line: {}'.format(line_idx))
            elif line.startswith('#'):
--- a/fastNLP/io/file_utils.py
+++ b/fastNLP/io/file_utils.py
@@ -222,8 +222,8 @@ def _get_base_url(name):
            return url + '/'
    else:
        URLS = {
            'embedding': "http://dbcloud.irocn.cn:8989/api/public/dl/",
            "dataset": "http://dbcloud.irocn.cn:8989/api/public/dl/dataset/"
            'embedding': "http://fudan.irocn.cn:8989/api/public/dl/",
            "dataset": "http://fudan.irocn.cn:8989/api/public/dl/dataset/"
        }
        if name.lower() not in URLS:
            raise KeyError(f"{name} is not recognized.")
--- a/fastNLP/io/pipe/classification.py
+++ b/fastNLP/io/pipe/classification.py
@@ -387,7 +387,7 @@ class SST2Pipe(_CLSPipe):
                       f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                       f"data set but not in train data set!."
            warnings.warn(warn_msg)
            logger.warn(warn_msg)
            logger.warning(warn_msg)
        datasets = []
        for name, dataset in data_bundle.datasets.items():
            if dataset.has_field(Const.TARGET):
--- a/fastNLP/io/pipe/matching.py
+++ b/fastNLP/io/pipe/matching.py
@@ -121,7 +121,7 @@ class MatchingBertPipe(Pipe):
                       f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                       f"data set but not in train data set!."
            warnings.warn(warn_msg)
            logger.warn(warn_msg)
            logger.warning(warn_msg)

        has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                               dataset.has_field(Const.TARGET)]
@@ -258,7 +258,7 @@ class MatchingPipe(Pipe):
                       f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                       f"data set but not in train data set!."
            warnings.warn(warn_msg)
            logger.warn(warn_msg)
            logger.warning(warn_msg)

        has_target_datasets = [dataset for name, dataset in data_bundle.datasets.items() if
                               dataset.has_field(Const.TARGET)]
--- a/fastNLP/io/pipe/utils.py
+++ b/fastNLP/io/pipe/utils.py
@@ -130,11 +130,12 @@ def _indexize(data_bundle, input_field_names=Const.INPUT, target_field_names=Con
                                                        if ('train' not in name) and (ds.has_field(target_field_name))]
                               )
        if len(tgt_vocab._no_create_word) > 0:
            warn_msg = f"There are {len(tgt_vocab._no_create_word)} target labels" \
            warn_msg = f"There are {len(tgt_vocab._no_create_word)} `{target_field_name}` labels" \
                       f" in {[name for name in data_bundle.datasets.keys() if 'train' not in name]} " \
                       f"data set but not in train data set!."
                       f"data set but not in train data set!.\n" \
                       f"These label(s) are {tgt_vocab._no_create_word}"
            warnings.warn(warn_msg)
            logger.warn(warn_msg)
            logger.warning(warn_msg)
        tgt_vocab.index_dataset(*data_bundle.datasets.values(), field_name=target_field_name)
        data_bundle.set_vocab(tgt_vocab, target_field_name)
    
--- a/fastNLP/models/bert.py
+++ b/fastNLP/models/bert.py
@@ -65,7 +65,7 @@ class BertForSequenceClassification(BaseModel):
            self.bert.model.include_cls_sep = True
            warn_msg = "Bert for sequence classification excepts BertEmbedding `include_cls_sep` True, " \
                       "but got False. FastNLP has changed it to True."
            logger.warn(warn_msg)
            logger.warning(warn_msg)
            warnings.warn(warn_msg)

    def forward(self, words):
@@ -110,7 +110,7 @@ class BertForSentenceMatching(BaseModel):
            self.bert.model.include_cls_sep = True
            warn_msg = "Bert for sentence matching excepts BertEmbedding `include_cls_sep` True, " \
                       "but got False. FastNLP has changed it to True."
            logger.warn(warn_msg)
            logger.warning(warn_msg)
            warnings.warn(warn_msg)

    def forward(self, words):
@@ -156,7 +156,7 @@ class BertForMultipleChoice(BaseModel):
            self.bert.model.include_cls_sep = True
            warn_msg = "Bert for multiple choice excepts BertEmbedding `include_cls_sep` True, " \
                       "but got False. FastNLP has changed it to True."
            logger.warn(warn_msg)
            logger.warning(warn_msg)
            warnings.warn(warn_msg)

    def forward(self, words):
@@ -206,7 +206,7 @@ class BertForTokenClassification(BaseModel):
            self.bert.model.include_cls_sep = False
            warn_msg = "Bert for token classification excepts BertEmbedding `include_cls_sep` False, " \
                       "but got True. FastNLP has changed it to False."
            logger.warn(warn_msg)
            logger.warning(warn_msg)
            warnings.warn(warn_msg)

    def forward(self, words):
@@ -250,7 +250,7 @@ class BertForQuestionAnswering(BaseModel):
            self.bert.model.include_cls_sep = True
            warn_msg = "Bert for question answering excepts BertEmbedding `include_cls_sep` True, " \
                       "but got False. FastNLP has changed it to True."
            logger.warn(warn_msg)
            logger.warning(warn_msg)
            warnings.warn(warn_msg)

    def forward(self, words):
--- a/fastNLP/modules/encoder/bert.py
+++ b/fastNLP/modules/encoder/bert.py
@@ -488,10 +488,10 @@ class BertModel(nn.Module):

        load(model, prefix='' if hasattr(model, 'bert') else 'bert.')
        if len(missing_keys) > 0:
            logger.warn("Weights of {} not initialized from pretrained model: {}".format(
            logger.warning("Weights of {} not initialized from pretrained model: {}".format(
                model.__class__.__name__, missing_keys))
        if len(unexpected_keys) > 0:
            logger.warn("Weights from pretrained model not used in {}: {}".format(
            logger.warning("Weights from pretrained model not used in {}: {}".format(
                model.__class__.__name__, unexpected_keys))

        logger.info(f"Load pre-trained BERT parameters from file {weights_path}.")
@@ -800,7 +800,7 @@ class BertTokenizer(object):
        for token in tokens:
            ids.append(self.vocab[token])
        if len(ids) > self.max_len:
            logger.warn(
            logger.warning(
                "Token indices sequence length is longer than the specified maximum "
                " sequence length for this BERT model ({} > {}). Running this"
                " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
@@ -824,8 +824,8 @@ class BertTokenizer(object):
        with open(vocab_file, "w", encoding="utf-8") as writer:
            for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                if index != token_index:
                    logger.warn("Saving vocabulary to {}: vocabulary indices are not consecutive."
                          " Please check that the vocabulary is not corrupted!".format(vocab_file))
                    logger.warning("Saving vocabulary to {}: vocabulary indices are not consecutive."
                                   " Please check that the vocabulary is not corrupted!".format(vocab_file))
                    index = token_index
                writer.write(token + u'\n')
                index += 1
--- a/test/data_for_tests/io/OntoNotes/dev.txt
+++ b/test/data_for_tests/io/OntoNotes/dev.txt
@@ -0,0 +1,10 @@

 bc/msnbc/00/msnbc_0000   0   0          Hi   UH   (TOP(FRAG(INTJ*)  -   -   -    Dan_Abrams  *   -
 bc/msnbc/00/msnbc_0000   0   1    everyone   NN              (NP*)  -   -   -    Dan_Abrams  *   -
 bc/msnbc/00/msnbc_0000   0   2          /.    .                *))  -   -   -    Dan_Abrams  *   -

 bc/msnbc/00/msnbc_0000   0    0            first    RB  (TOP(S(ADVP*           -    -   -    Dan_Abrams   *  (ARGM-TMP*        *            *        *          -
 bc/msnbc/00/msnbc_0000   0    1               up    RB             *           -    -   -    Dan_Abrams          *            *        *            *        *          -
 bc/msnbc/00/msnbc_0000   0    2               on    IN          (PP*           -    -   -    Dan_Abrams          *            *        *            *        *          -
 bc/msnbc/00/msnbc_0000   0    3              the    DT          (NP*           -    -   -    Dan_Abrams          *            *        *            *        *          -
 bc/msnbc/00/msnbc_0000   0    4           docket    NN            *))      docket   -   -    Dan_Abrams          *            *        *            *        *          -
--- a/test/data_for_tests/io/OntoNotes/test.txt
+++ b/test/data_for_tests/io/OntoNotes/test.txt
@@ -0,0 +1,10 @@

 bc/msnbc/00/msnbc_0007   0   0    Dealing   VBG  (TOP(VP*     deal  01   -   speaker_1   *      (V*)     -
 bc/msnbc/00/msnbc_0007   0   1       with    IN      (PP*       -    -   -   speaker_1   *   (ARG1*      -
 bc/msnbc/00/msnbc_0007   0   2     serial    JJ   (NP(NP*       -    -   -   speaker_1   *        *   (156
 bc/msnbc/00/msnbc_0007   0   3     crimes   NNS         *)   crime   -   1   speaker_1   *        *    156)
 bc/msnbc/00/msnbc_0007   0   4        per    FW    (ADVP*       -    -   -   speaker_1   *        *      -
 bc/msnbc/00/msnbc_0007   0   5         se    FW       *)))      -    -   -   speaker_1   *        *)     -
 bc/msnbc/00/msnbc_0007   0   6         /.     .        *))      -    -   -   speaker_1   *        *      -

 bc/msnbc/00/msnbc_0007   0   0          We   PRP   (TOP(S(NP*)          -    -   -   speaker_1   *        (ARG0*)        *    (90)
--- a/test/data_for_tests/io/OntoNotes/train.txt
+++ b/test/data_for_tests/io/OntoNotes/train.txt
@@ -0,0 +1,50 @@

 bc/msnbc/00/msnbc_0003   0    0             The    DT     (TOP(S(NP*          -    -   -    Chris_Matthews         *      *       (ARG1*      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0    1            move    NN              *)       move  02   2    Chris_Matthews         *    (V*)           *)     *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0    2           comes   VBZ           (VP*        come  03   2    Chris_Matthews         *      *          (V*)     *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0    3               a    DT      (SBAR(NP*          -    -   -    Chris_Matthews    (DATE*      *   (ARGM-TMP*      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0    4           month    NN              *)      month   -   2    Chris_Matthews         *)     *            *      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0    5          before    IN              *          -    -   -    Chris_Matthews         *      *            *      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0    6             the    DT         (S(NP*          -    -   -    Chris_Matthews         *      *            *      *   (ARG1*   (ARG0*         *      -
 bc/msnbc/00/msnbc_0003   0    7          Senate   NNP              *)         -    -   -    Chris_Matthews      (ORG)     *            *      *        *)       *)        *      -
 bc/msnbc/00/msnbc_0003   0    8              is   VBZ           (VP*          be  03   -    Chris_Matthews         *      *            *    (V*)       *        *         *      -
 bc/msnbc/00/msnbc_0003   0    9       scheduled   VBN           (VP*    schedule  01   -    Chris_Matthews         *      *            *      *      (V*)       *         *      -
 bc/msnbc/00/msnbc_0003   0   10              to    TO         (S(VP*          -    -   -    Chris_Matthews         *      *            *      *   (ARG2*        *         *      -
 bc/msnbc/00/msnbc_0003   0   11            hold    VB           (VP*        hold  04   8    Chris_Matthews         *      *            *      *        *      (V*)        *      -
 bc/msnbc/00/msnbc_0003   0   12    confirmation    NN        (NP(NP*          -    -   -    Chris_Matthews         *      *            *      *        *   (ARG1*    (ARG2*)     -
 bc/msnbc/00/msnbc_0003   0   13        hearings   NNS              *)    hearing  01   1    Chris_Matthews         *      *            *      *        *        *       (V*)     -
 bc/msnbc/00/msnbc_0003   0   14              on    IN           (PP*          -    -   -    Chris_Matthews         *      *            *      *        *        *    (ARG1*      -
 bc/msnbc/00/msnbc_0003   0   15       President   NNP     (NP(NP(NP*          -    -   -    Chris_Matthews         *      *            *      *        *        *         *   (194
 bc/msnbc/00/msnbc_0003   0   16            Bush   NNP              *          -    -   -    Chris_Matthews   (PERSON)     *            *      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0   17              's   POS              *)         -    -   -    Chris_Matthews         *      *            *      *        *        *         *    194)
 bc/msnbc/00/msnbc_0003   0   18         Supreme   NNP          (NML*          -    -   -    Chris_Matthews     (ORG*      *            *      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0   19           Court   NNP              *)         -    -   -    Chris_Matthews         *)     *            *      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0   20         nominee    NN              *)         -    -   -    Chris_Matthews         *      *            *      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0   21            John   NNP           (NP*          -    -   -    Chris_Matthews  (PERSON*      *            *      *        *        *         *      -
 bc/msnbc/00/msnbc_0003   0   22         Roberts   NNP   *))))))))))))         -    -   -    Chris_Matthews         *)     *            *)     *        *)       *)        *)     -
 bc/msnbc/00/msnbc_0003   0   23              /.     .             *))         -    -   -    Chris_Matthews         *      *            *      *        *        *         *      -

 bc/msnbc/00/msnbc_0003   0    0        Senator    NNP  (TOP(S(NP(NP*             -    -   -    Chris_Matthews         *   (ARG1*             *        *       (162
 bc/msnbc/00/msnbc_0003   0    1          Chris    NNP              *             -    -   -    Chris_Matthews  (PERSON*        *             *        *          -
 bc/msnbc/00/msnbc_0003   0    2           Dodd    NNP              *)            -    -   -    Chris_Matthews         *)       *             *        *          -
 bc/msnbc/00/msnbc_0003   0    3             of     IN           (PP*             -    -   -    Chris_Matthews         *        *             *        *          -
 bc/msnbc/00/msnbc_0003   0    4    Connecticut    NNP         (NP*)))            -    -   -    Chris_Matthews      (GPE)       *)            *        *        162)
 bc/msnbc/00/msnbc_0003   0    5            was    VBD           (VP*             be  01   1    Chris_Matthews         *      (V*)            *        *          -
 bc/msnbc/00/msnbc_0003   0    6          among     IN           (PP*             -    -   -    Chris_Matthews         *   (ARG2*             *        *          -
 bc/msnbc/00/msnbc_0003   0    7          those     DT        (NP(NP*             -    -   -    Chris_Matthews         *        *        (ARG0*        *          -
 bc/msnbc/00/msnbc_0003   0    8      Democrats   NNPS              *)            -    -   -    Chris_Matthews     (NORP)       *             *)       *          -
 bc/msnbc/00/msnbc_0003   0    9            who     WP    (SBAR(WHNP*)            -    -   -    Chris_Matthews         *        *      (R-ARG0*)       *          -
 bc/msnbc/00/msnbc_0003   0   10          spoke    VBD         (S(VP*          speak  03   5    Chris_Matthews         *        *           (V*)       *          -
 bc/msnbc/00/msnbc_0003   0   11            out     RP          (PRT*)            -    -   -    Chris_Matthews         *        *             *        *          -
 bc/msnbc/00/msnbc_0003   0   12        against     IN           (PP*             -    -   -    Chris_Matthews         *        *        (ARG1*        *          -
 bc/msnbc/00/msnbc_0003   0   13         Bolton    NNP        (NP(NP*             -    -   -    Chris_Matthews   (PERSON)       *             *   (ARG1*   (31|(130
 bc/msnbc/00/msnbc_0003   0   14             's    POS              *)            -    -   -    Chris_Matthews         *        *             *        *)        31)
 bc/msnbc/00/msnbc_0003   0   15    appointment     NN             *))   appointment  01   1    Chris_Matthews         *        *             *)     (V*)       130)
 bc/msnbc/00/msnbc_0003   0   16          today     NN     (NP*)))))))         today   -   2    Chris_Matthews     (DATE)       *)   (ARGM-TMP*)       *       (121)
 bc/msnbc/00/msnbc_0003   0   17             /.      .             *))            -    -   -    Chris_Matthews         *        *             *        *          -

 bc/msnbc/00/msnbc_0003   0    0        I   PRP     (TOP(S(NP*)      -    -   -    Christopher_Dodd  *      *        (ARG0*)            *    (162)
 bc/msnbc/00/msnbc_0003   0    1     just    RB         (ADVP*)      -    -   -    Christopher_Dodd  *      *    (ARGM-ADV*)            *       -
 bc/msnbc/00/msnbc_0003   0    2       do   VBP           (VP*       do  01   -    Christopher_Dodd  *    (V*)            *             *       -
 bc/msnbc/00/msnbc_0003   0    3      n't    RB              *       -    -   -    Christopher_Dodd  *      *    (ARGM-NEG*)            *       -
 bc/msnbc/00/msnbc_0003   0    4    think    VB           (VP*    think  01   1    Christopher_Dodd  *      *           (V*)            *       -
--- a/test/data_for_tests/io/conll2003/dev.txt
+++ b/test/data_for_tests/io/conll2003/dev.txt
@@ -0,0 +1,49 @@
 -DOCSTART- -X- -X- O

 CRICKET NNP B-NP O
 - : O O
 LEICESTERSHIRE NNP B-NP B-ORG
 TAKE NNP I-NP O
 OVER IN B-PP O
 AT NNP B-NP O
 TOP NNP I-NP O
 AFTER NNP I-NP O
 INNINGS NNP I-NP O
 VICTORY NN I-NP O
 . . O O

 LONDON NNP B-NP B-LOC
 1996-08-30 CD I-NP O

 Phil NNP B-NP B-PER
 Simmons NNP I-NP I-PER
 took VBD B-VP O
 four CD B-NP O
 for IN B-PP O
 38 CD B-NP O
 on IN B-PP O
 Friday NNP B-NP O
 as IN B-PP O
 Leicestershire NNP B-NP B-ORG
 beat VBD B-VP O
 Somerset NNP B-NP B-ORG
 by IN B-PP O
 an DT B-NP O
 innings NN I-NP O
 and CC O O
 39 CD B-NP O
 runs NNS I-NP O
 in IN B-PP O
 two CD B-NP O
 days NNS I-NP O
 to TO B-VP O
 take VB I-VP O
 over IN B-PP O
 at IN B-PP O
 the DT B-NP O
 head NN I-NP O
 of IN B-PP O
 the DT B-NP O
 county NN I-NP O
 championship NN I-NP O
 . . O O
--- a/test/data_for_tests/io/conll2003/test.txt
+++ b/test/data_for_tests/io/conll2003/test.txt
@@ -0,0 +1,51 @@
 -DOCSTART- -X- -X- O

 SOCCER NN B-NP O
 - : O O
 JAPAN NNP B-NP B-LOC
 GET VB B-VP O
 LUCKY NNP B-NP O
 WIN NNP I-NP O
 , , O O
 THE NP B-NP B-PER
 CHINA NNP I-NP I-PER
 IN IN B-PP O
 SURPRISE DT B-NP O
 DEFEAT NN I-NP O
 . . O O

 Nadim NNP B-NP B-PER
 Ladki NNP I-NP I-PER

 AL-AIN NNP B-NP B-LOC
 , , O O
 United NNP B-NP B-LOC
 Arab NNP I-NP I-LOC
 Emirates NNPS I-NP I-LOC
 1996-12-06 CD I-NP O

 Japan NNP B-NP B-LOC
 began VBD B-VP O
 the DT B-NP O
 defence NN I-NP O
 of IN B-PP O
 their PRP$ B-NP O
 Asian JJ I-NP B-MISC
 Cup NNP I-NP I-MISC
 title NN I-NP O
 with IN B-PP O
 a DT B-NP O
 lucky JJ I-NP O
 2-1 CD I-NP O
 win VBP B-VP O
 against IN B-PP O
 Syria NNP B-NP B-LOC
 in IN B-PP O
 a DT B-NP O
 Group NNP I-NP O
 C NNP I-NP O
 championship NN I-NP O
 match NN I-NP O
 on IN B-PP O
 Friday NNP B-NP O
 . . O O
--- a/test/data_for_tests/io/conll2003/train.txt
+++ b/test/data_for_tests/io/conll2003/train.txt
@@ -0,0 +1,48 @@
 -DOCSTART- -X- -X- O

 EU NNP B-NP B-ORG
 rejects VBZ B-VP O
 German JJ B-NP B-MISC
 call NN I-NP O
 to TO B-VP O
 boycott VB I-VP O
 British JJ B-NP B-MISC
 lamb NN I-NP O
 . . O O

 Peter NNP B-NP B-PER
 Blackburn NNP I-NP I-PER

 BRUSSELS NNP B-NP B-LOC
 1996-08-22 CD I-NP O

 The DT B-NP O
 European NNP I-NP B-ORG
 Commission NNP I-NP I-ORG
 said VBD B-VP O
 on IN B-PP O
 Thursday NNP B-NP O
 it PRP B-NP O
 disagreed VBD B-VP O
 with IN B-PP O
 German JJ B-NP B-MISC
 advice NN I-NP O
 to TO B-PP O
 consumers NNS B-NP O
 to TO B-VP O
 shun VB I-VP O
 British JJ B-NP B-MISC
 lamb NN I-NP O
 until IN B-SBAR O
 scientists NNS B-NP O
 determine VBP B-VP O
 whether IN B-SBAR O
 mad JJ B-NP O
 cow NN I-NP O
 disease NN I-NP O
 can MD B-VP O
 be VB I-VP O
 transmitted VBN I-VP O
 to TO B-PP O
 sheep NN B-NP O
 . . O O
--- a/test/io/loader/test_conll_loader.py
+++ b/test/io/loader/test_conll_loader.py
@@ -26,6 +26,12 @@ class TestWeiboNER(unittest.TestCase):


 class TestConll2003Loader(unittest.TestCase):
    def test__load(self):
    def test_load(self):
        Conll2003Loader()._load('test/data_for_tests/conll_2003_example.txt')


 class TestConllLoader(unittest.TestCase):
    def test_conll(self):
        db = Conll2003Loader().load('test/data_for_tests/io/conll2003')
        print(db)

--- a/test/io/pipe/test_conll.py
+++ b/test/io/pipe/test_conll.py
@@ -1,6 +1,7 @@
 import unittest
 import os
 from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe
 from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe, \
    OntoNotesNERPipe


@unittest.skipIf('TRAVIS' in os.environ, "Skip in travis")
@@ -38,3 +39,14 @@ class TestNERPipe(unittest.TestCase):
                print(data_bundle)
                data_bundle = pipe(encoding_type='bioes').process_from_file(f'test/data_for_tests/io/{k}')
                print(data_bundle)


 class TestConll2003Pipe(unittest.TestCase):
    def test_conll(self):
        with self.assertWarns(Warning):
            data_bundle = Conll2003Pipe().process_from_file('test/data_for_tests/io/conll2003')
        print(data_bundle)

    def test_OntoNotes(self):
        data_bundle = OntoNotesNERPipe().process_from_file('test/data_for_tests/io/OntoNotes')
        print(data_bundle)
--- a/test/io/pipe/test_cws.py
+++ b/test/io/pipe/test_cws.py
@@ -19,5 +19,6 @@ class TestRunCWSPipe(unittest.TestCase):
        dataset_names = ['msra', 'cityu', 'as', 'pku']
        for dataset_name in dataset_names:
            with self.subTest(dataset_name=dataset_name):
                data_bundle = CWSPipe().process_from_file(f'test/data_for_tests/io/cws_{dataset_name}')
                data_bundle = CWSPipe(bigrams=True, trigrams=True).\
                    process_from_file(f'test/data_for_tests/io/cws_{dataset_name}')
                print(data_bundle)