From 7b4e099c5267efb6a4a88b9d789a0940be05bb56 Mon Sep 17 00:00:00 2001 From: yh_cc Date: Mon, 22 Feb 2021 12:01:18 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dspacy=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=8D=87=E7=BA=A7=E5=AF=BC=E8=87=B4=E7=9A=84=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fastNLP/core/utils.py | 2 +- fastNLP/io/pipe/utils.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fastNLP/core/utils.py b/fastNLP/core/utils.py index 589968a7..a7a286d0 100644 --- a/fastNLP/core/utils.py +++ b/fastNLP/core/utils.py @@ -268,7 +268,7 @@ def _prepare_cache_filepath(filepath): raise RuntimeError("The cache_file_path must be a file, not a directory.") cache_dir = os.path.dirname(_cache_filepath) if not os.path.exists(cache_dir): - os.makedirs(cache_dir) + os.makedirs(cache_dir, exist_ok=True) def cache_results(_cache_fp, _refresh=False, _verbose=1): diff --git a/fastNLP/io/pipe/utils.py b/fastNLP/io/pipe/utils.py index fdd6f2cd..f3f0e649 100644 --- a/fastNLP/io/pipe/utils.py +++ b/fastNLP/io/pipe/utils.py @@ -12,6 +12,7 @@ import warnings from ...core.const import Const from ...core.vocabulary import Vocabulary from ...core._logger import logger +from pkg_resources import parse_version def iob2(tags: List[str]) -> List[str]: @@ -82,7 +83,10 @@ def get_tokenizer(tokenize_method: str, lang='en'): spacy.prefer_gpu() if lang != 'en': raise RuntimeError("Spacy only supports en right right.") - en = spacy.load(lang) + if parse_version(spacy.__version__) >= parse_version('3.0'): + en = spacy.load('en_core_web_sm') + else: + en = spacy.load(lang) tokenizer = lambda x: [w.text for w in en.tokenizer(x)] elif tokenize_method in tokenizer_dict: tokenizer = tokenizer_dict[tokenize_method]