diff --git a/fastNLP/core/utils.py b/fastNLP/core/utils.py
index 589968a7..a7a286d0 100644
--- a/fastNLP/core/utils.py
+++ b/fastNLP/core/utils.py
@@ -268,7 +268,7 @@ def _prepare_cache_filepath(filepath):
         raise RuntimeError("The cache_file_path must be a file, not a directory.")
     cache_dir = os.path.dirname(_cache_filepath)
     if not os.path.exists(cache_dir):
-        os.makedirs(cache_dir)
+        os.makedirs(cache_dir, exist_ok=True)
 
 
 def cache_results(_cache_fp, _refresh=False, _verbose=1):
diff --git a/fastNLP/io/pipe/utils.py b/fastNLP/io/pipe/utils.py
index fdd6f2cd..f3f0e649 100644
--- a/fastNLP/io/pipe/utils.py
+++ b/fastNLP/io/pipe/utils.py
@@ -12,6 +12,7 @@ import warnings
 from ...core.const import Const
 from ...core.vocabulary import Vocabulary
 from ...core._logger import logger
+from pkg_resources import parse_version
 
 
 def iob2(tags: List[str]) -> List[str]:
@@ -82,7 +83,10 @@ def get_tokenizer(tokenize_method: str, lang='en'):
         spacy.prefer_gpu()
         if lang != 'en':
             raise RuntimeError("Spacy only supports en right right.")
-        en = spacy.load(lang)
+        if parse_version(spacy.__version__) >= parse_version('3.0'):
+            en = spacy.load('en_core_web_sm')
+        else:
+            en = spacy.load(lang)
         tokenizer = lambda x: [w.text for w in en.tokenizer(x)]
     elif tokenize_method in tokenizer_dict:
         tokenizer = tokenizer_dict[tokenize_method]