Browse Source

[to #42322933] fix bug for multi-lang text

支持多语言tokenize(830模型)
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9900916
master
pangda yingda.chen 3 years ago
parent
commit
83b0adf0a2
1 changed file with 1 addition and 1 deletion
  1. +1
    -1
      modelscope/preprocessors/nlp.py

+ 1
- 1
modelscope/preprocessors/nlp.py View File

@@ -533,7 +533,7 @@ class NERPreprocessor(Preprocessor):
self.model_dir: str = model_dir
self.sequence_length = kwargs.pop('sequence_length', 512)
self.tokenizer = AutoTokenizer.from_pretrained(
- model_dir, use_fast=False)
+ model_dir, use_fast=True)
self.is_split_into_words = self.tokenizer.init_kwargs.get(
'is_split_into_words', False)



Loading…
Cancel
Save