From 0c990e7b5053fc37ebf08c5e8c0661eacd7ac3a9 Mon Sep 17 00:00:00 2001
From: Gosicfly
Date: Sun, 11 Oct 2020 10:59:52 +0800
Subject: [PATCH] Update vocabulary.py (#325)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

strip只需要将\n去掉,否则会将一些特殊字符去掉,造成split的时候长度出错
---
 fastNLP/core/vocabulary.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fastNLP/core/vocabulary.py b/fastNLP/core/vocabulary.py
index 1c7d33c5..0b010c02 100644
--- a/fastNLP/core/vocabulary.py
+++ b/fastNLP/core/vocabulary.py
@@ -540,7 +540,7 @@ class Vocabulary(object):
 
         vocab = Vocabulary()
         for line in f:
-            line = line.strip()
+            line = line.strip('\n')
             if line:
                 name, value = line.split()
                 if name in ('max_size', 'min_freq'):
@@ -557,7 +557,7 @@ class Vocabulary(object):
         no_create_entry_counter = {}
         word2idx = {}
         for line in f:
-            line = line.strip()
+            line = line.strip('\n')
             if line:
                 parts = line.split('\t')
                 word,count,idx,no_create_entry = parts[0], int(parts[1]), int(parts[2]), int(parts[3])