From 15c7c073beec5a7e5d5f2f1408bb7ba84150477e Mon Sep 17 00:00:00 2001 From: yunfan Date: Wed, 19 Jun 2019 17:04:25 +0800 Subject: [PATCH] fix embed_loader --- fastNLP/io/embed_loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fastNLP/io/embed_loader.py b/fastNLP/io/embed_loader.py index 5237a8a7..e046f1df 100644 --- a/fastNLP/io/embed_loader.py +++ b/fastNLP/io/embed_loader.py @@ -72,7 +72,8 @@ class EmbedLoader(BaseLoader): for idx, line in enumerate(f, start_idx): try: parts = line.strip().split() - word = parts[0] + word = ''.join(parts[:-dim]) + nums = parts[-dim:] # 对齐unk与pad if word==padding and vocab.padding is not None: word = vocab.padding @@ -80,7 +81,7 @@ class EmbedLoader(BaseLoader): word = vocab.unknown if word in vocab: index = vocab.to_index(word) - matrix[index] = np.fromstring(' '.join(parts[1:]), sep=' ', dtype=dtype, count=dim) + matrix[index] = np.fromstring(' '.join(nums), sep=' ', dtype=dtype, count=dim) hit_flags[index] = True except Exception as e: if error == 'ignore': @@ -135,10 +136,11 @@ class EmbedLoader(BaseLoader): for idx, line in enumerate(f, start=start): try: parts = line.strip().split() - word = parts[0] if dim == -1: dim = len(parts) - 1 - vec = np.fromstring(' '.join(parts[1:]), sep=' ', dtype=dtype, count=dim) + word = ''.join(parts[:-dim]) + nums = parts[-dim:] + vec = np.fromstring(' '.join(nums), sep=' ', dtype=dtype, count=dim) vec_dict[word] = vec vocab.add_word(word) if unknown is not None and unknown == word: