
Fix the GPT2Tokenizer dependency problem

tags/v0.5.5
yh_cc committed 4 years ago
commit 2dee67129a
2 changed files with 2 additions and 22 deletions:
  1. fastNLP/modules/tokenizer/gpt2_tokenizer.py (+0, -21)
  2. requirements.txt (+2, -1)

fastNLP/modules/tokenizer/gpt2_tokenizer.py (+0, -21)

@@ -71,24 +71,6 @@ VOCAB_FILES_NAMES = {
 }
 
 
-PRETRAINED_VOCAB_FILES_MAP = {
-    "vocab_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json",
-    },
-    "merges_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt",
-    },
-}
-
-
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
     "en-small": 1024,
     'en': 1024,
@@ -128,9 +110,6 @@ class GPT2Tokenizer:
     the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"`
     """
 
-    vocab_files_names = VOCAB_FILES_NAMES
-    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
-
     SPECIAL_TOKENS_ATTRIBUTES = [
         "bos_token",
         "eos_token",

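With the hard-coded S3 URL map removed, this module no longer ships download locations for the pretrained vocab files; the tokenizer can still be built from files on disk. A minimal usage sketch follows; the vocab_file/merges_file constructor arguments and the local paths are assumptions carried over from the HuggingFace-style tokenizer this module mirrors, not something shown in the diff:

# Hypothetical usage sketch: the constructor signature is assumed from the
# HuggingFace-style GPT2Tokenizer this module is derived from, not shown in the diff.
from fastNLP.modules.tokenizer.gpt2_tokenizer import GPT2Tokenizer

tokenizer = GPT2Tokenizer(
    vocab_file="path/to/gpt2/vocab.json",   # byte-level BPE token -> id map
    merges_file="path/to/gpt2/merges.txt",  # BPE merge ranks
)
# encode/decode round-trip, as in the class docstring above
ids = tokenizer.encode(" Hello")
print(tokenizer.decode(ids))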

requirements.txt (+2, -1)

@@ -5,4 +5,5 @@ nltk>=3.4.1
 prettytable>=0.7.2
 requests
 spacy
-prettytable>=0.7.2
+prettytable>=0.7.2
+regex!=2019.12.17

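The new regex requirement is what actually resolves the dependency problem: GPT-2's byte-level BPE pre-tokenization pattern uses Unicode property classes (\p{L}, \p{N}) that the third-party regex package supports but the standard-library re module cannot compile, and the != pin excludes a single release (2019.12.17) rather than requiring a minimum version. A minimal sketch of that pattern follows; whether gpt2_tokenizer.py uses exactly this string is an assumption, since the diff does not show it:

# Why `regex` rather than the stdlib `re`: \p{L}/\p{N} Unicode property classes.
# The pattern below is the standard GPT-2 pre-tokenization regex
# (assumed here, not shown in the diff).
import regex

pat = regex.compile(
    r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
)

# Splits raw text into the word-level pieces that byte-level BPE then merges.
print(pat.findall("Hello world, it's 2020!"))
# -> ['Hello', ' world', ',', ' it', "'s", ' 2020', '!']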