diff --git a/fastNLP/modules/tokenizer/gpt2_tokenizer.py b/fastNLP/modules/tokenizer/gpt2_tokenizer.py
index fec8ef15..9cfa8f2c 100644
--- a/fastNLP/modules/tokenizer/gpt2_tokenizer.py
+++ b/fastNLP/modules/tokenizer/gpt2_tokenizer.py
@@ -71,24 +71,6 @@ VOCAB_FILES_NAMES = {
 }
 
 
-PRETRAINED_VOCAB_FILES_MAP = {
-    "vocab_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json",
-    },
-    "merges_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt",
-    },
-}
-
-
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
     "en-small": 1024,
     'en': 1024,
@@ -128,9 +110,6 @@ class GPT2Tokenizer:
         the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"`
     """
 
-    vocab_files_names = VOCAB_FILES_NAMES
-    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
-
     SPECIAL_TOKENS_ATTRIBUTES = [
         "bos_token",
         "eos_token",
diff --git a/requirements.txt b/requirements.txt
index b07aed3f..242301be 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ nltk>=3.4.1
 prettytable>=0.7.2
 requests
 spacy
-prettytable>=0.7.2
\ No newline at end of file
+prettytable>=0.7.2
+regex!=2019.12.17
\ No newline at end of file
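
A note on the requirements.txt change: `regex!=2019.12.17` is a PEP 440 exclusion specifier, so pip may install any release of `regex` except 2019.12.17, while every other version remains acceptable. Below is a minimal sketch of how such a specifier is evaluated; it uses the third-party `packaging` library purely for illustration and is not part of this PR.

# Sketch: semantics of a PEP 440 exclusion pin like "regex!=2019.12.17".
# The `packaging` library is chosen here as an assumption for illustration;
# the PR itself only edits requirements.txt.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("!=2019.12.17")

print("2019.12.17" in spec)  # False: the excluded release is rejected
print("2019.12.9" in spec)   # True: earlier versions still satisfy the pin
print("2020.1.8" in spec)    # True: later versions satisfy it as well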