
Fix the GPT2Tokenizer dependency problem

tags/v0.5.5
yh_cc committed 4 years ago
commit 2dee67129a
2 changed files with 2 additions and 22 deletions:
  1. fastNLP/modules/tokenizer/gpt2_tokenizer.py (+0, -21)
  2. requirements.txt (+2, -1)

fastNLP/modules/tokenizer/gpt2_tokenizer.py (+0, -21)

@@ -71,24 +71,6 @@ VOCAB_FILES_NAMES = {
 }
 
 
-PRETRAINED_VOCAB_FILES_MAP = {
-    "vocab_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json",
-    },
-    "merges_file": {
-        "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt",
-        "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt",
-        "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt",
-        "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt",
-        "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt",
-    },
-}
-
-
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
     "en-small": 1024,
     'en': 1024,
@@ -128,9 +110,6 @@ class GPT2Tokenizer:
     the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"`
     """
 
-    vocab_files_names = VOCAB_FILES_NAMES
-    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
-
     SPECIAL_TOKENS_ATTRIBUTES = [
         "bos_token",
         "eos_token",

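With the hard-coded S3 URL map removed, this module no longer ships download locations for the pretrained vocab files; the tokenizer can still be built from files on disk. A minimal usage sketch follows; the vocab_file/merges_file constructor arguments and the local paths are assumptions carried over from the HuggingFace-style tokenizer this module mirrors, not something shown in the diff:

# Hypothetical usage sketch: the constructor signature is assumed from the
# HuggingFace-style GPT2Tokenizer this module is derived from, not shown in the diff.
from fastNLP.modules.tokenizer.gpt2_tokenizer import GPT2Tokenizer

tokenizer = GPT2Tokenizer(
    vocab_file="path/to/gpt2/vocab.json",   # byte-level BPE token -> id map
    merges_file="path/to/gpt2/merges.txt",  # BPE merge ranks
)
# encode/decode round-trip, as in the class docstring above
ids = tokenizer.encode(" Hello")
print(tokenizer.decode(ids))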

requirements.txt (+2, -1)

@@ -5,4 +5,5 @@ nltk>=3.4.1
 prettytable>=0.7.2
 requests
 spacy
-prettytable>=0.7.2
+prettytable>=0.7.2
+regex!=2019.12.17

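The new regex requirement is what actually resolves the dependency problem: GPT-2's byte-level BPE pre-tokenization pattern uses Unicode property classes (\p{L}, \p{N}) that the third-party regex package supports but the standard-library re module cannot compile, and the != pin excludes a single release (2019.12.17) rather than requiring a minimum version. A minimal sketch of that pattern follows; whether gpt2_tokenizer.py uses exactly this string is an assumption, since the diff does not show it:

# Why `regex` rather than the stdlib `re`: \p{L}/\p{N} Unicode property classes.
# The pattern below is the standard GPT-2 pre-tokenization regex
# (assumed here, not shown in the diff).
import regex

pat = regex.compile(
    r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
)

# Splits raw text into the word-level pieces that byte-level BPE then merges.
print(pat.findall("Hello world, it's 2020!"))
# -> ['Hello', ' world', ',', ' it', "'s", ' 2020', '!']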