Commit 31c774936b ("unfinished") by 雨泓, 3 years ago, on branch master.

12 changed files with 100 additions and 91 deletions:
 1. modelscope/metainfo.py (+8, -4)
 2. modelscope/models/nlp/masked_language_model.py (+6, -0)
 3. modelscope/models/nlp/sbert_for_nli.py (+1, -1)
 4. modelscope/models/nlp/sbert_for_token_classification.py (+3, -3)
 5. modelscope/pipelines/nlp/fill_mask_pipeline.py (+16, -12)
 6. modelscope/pipelines/nlp/nli_pipeline.py (+16, -20)
 7. modelscope/pipelines/nlp/sentence_similarity_pipeline.py (+5, -3)
 8. modelscope/pipelines/nlp/sentiment_classification_pipeline.py (+11, -16)
 9. modelscope/pipelines/nlp/text_generation_pipeline.py (+6, -6)
10. modelscope/pipelines/nlp/word_segmentation_pipeline.py (+9, -8)
11. modelscope/pipelines/nlp/zero_shot_classification_pipeline.py (+7, -6)
12. modelscope/preprocessors/nlp.py (+12, -12)

modelscope/metainfo.py (+8, -4)

@@ -46,6 +46,10 @@ class Pipelines(object):
     word_segmentation = 'word-segmentation'
     text_generation = 'text-generation'
     sentiment_analysis = 'sentiment-analysis'
+    sentiment_classification = 'sentiment-classification'
+    zero_shot_classification = 'zero-shot-classification'
+    fill_mask = 'fill-mask'
+    nli = 'nli'
 
     # audio tasks
     sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts'
@@ -85,10 +89,10 @@ class Preprocessors(object):
     # nlp preprocessor
     bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer'
     palm_text_gen_tokenizer = 'palm-text-gen-tokenizer'
-    sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer'
-    sbert_nli_tokenizer = 'sbert-nli-tokenizer'
-    sbert_sen_cls_tokenizer = 'sbert-sen-cls-tokenizer'
-    sbert_zero_shot_cls_tokenizer = 'sbert-zero-shot-cls-tokenizer'
+    token_cls_tokenizer = 'token-cls-tokenizer'
+    nli_tokenizer = 'nli-tokenizer'
+    sen_cls_tokenizer = 'sen-cls-tokenizer'
+    zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'
 
     # audio preprocessor
     linear_aec_fbank = 'linear-aec-fbank'
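
These constants are the registry keys consumed by the pipeline and preprocessor files below. A trivial sanity-check sketch of what the new names resolve to (values taken from the diff above):

    from modelscope.metainfo import Pipelines, Preprocessors

    assert Pipelines.fill_mask == 'fill-mask'
    assert Pipelines.nli == 'nli'
    assert Preprocessors.nli_tokenizer == 'nli-tokenizer'
    assert Preprocessors.token_cls_tokenizer == 'token-cls-tokenizer'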


modelscope/models/nlp/masked_language_model.py (+6, -0)

@@ -19,6 +19,12 @@ class MaskedLMModelBase(Model):
def build_model(self): def build_model(self):
raise NotImplementedError() raise NotImplementedError()


@property
def config(self):
if hasattr(self.model, "config"):
return self.model.config
return None

def forward(self, inputs: Dict[str, Tensor]) -> Dict[str, np.ndarray]: def forward(self, inputs: Dict[str, Tensor]) -> Dict[str, np.ndarray]:
"""return the result by the model """return the result by the model




modelscope/models/nlp/sbert_for_nli.py (+1, -1)

@@ -1,4 +1,4 @@
-from modelscope.utils.constant import Tasks
+from ...utils.constant import Tasks
 from .sbert_for_sequence_classification import SbertForSequenceClassificationBase
 from ..builder import MODELS
 from ...metainfo import Models


modelscope/models/nlp/sbert_for_token_classification.py (+3, -3)

@@ -2,18 +2,17 @@ from typing import Any, Dict, Union
 
 import numpy as np
 import torch
-from sofa import SbertConfig, SbertForTokenClassification
 
 from modelscope.metainfo import Models
 from modelscope.utils.constant import Tasks
 from ..base import Model, Tensor
 from ..builder import MODELS
 
-__all__ = ['StructBertForTokenClassification']
+__all__ = ['SbertForTokenClassification']
 
 
 @MODELS.register_module(Tasks.word_segmentation, module_name=Models.structbert)
-class StructBertForTokenClassification(Model):
+class SbertForTokenClassification(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
         """initialize the word segmentation model from the `model_dir` path.
@@ -25,6 +24,7 @@ class StructBertForTokenClassification(Model):
         """
         super().__init__(model_dir, *args, **kwargs)
         self.model_dir = model_dir
+        from sofa import SbertConfig, SbertForTokenClassification
         self.model = SbertForTokenClassification.from_pretrained(
             self.model_dir)
         self.config = SbertConfig.from_pretrained(self.model_dir)
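
Moving the sofa import from module level into __init__ means importing this module no longer requires sofa to be installed; the dependency is resolved only when a model is actually built. A minimal sketch of the same lazy-import pattern (the class name is made up):

    class LazySbertWrapper:
        """Illustrative only; sofa is resolved at construction time."""

        def __init__(self, model_dir: str):
            # Deferred import: raises ImportError when an instance is built,
            # not when the surrounding module is merely imported.
            from sofa import SbertForTokenClassification
            self.model = SbertForTokenClassification.from_pretrained(model_dir)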


modelscope/pipelines/nlp/fill_mask_pipeline.py (+16, -12)

@@ -1,38 +1,41 @@
from typing import Dict, Optional, Union from typing import Dict, Optional, Union


from modelscope.models import Model
from modelscope.models.nlp.masked_language_model import \
AliceMindBaseForMaskedLM
from modelscope.preprocessors import FillMaskPreprocessor
from modelscope.utils.constant import Tasks
from ...models import Model
from ...models.nlp.masked_language_model import \
MaskedLMModelBase
from ...preprocessors import FillMaskPreprocessor
from ...utils.constant import Tasks
from ..base import Pipeline, Tensor from ..base import Pipeline, Tensor
from ..builder import PIPELINES from ..builder import PIPELINES
from ...metainfo import Pipelines


__all__ = ['FillMaskPipeline'] __all__ = ['FillMaskPipeline']




@PIPELINES.register_module(Tasks.fill_mask, module_name=r'sbert')
@PIPELINES.register_module(Tasks.fill_mask, module_name=r'veco')
@PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask)
class FillMaskPipeline(Pipeline): class FillMaskPipeline(Pipeline):


def __init__(self, def __init__(self,
model: Union[AliceMindBaseForMaskedLM, str],
model: Union[MaskedLMModelBase, str],
preprocessor: Optional[FillMaskPreprocessor] = None, preprocessor: Optional[FillMaskPreprocessor] = None,
first_sequence="sentense",
**kwargs): **kwargs):
"""use `model` and `preprocessor` to create a nlp fill mask pipeline for prediction """use `model` and `preprocessor` to create a nlp fill mask pipeline for prediction


Args: Args:
model (AliceMindBaseForMaskedLM): a model instance
model (MaskedLMModelBase): a model instance
preprocessor (FillMaskPreprocessor): a preprocessor instance preprocessor (FillMaskPreprocessor): a preprocessor instance
""" """
fill_mask_model = model if isinstance( fill_mask_model = model if isinstance(
model, AliceMindBaseForMaskedLM) else Model.from_pretrained(model)
model, MaskedLMModelBase) else Model.from_pretrained(model)
assert fill_mask_model.config is not None

if preprocessor is None: if preprocessor is None:
preprocessor = FillMaskPreprocessor( preprocessor = FillMaskPreprocessor(
fill_mask_model.model_dir, fill_mask_model.model_dir,
first_sequence='sentence',
first_sequence=first_sequence,
second_sequence=None) second_sequence=None)
super().__init__(model=model, preprocessor=preprocessor, **kwargs)
super().__init__(model=fill_mask_model, preprocessor=preprocessor, **kwargs)
self.preprocessor = preprocessor self.preprocessor = preprocessor
self.tokenizer = preprocessor.tokenizer self.tokenizer = preprocessor.tokenizer
self.mask_id = {'veco': 250001, 'sbert': 103} self.mask_id = {'veco': 250001, 'sbert': 103}
@@ -82,6 +85,7 @@ class FillMaskPipeline(Pipeline):


pred_strings = [] pred_strings = []
for ids in rst_ids: # batch for ids in rst_ids: # batch
# TODO vocab size is not stable
if self.model.config.vocab_size == 21128: # zh bert if self.model.config.vocab_size == 21128: # zh bert
pred_string = self.tokenizer.convert_ids_to_tokens(ids) pred_string = self.tokenizer.convert_ids_to_tokens(ids)
pred_string = ''.join(pred_string) pred_string = ''.join(pred_string)
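
With registration keyed on Pipelines.fill_mask, the pipeline is reachable through the generic factory. A hedged usage sketch; the model id is a placeholder, not something this commit ships:

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    # 'damo/placeholder-fill-mask' stands in for a real hub model id.
    fill_mask = pipeline(Tasks.fill_mask, model='damo/placeholder-fill-mask')
    print(fill_mask('The capital of China is [MASK].'))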


modelscope/pipelines/nlp/nli_pipeline.py (+16, -20)

@@ -1,27 +1,31 @@
import os
import uuid import uuid
from typing import Any, Dict, Union from typing import Any, Dict, Union


import json
import uuid
from typing import Any, Dict, Union

import numpy as np import numpy as np


from modelscope.models.nlp import SbertForNLI
from modelscope.preprocessors import NLIPreprocessor
from modelscope.utils.constant import Tasks
from ...models import Model
from ..base import Input, Pipeline
from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp import SbertForNLI
from ...preprocessors import NLIPreprocessor
from ...utils.constant import Tasks


__all__ = ['NLIPipeline'] __all__ = ['NLIPipeline']




@PIPELINES.register_module( @PIPELINES.register_module(
Tasks.nli, module_name=r'nlp_structbert_nli_chinese-base')
Tasks.nli, module_name=Pipelines.nli)
class NLIPipeline(Pipeline): class NLIPipeline(Pipeline):


def __init__(self, def __init__(self,
model: Union[SbertForNLI, str], model: Union[SbertForNLI, str],
preprocessor: NLIPreprocessor = None, preprocessor: NLIPreprocessor = None,
first_sequence="first_sequence",
second_sequence="second_sequence",
**kwargs): **kwargs):
"""use `model` and `preprocessor` to create a nlp text classification pipeline for prediction """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction


@@ -36,20 +40,12 @@ class NLIPipeline(Pipeline):
if preprocessor is None: if preprocessor is None:
preprocessor = NLIPreprocessor( preprocessor = NLIPreprocessor(
sc_model.model_dir, sc_model.model_dir,
first_sequence='first_sequence',
second_sequence='second_sequence')
first_sequence=first_sequence,
second_sequence=second_sequence)
super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
assert len(sc_model.id2label) > 0


self.label_path = os.path.join(sc_model.model_dir,
'label_mapping.json')
with open(self.label_path) as f:
self.label_mapping = json.load(f)
self.label_id_to_name = {
idx: name
for name, idx in self.label_mapping.items()
}

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
def postprocess(self, inputs: Dict[str, Any], **postprocess_params) -> Dict[str, str]:
"""process the prediction results """process the prediction results


Args: Args:
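
The new first_sequence/second_sequence keywords let callers rename the two text fields instead of relying on the previously hard-coded keys. A sketch, assuming NLIPipeline is re-exported from modelscope.pipelines.nlp and using a placeholder model id:

    from modelscope.pipelines.nlp import NLIPipeline

    pipe = NLIPipeline(
        'damo/placeholder-nli',  # placeholder hub id
        first_sequence='premise',
        second_sequence='hypothesis')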


modelscope/pipelines/nlp/sentence_similarity_pipeline.py (+5, -3)

@@ -20,6 +20,8 @@ class SentenceSimilarityPipeline(Pipeline):
     def __init__(self,
                  model: Union[Model, str],
                  preprocessor: SequenceClassificationPreprocessor = None,
+                 first_sequence='first_sequence',
+                 second_sequence='second_sequence',
                  **kwargs):
         """use `model` and `preprocessor` to create a nlp sentence similarity pipeline for prediction
@@ -35,14 +37,14 @@
         if preprocessor is None:
             preprocessor = SequenceClassificationPreprocessor(
                 sc_model.model_dir,
-                first_sequence='first_sequence',
-                second_sequence='second_sequence')
+                first_sequence=first_sequence,
+                second_sequence=second_sequence)
         super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
 
         assert hasattr(self.model, 'id2label'), \
             'id2label map should be initalizaed in init function.'
 
-    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+    def postprocess(self, inputs: Dict[str, Any], **postprocess_params) -> Dict[str, str]:
         """process the prediction results
 
         Args:


modelscope/pipelines/nlp/sentiment_classification_pipeline.py (+11, -16)

@@ -5,24 +5,27 @@ from typing import Any, Dict, Union
 import json
 import numpy as np
 
-from modelscope.models.nlp import SbertForSentimentClassification
-from modelscope.preprocessors import SentimentClassificationPreprocessor
-from modelscope.utils.constant import Tasks
+from ...models.nlp import SbertForSentimentClassification
+from ...preprocessors import SentimentClassificationPreprocessor
+from ...utils.constant import Tasks
 from ...models import Model
 from ..base import Input, Pipeline
 from ..builder import PIPELINES
+from ...metainfo import Pipelines
 
 __all__ = ['SentimentClassificationPipeline']
 
 
 @PIPELINES.register_module(
     Tasks.sentiment_classification,
-    module_name=r'sbert-sentiment-classification')
+    module_name=Pipelines.sentiment_classification)
 class SentimentClassificationPipeline(Pipeline):
 
     def __init__(self,
                  model: Union[SbertForSentimentClassification, str],
                  preprocessor: SentimentClassificationPreprocessor = None,
+                 first_sequence='first_sequence',
+                 second_sequence='second_sequence',
                  **kwargs):
         """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
@@ -38,20 +41,12 @@ class SentimentClassificationPipeline(Pipeline):
         if preprocessor is None:
             preprocessor = SentimentClassificationPreprocessor(
                 sc_model.model_dir,
-                first_sequence='first_sequence',
-                second_sequence='second_sequence')
+                first_sequence=first_sequence,
+                second_sequence=second_sequence)
         super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
+        assert len(sc_model.id2label) > 0
 
-        self.label_path = os.path.join(sc_model.model_dir,
-                                       'label_mapping.json')
-        with open(self.label_path) as f:
-            self.label_mapping = json.load(f)
-        self.label_id_to_name = {
-            idx: name
-            for name, idx in self.label_mapping.items()
-        }
-
-    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+    def postprocess(self, inputs: Dict[str, Any], **postprocess_params) -> Dict[str, str]:
         """process the prediction results
 
         Args:


modelscope/pipelines/nlp/text_generation_pipeline.py (+6, -6)

@@ -1,10 +1,10 @@
 from typing import Dict, Optional, Union
 
-from modelscope.metainfo import Pipelines
-from modelscope.models import Model
-from modelscope.models.nlp import PalmForTextGeneration
-from modelscope.preprocessors import TextGenerationPreprocessor
-from modelscope.utils.constant import Tasks
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.nlp import PalmForTextGeneration
+from ...preprocessors import TextGenerationPreprocessor
+from ...utils.constant import Tasks
 from ..base import Pipeline, Tensor
 from ..builder import PIPELINES
@@ -36,7 +36,7 @@ class TextGenerationPipeline(Pipeline):
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         self.tokenizer = model.tokenizer
 
-    def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]:
+    def postprocess(self, inputs: Dict[str, Tensor], **postprocess_params) -> Dict[str, str]:
         """process the prediction results
 
         Args:


modelscope/pipelines/nlp/word_segmentation_pipeline.py (+9, -8)

@@ -1,10 +1,10 @@
 from typing import Any, Dict, Optional, Union
 
-from modelscope.metainfo import Pipelines
-from modelscope.models import Model
-from modelscope.models.nlp import StructBertForTokenClassification
-from modelscope.preprocessors import TokenClassifcationPreprocessor
-from modelscope.utils.constant import Tasks
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.nlp import SbertForTokenClassification
+from ...preprocessors import TokenClassifcationPreprocessor
+from ...utils.constant import Tasks
 from ..base import Pipeline, Tensor
 from ..builder import PIPELINES
@@ -16,7 +16,7 @@ __all__ = ['WordSegmentationPipeline']
 class WordSegmentationPipeline(Pipeline):
 
     def __init__(self,
-                 model: Union[StructBertForTokenClassification, str],
+                 model: Union[SbertForTokenClassification, str],
                  preprocessor: Optional[TokenClassifcationPreprocessor] = None,
                  **kwargs):
         """use `model` and `preprocessor` to create a nlp word segmentation pipeline for prediction
@@ -27,15 +27,16 @@ class WordSegmentationPipeline(Pipeline):
         """
         model = model if isinstance(
             model,
-            StructBertForTokenClassification) else Model.from_pretrained(model)
+            SbertForTokenClassification) else Model.from_pretrained(model)
         if preprocessor is None:
             preprocessor = TokenClassifcationPreprocessor(model.model_dir)
         super().__init__(model=model, preprocessor=preprocessor, **kwargs)
         self.tokenizer = preprocessor.tokenizer
         self.config = model.config
+        assert len(self.config.id2label) > 0
         self.id2label = self.config.id2label
 
-    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+    def postprocess(self, inputs: Dict[str, Any], **postprocess_params) -> Dict[str, str]:
         """process the prediction results
 
         Args:
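
The new assert guards the id2label mapping that postprocess uses to turn argmax ids back into tag strings. An illustrative sketch (the tag set and logits are made up, not taken from any real model config):

    import numpy as np

    id2label = {0: 'B', 1: 'I', 2: 'E', 3: 'S'}  # made-up tag set
    logits = np.array([[0.1, 0.2, 0.3, 0.4],
                       [0.9, 0.0, 0.0, 0.1]])
    print([id2label[i] for i in logits.argmax(-1)])  # ['S', 'B']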


modelscope/pipelines/nlp/zero_shot_classification_pipeline.py (+7, -6)

@@ -6,10 +6,11 @@ import json
 import numpy as np
 from scipy.special import softmax
 
-from modelscope.models.nlp import SbertForZeroShotClassification
-from modelscope.preprocessors import SbertZeroShotClassificationPreprocessor
-from modelscope.utils.constant import Tasks
+from ...models.nlp import SbertForZeroShotClassification
+from ...preprocessors import ZeroShotClassificationPreprocessor
+from ...utils.constant import Tasks
 from ...models import Model
+from ...metainfo import Pipelines
 from ..base import Input, Pipeline
 from ..builder import PIPELINES
@@ -18,12 +19,12 @@ __all__ = ['ZeroShotClassificationPipeline']
 
 @PIPELINES.register_module(
     Tasks.zero_shot_classification,
-    module_name=r'bert-zero-shot-classification')
+    module_name=Pipelines.zero_shot_classification)
 class ZeroShotClassificationPipeline(Pipeline):
 
     def __init__(self,
                  model: Union[SbertForZeroShotClassification, str],
-                 preprocessor: SbertZeroShotClassificationPreprocessor = None,
+                 preprocessor: ZeroShotClassificationPreprocessor = None,
                  **kwargs):
         """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
@@ -32,7 +33,7 @@ class ZeroShotClassificationPipeline(Pipeline):
             preprocessor (SentimentClassificationPreprocessor): a preprocessor instance
         """
         assert isinstance(model, str) or isinstance(model, SbertForZeroShotClassification), \
-            'model must be a single str or BertForZeroShotClassification'
+            'model must be a single str or SbertForZeroShotClassification'
         sc_model = model if isinstance(
             model,
             SbertForZeroShotClassification) else Model.from_pretrained(model)


modelscope/preprocessors/nlp.py (+12, -12)

@@ -14,9 +14,9 @@ from .builder import PREPROCESSORS
 
 __all__ = [
     'Tokenize', 'SequenceClassificationPreprocessor',
-    'PalmTextGenerationPreprocessor', 'SbertZeroShotClassificationPreprocessor',
-    'SbertTokenClassifcationPreprocessor', 'SbertNLIPreprocessor',
-    'SbertSentimentClassificationPreprocessor', 'FillMaskPreprocessor'
+    'TextGenerationPreprocessor', 'ZeroShotClassificationPreprocessor',
+    'TokenClassifcationPreprocessor', 'NLIPreprocessor',
+    'SentimentClassificationPreprocessor', 'FillMaskPreprocessor'
 ]
 
 
@@ -35,8 +35,8 @@ class Tokenize(Preprocessor):
 
 @PREPROCESSORS.register_module(
-    Fields.nlp, module_name=Preprocessors.sbert_nli_tokenizer)
-class SbertNLIPreprocessor(Preprocessor):
+    Fields.nlp, module_name=Preprocessors.nli_tokenizer)
+class NLIPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
         """preprocess the data via the vocab.txt from the `model_dir` path
@@ -105,8 +105,8 @@ class SbertNLIPreprocessor(Preprocessor):
 
 @PREPROCESSORS.register_module(
-    Fields.nlp, module_name=Preprocessors.sbert_sen_cls_tokenizer)
-class SbertSentimentClassificationPreprocessor(Preprocessor):
+    Fields.nlp, module_name=Preprocessors.sen_cls_tokenizer)
+class SentimentClassificationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
         """preprocess the data via the vocab.txt from the `model_dir` path
@@ -264,7 +264,7 @@ class SequenceClassificationPreprocessor(Preprocessor):
 
 @PREPROCESSORS.register_module(
     Fields.nlp, module_name=Preprocessors.palm_text_gen_tokenizer)
-class PalmTextGenerationPreprocessor(Preprocessor):
+class TextGenerationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, tokenizer, *args, **kwargs):
         """preprocess the data using the vocab.txt from the `model_dir` path
@@ -374,8 +374,8 @@ class FillMaskPreprocessor(Preprocessor):
 
 @PREPROCESSORS.register_module(
-    Fields.nlp, module_name=Preprocessors.sbert_zero_shot_cls_tokenizer)
-class SbertZeroShotClassificationPreprocessor(Preprocessor):
+    Fields.nlp, module_name=Preprocessors.zero_shot_cls_tokenizer)
+class ZeroShotClassificationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
         """preprocess the data via the vocab.txt from the `model_dir` path
@@ -418,8 +418,8 @@ class SbertZeroShotClassificationPreprocessor(Preprocessor):
 
 @PREPROCESSORS.register_module(
-    Fields.nlp, module_name=Preprocessors.sbert_token_cls_tokenizer)
-class SbertTokenClassifcationPreprocessor(Preprocessor):
+    Fields.nlp, module_name=Preprocessors.token_cls_tokenizer)
+class TokenClassifcationPreprocessor(Preprocessor):
 
     def __init__(self, model_dir: str, *args, **kwargs):
         """preprocess the data via the vocab.txt from the `model_dir` path

