@@ -48,7 +48,6 @@ class Pipelines(object):
     text_generation = 'text-generation'
     sentiment_analysis = 'sentiment-analysis'
     sentiment_classification = 'sentiment-classification'
-    zero_shot_classification = 'zero-shot-classification'
     fill_mask = 'fill-mask'
     nli = 'nli'
     dialog_intent_prediction = 'dialog-intent-prediction'
@@ -95,7 +94,6 @@ class Preprocessors(object):
     token_cls_tokenizer = 'token-cls-tokenizer'
     nli_tokenizer = 'nli-tokenizer'
     sen_cls_tokenizer = 'sen-cls-tokenizer'
-    zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'
 
     # audio preprocessor
     linear_aec_fbank = 'linear-aec-fbank'
@@ -7,5 +7,5 @@ from .builder import MODELS, build_model
 from .multi_model import OfaForImageCaptioning
 from .nlp import (BertForSequenceClassification, SbertForNLI,
                   SbertForSentenceSimilarity, SbertForSentimentClassification,
-                  SbertForTokenClassification, SbertForZeroShotClassification,
-                  StructBertForMaskedLM, VecoForMaskedLM)
+                  SbertForTokenClassification, StructBertForMaskedLM,
+                  VecoForMaskedLM)
@@ -5,6 +5,5 @@ from .sbert_for_nli import * # noqa F403
 from .sbert_for_sentence_similarity import * # noqa F403
 from .sbert_for_sentiment_classification import * # noqa F403
 from .sbert_for_token_classification import * # noqa F403
-from .sbert_for_zero_shot_classification import * # noqa F403
 from .space.dialog_intent_prediction_model import * # noqa F403
 from .space.dialog_modeling_model import * # noqa F403
@@ -1,50 +0,0 @@
-from typing import Any, Dict
-
-import numpy as np
-
-from modelscope.utils.constant import Tasks
-from ...metainfo import Models
-from ..base import Model
-from ..builder import MODELS
-
-__all__ = ['SbertForZeroShotClassification']
-
-
-@MODELS.register_module(
-    Tasks.zero_shot_classification, module_name=Models.structbert)
-class SbertForZeroShotClassification(Model):
-
-    def __init__(self, model_dir: str, *args, **kwargs):
-        """initialize the zero shot classification model from the `model_dir` path.
-
-        Args:
-            model_dir (str): the model path.
-        """
-        super().__init__(model_dir, *args, **kwargs)
-        from sofa import SbertForSequenceClassification
-        self.model = SbertForSequenceClassification.from_pretrained(model_dir)
-
-    def train(self):
-        return self.model.train()
-
-    def eval(self):
-        return self.model.eval()
-
-    def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
-        """return the result by the model
-
-        Args:
-            input (Dict[str, Any]): the preprocessed data
-
-        Returns:
-            Dict[str, np.ndarray]: results
-                Example:
-                    {
-                        'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value
-                    }
-        """
-        outputs = self.model(**input)
-        logits = outputs['logits'].numpy()
-        res = {'logits': logits}
-        return res
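Reviewer note: for context on what is being dropped, the deleted class wraps an NLI sequence classifier: the input sentence becomes the premise and each candidate label is templated into a hypothesis. Below is a minimal sketch of that reformulation using a generic Hugging Face NLI checkpoint; the checkpoint name is a placeholder assumption, not the deleted damo model.

# Sketch of the NLI trick behind SbertForZeroShotClassification.
# 'some-nli-checkpoint' is hypothetical; any NLI sequence classifier fits.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = 'some-nli-checkpoint'
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt).eval()

premise = 'A new tanker aircraft was unveiled.'
labels = ['military', 'sports']
hypotheses = [f'This text is about {label}.' for label in labels]

with torch.no_grad():
    # one premise/hypothesis pair per candidate label, tokenized as a batch
    logits = model(**tokenizer([premise] * len(labels), hypotheses,
                               padding=True, truncation='only_first',
                               return_tensors='pt')).logits
# logits has shape (num_labels, num_nli_classes); the pipeline deleted below
# turns the entailment column into per-label scores.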
@@ -31,9 +31,6 @@ DEFAULT_MODEL_FOR_PIPELINE = {
         'damo/nlp_structbert_sentiment-classification_chinese-base'),
     Tasks.text_classification: ('bert-sentiment-analysis',
                                 'damo/bert-base-sst2'),
-    Tasks.zero_shot_classification:
-    (Pipelines.zero_shot_classification,
-     'damo/nlp_structbert_zero-shot-classification_chinese-base'),
     Tasks.image_matting: (Pipelines.image_matting,
                           'damo/cv_unet_image-matting'),
     Tasks.text_classification: (Pipelines.sentiment_analysis,
@@ -7,4 +7,3 @@ from .sentiment_classification_pipeline import * # noqa F403
 from .sequence_classification_pipeline import * # noqa F403
 from .text_generation_pipeline import * # noqa F403
 from .word_segmentation_pipeline import * # noqa F403
-from .zero_shot_classification_pipeline import * # noqa F403
@@ -1,98 +0,0 @@
-import os
-import uuid
-from typing import Any, Dict, Union
-
-import json
-import numpy as np
-import torch
-from scipy.special import softmax
-
-from ...metainfo import Pipelines
-from ...models import Model
-from ...models.nlp import SbertForZeroShotClassification
-from ...preprocessors import ZeroShotClassificationPreprocessor
-from ...utils.constant import Tasks
-from ..base import Input, Pipeline
-from ..builder import PIPELINES
-
-__all__ = ['ZeroShotClassificationPipeline']
-
-
-@PIPELINES.register_module(
-    Tasks.zero_shot_classification,
-    module_name=Pipelines.zero_shot_classification)
-class ZeroShotClassificationPipeline(Pipeline):
-
-    def __init__(self,
-                 model: Union[SbertForZeroShotClassification, str],
-                 preprocessor: ZeroShotClassificationPreprocessor = None,
-                 **kwargs):
"""use `model` and `preprocessor` to create a nlp text classification pipeline for prediction | |||
Args: | |||
model (SbertForSentimentClassification): a model instance | |||
preprocessor (SentimentClassificationPreprocessor): a preprocessor instance | |||
""" | |||
-        assert isinstance(model, str) or isinstance(model, SbertForZeroShotClassification), \
-            'model must be a single str or SbertForZeroShotClassification'
-        sc_model = model if isinstance(
-            model,
-            SbertForZeroShotClassification) else Model.from_pretrained(model)
-        self.entailment_id = 0
-        self.contradiction_id = 2
-        if preprocessor is None:
-            preprocessor = ZeroShotClassificationPreprocessor(
-                sc_model.model_dir)
-        sc_model.eval()
-        super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
-
-    def _sanitize_parameters(self, **kwargs):
-        preprocess_params = {}
-        postprocess_params = {}
-        if 'candidate_labels' in kwargs:
-            candidate_labels = kwargs.pop('candidate_labels')
-            preprocess_params['candidate_labels'] = candidate_labels
-            postprocess_params['candidate_labels'] = candidate_labels
-        else:
-            raise ValueError('You must include at least one label.')
-        preprocess_params['hypothesis_template'] = kwargs.pop(
-            'hypothesis_template', '{}')
-        postprocess_params['multi_label'] = kwargs.pop('multi_label', False)
-        return preprocess_params, {}, postprocess_params
-
-    def forward(self, inputs: Dict[str, Any],
-                **forward_params) -> Dict[str, Any]:
-        with torch.no_grad():
-            return super().forward(inputs, **forward_params)
-    def postprocess(self,
-                    inputs: Dict[str, Any],
-                    candidate_labels,
-                    multi_label=False) -> Dict[str, Any]:
-        """process the prediction results
-
-        Args:
-            inputs (Dict[str, Any]): the forward output containing the 'logits' array
-
-        Returns:
-            Dict[str, Any]: the prediction results
-        """
-        logits = inputs['logits']
-        if multi_label or len(candidate_labels) == 1:
-            logits = logits[..., [self.contradiction_id, self.entailment_id]]
-            scores = softmax(logits, axis=-1)[..., 1]
-        else:
-            logits = logits[..., self.entailment_id]
-            scores = softmax(logits, axis=-1)
-        reversed_index = list(reversed(scores.argsort()))
-        result = {
-            'labels': [candidate_labels[i] for i in reversed_index],
-            'scores': [scores[i].item() for i in reversed_index],
-        }
-        return result
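Reviewer note: the only subtle logic in this deleted file is postprocess(). Here is a self-contained sketch of its two scoring modes on fabricated logits; entailment_id=0 and contradiction_id=2 are copied from the deleted code, everything else is illustrative.

# Scoring scheme of the deleted postprocess(), on made-up example logits.
import numpy as np
from scipy.special import softmax

entailment_id, contradiction_id = 0, 2
candidate_labels = ['sports', 'military', 'finance']  # illustrative labels
logits = np.array([[-1.2, 0.3, 1.0],   # one (entail, neutral, contradict)
                   [2.4, 0.1, -1.7],   # triple per candidate label
                   [-0.5, 0.2, 0.4]])

# multi-label: per-label softmax over the (contradiction, entailment) columns,
# keeping the entailment probability, so labels are scored independently
multi = softmax(logits[..., [contradiction_id, entailment_id]], axis=-1)[..., 1]

# single-label: one softmax over the entailment logits across all labels,
# so the scores compete and sum to 1
single = softmax(logits[..., entailment_id], axis=-1)

order = list(reversed(single.argsort()))
print({'labels': [candidate_labels[i] for i in order],
       'scores': [single[i].item() for i in order]})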
@@ -13,9 +13,9 @@ from .builder import PREPROCESSORS
 __all__ = [
     'Tokenize', 'SequenceClassificationPreprocessor',
-    'TextGenerationPreprocessor', 'ZeroShotClassificationPreprocessor',
-    'TokenClassifcationPreprocessor', 'NLIPreprocessor',
-    'SentimentClassificationPreprocessor', 'FillMaskPreprocessor'
+    'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor',
+    'NLIPreprocessor', 'SentimentClassificationPreprocessor',
+    'FillMaskPreprocessor'
 ]
@@ -372,50 +372,6 @@ class FillMaskPreprocessor(Preprocessor):
         return {k: torch.tensor(v) for k, v in rst.items()}
 
-
-@PREPROCESSORS.register_module(
-    Fields.nlp, module_name=Preprocessors.zero_shot_cls_tokenizer)
-class ZeroShotClassificationPreprocessor(Preprocessor):
-
-    def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
-
-        Args:
-            model_dir (str): model path
-        """
-        super().__init__(*args, **kwargs)
-        from sofa import SbertTokenizer
-        self.model_dir: str = model_dir
-        self.sequence_length = kwargs.pop('sequence_length', 512)
-        self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)
-
-    @type_assert(object, str)
-    def __call__(self, data: str, hypothesis_template: str,
-                 candidate_labels: list) -> Dict[str, Any]:
-        """process the raw input data
-
-        Args:
-            data (str): a sentence
-                Example:
-                    'you are so handsome.'
-
-        Returns:
-            Dict[str, Any]: the preprocessed data
-        """
-        pairs = [[data, hypothesis_template.format(label)]
-                 for label in candidate_labels]
-        features = self.tokenizer(
-            pairs,
-            padding=True,
-            truncation=True,
-            max_length=self.sequence_length,
-            return_tensors='pt',
-            truncation_strategy='only_first')
-        return features
-
-
 @PREPROCESSORS.register_module(
     Fields.nlp, module_name=Preprocessors.token_cls_tokenizer)
 class TokenClassifcationPreprocessor(Preprocessor):
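Reviewer note: the deleted preprocessor's real work is expanding one sentence into premise/hypothesis pairs before batch tokenization. A sketch with a generic Hugging Face tokenizer standing in for sofa's SbertTokenizer; the bert-base-chinese checkpoint is a stand-in assumption.

# Pair construction mirroring the deleted __call__, with a stand-in tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-chinese')  # stand-in
sentence = '全新突破 解放军运20版空中加油机曝光'
template = '这篇文章的标题是{}'
labels = ['军事', '体育']

hypotheses = [template.format(label) for label in labels]
features = tokenizer([sentence] * len(labels), hypotheses, padding=True,
                     truncation='only_first', max_length=512,
                     return_tensors='pt')
print(features['input_ids'].shape)  # (num_labels, padded_seq_len)

Passing truncation='only_first' keeps the templated hypothesis intact when a long premise has to be cut, matching the truncation_strategy='only_first' intent of the deleted code.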
@@ -32,7 +32,6 @@ class Tasks(object):
     action_recognition = 'action-recognition'
 
     # nlp tasks
-    zero_shot_classification = 'zero-shot-classification'
     word_segmentation = 'word-segmentation'
     nli = 'nli'
     sentiment_classification = 'sentiment-classification'
@@ -1,64 +0,0 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import unittest | |||
from modelscope.hub.snapshot_download import snapshot_download | |||
from modelscope.models import Model | |||
from modelscope.models.nlp import SbertForZeroShotClassification | |||
from modelscope.pipelines import ZeroShotClassificationPipeline, pipeline | |||
from modelscope.preprocessors import ZeroShotClassificationPreprocessor | |||
from modelscope.utils.constant import Tasks | |||
from modelscope.utils.test_utils import test_level | |||
class ZeroShotClassificationTest(unittest.TestCase): | |||
model_id = 'damo/nlp_structbert_zero-shot-classification_chinese-base' | |||
sentence = '全新突破 解放军运20版空中加油机曝光' | |||
labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事'] | |||
template = '这篇文章的标题是{}' | |||
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||
def test_run_with_direct_file_download(self): | |||
cache_path = snapshot_download(self.model_id) | |||
tokenizer = ZeroShotClassificationPreprocessor(cache_path) | |||
model = SbertForZeroShotClassification(cache_path, tokenizer=tokenizer) | |||
pipeline1 = ZeroShotClassificationPipeline( | |||
model, preprocessor=tokenizer) | |||
pipeline2 = pipeline( | |||
Tasks.zero_shot_classification, | |||
model=model, | |||
preprocessor=tokenizer) | |||
print( | |||
f'sentence: {self.sentence}\n' | |||
f'pipeline1:{pipeline1(input=self.sentence,candidate_labels=self.labels)}' | |||
) | |||
print() | |||
print( | |||
f'sentence: {self.sentence}\n' | |||
f'pipeline2: {pipeline2(self.sentence,candidate_labels=self.labels,hypothesis_template=self.template)}' | |||
) | |||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
def test_run_with_model_from_modelhub(self): | |||
model = Model.from_pretrained(self.model_id) | |||
tokenizer = ZeroShotClassificationPreprocessor(model.model_dir) | |||
pipeline_ins = pipeline( | |||
task=Tasks.zero_shot_classification, | |||
model=model, | |||
preprocessor=tokenizer) | |||
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) | |||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
def test_run_with_model_name(self): | |||
pipeline_ins = pipeline( | |||
task=Tasks.zero_shot_classification, model=self.model_id) | |||
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) | |||
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
def test_run_with_default_model(self): | |||
pipeline_ins = pipeline(task=Tasks.zero_shot_classification) | |||
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) | |||
if __name__ == '__main__': | |||
unittest.main() |
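Reviewer note: with these tests gone, the user-facing entry points below lose all coverage. The calls are lifted from the deleted tests and will no longer import once this PR merges; kept here only as a record of the removed API surface.

# Removed API surface (not runnable after this PR).
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

classifier = pipeline(task=Tasks.zero_shot_classification)
print(classifier(input='全新突破 解放军运20版空中加油机曝光',
                 candidate_labels=['军事', '体育'],
                 hypothesis_template='这篇文章的标题是{}'))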