Browse Source

remove zeroshot

master
智丞 3 years ago
parent
commit
08e03ca9b0
10 changed files with 5 additions and 269 deletions
  1. +0
    -2
      modelscope/metainfo.py
  2. +2
    -2
      modelscope/models/__init__.py
  3. +0
    -1
      modelscope/models/nlp/__init__.py
  4. +0
    -50
      modelscope/models/nlp/sbert_for_zero_shot_classification.py
  5. +0
    -3
      modelscope/pipelines/builder.py
  6. +0
    -1
      modelscope/pipelines/nlp/__init__.py
  7. +0
    -98
      modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
  8. +3
    -47
      modelscope/preprocessors/nlp.py
  9. +0
    -1
      modelscope/utils/constant.py
  10. +0
    -64
      tests/pipelines/test_zero_shot_classification.py

+ 0
- 2
modelscope/metainfo.py View File

@@ -48,7 +48,6 @@ class Pipelines(object):
text_generation = 'text-generation'
sentiment_analysis = 'sentiment-analysis'
sentiment_classification = 'sentiment-classification'
zero_shot_classification = 'zero-shot-classification'
fill_mask = 'fill-mask'
nli = 'nli'
dialog_intent_prediction = 'dialog-intent-prediction'
@@ -95,7 +94,6 @@ class Preprocessors(object):
token_cls_tokenizer = 'token-cls-tokenizer'
nli_tokenizer = 'nli-tokenizer'
sen_cls_tokenizer = 'sen-cls-tokenizer'
zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'

# audio preprocessor
linear_aec_fbank = 'linear-aec-fbank'


+ 2
- 2
modelscope/models/__init__.py View File

@@ -7,5 +7,5 @@ from .builder import MODELS, build_model
from .multi_model import OfaForImageCaptioning
from .nlp import (BertForSequenceClassification, SbertForNLI,
SbertForSentenceSimilarity, SbertForSentimentClassification,
SbertForTokenClassification, SbertForZeroShotClassification,
StructBertForMaskedLM, VecoForMaskedLM)
SbertForTokenClassification, StructBertForMaskedLM,
VecoForMaskedLM)

+ 0
- 1
modelscope/models/nlp/__init__.py View File

@@ -5,6 +5,5 @@ from .sbert_for_nli import * # noqa F403
from .sbert_for_sentence_similarity import * # noqa F403
from .sbert_for_sentiment_classification import * # noqa F403
from .sbert_for_token_classification import * # noqa F403
from .sbert_for_zero_shot_classification import * # noqa F403
from .space.dialog_intent_prediction_model import * # noqa F403
from .space.dialog_modeling_model import * # noqa F403

+ 0
- 50
modelscope/models/nlp/sbert_for_zero_shot_classification.py View File

@@ -1,50 +0,0 @@
from typing import Any, Dict

import numpy as np

from modelscope.utils.constant import Tasks
from ...metainfo import Models
from ..base import Model
from ..builder import MODELS

__all__ = ['SbertForZeroShotClassification']


@MODELS.register_module(
Tasks.zero_shot_classification, module_name=Models.structbert)
class SbertForZeroShotClassification(Model):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the zero shot classification model from the `model_dir` path.

Args:
model_dir (str): the model path.
"""

super().__init__(model_dir, *args, **kwargs)
from sofa import SbertForSequenceClassification
self.model = SbertForSequenceClassification.from_pretrained(model_dir)

def train(self):
return self.model.train()

def eval(self):
return self.model.eval()

def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
"""return the result by the model

Args:
input (Dict[str, Any]): the preprocessed data

Returns:
Dict[str, np.ndarray]: results
Example:
{
'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value
}
"""
outputs = self.model(**input)
logits = outputs['logits'].numpy()
res = {'logits': logits}
return res

+ 0
- 3
modelscope/pipelines/builder.py View File

@@ -31,9 +31,6 @@ DEFAULT_MODEL_FOR_PIPELINE = {
'damo/nlp_structbert_sentiment-classification_chinese-base'),
Tasks.text_classification: ('bert-sentiment-analysis',
'damo/bert-base-sst2'),
Tasks.zero_shot_classification:
(Pipelines.zero_shot_classification,
'damo/nlp_structbert_zero-shot-classification_chinese-base'),
Tasks.image_matting: (Pipelines.image_matting,
'damo/cv_unet_image-matting'),
Tasks.text_classification: (Pipelines.sentiment_analysis,


+ 0
- 1
modelscope/pipelines/nlp/__init__.py View File

@@ -7,4 +7,3 @@ from .sentiment_classification_pipeline import * # noqa F403
from .sequence_classification_pipeline import * # noqa F403
from .text_generation_pipeline import * # noqa F403
from .word_segmentation_pipeline import * # noqa F403
from .zero_shot_classification_pipeline import * # noqa F403

+ 0
- 98
modelscope/pipelines/nlp/zero_shot_classification_pipeline.py View File

@@ -1,98 +0,0 @@
import os
import uuid
from typing import Any, Dict, Union

import json
import numpy as np
import torch
from scipy.special import softmax

from ...metainfo import Pipelines
from ...models import Model
from ...models.nlp import SbertForZeroShotClassification
from ...preprocessors import ZeroShotClassificationPreprocessor
from ...utils.constant import Tasks
from ..base import Input, Pipeline
from ..builder import PIPELINES

__all__ = ['ZeroShotClassificationPipeline']


@PIPELINES.register_module(
Tasks.zero_shot_classification,
module_name=Pipelines.zero_shot_classification)
class ZeroShotClassificationPipeline(Pipeline):

def __init__(self,
model: Union[SbertForZeroShotClassification, str],
preprocessor: ZeroShotClassificationPreprocessor = None,
**kwargs):
"""use `model` and `preprocessor` to create a nlp text classification pipeline for prediction

Args:
model (SbertForSentimentClassification): a model instance
preprocessor (SentimentClassificationPreprocessor): a preprocessor instance
"""
assert isinstance(model, str) or isinstance(model, SbertForZeroShotClassification), \
'model must be a single str or SbertForZeroShotClassification'
sc_model = model if isinstance(
model,
SbertForZeroShotClassification) else Model.from_pretrained(model)

self.entailment_id = 0
self.contradiction_id = 2

if preprocessor is None:
preprocessor = ZeroShotClassificationPreprocessor(
sc_model.model_dir)
sc_model.eval()
super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)

def _sanitize_parameters(self, **kwargs):
preprocess_params = {}
postprocess_params = {}

if 'candidate_labels' in kwargs:
candidate_labels = kwargs.pop('candidate_labels')
preprocess_params['candidate_labels'] = candidate_labels
postprocess_params['candidate_labels'] = candidate_labels
else:
raise ValueError('You must include at least one label.')
preprocess_params['hypothesis_template'] = kwargs.pop(
'hypothesis_template', '{}')

postprocess_params['multi_label'] = kwargs.pop('multi_label', False)
return preprocess_params, {}, postprocess_params

def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self,
inputs: Dict[str, Any],
candidate_labels,
multi_label=False) -> Dict[str, Any]:
"""process the prediction results

Args:
inputs (Dict[str, Any]): _description_

Returns:
Dict[str, Any]: the prediction results
"""

logits = inputs['logits']
if multi_label or len(candidate_labels) == 1:
logits = logits[..., [self.contradiction_id, self.entailment_id]]
scores = softmax(logits, axis=-1)[..., 1]
else:
logits = logits[..., self.entailment_id]
scores = softmax(logits, axis=-1)

reversed_index = list(reversed(scores.argsort()))
result = {
'labels': [candidate_labels[i] for i in reversed_index],
'scores': [scores[i].item() for i in reversed_index],
}
return result

+ 3
- 47
modelscope/preprocessors/nlp.py View File

@@ -13,9 +13,9 @@ from .builder import PREPROCESSORS

__all__ = [
'Tokenize', 'SequenceClassificationPreprocessor',
'TextGenerationPreprocessor', 'ZeroShotClassificationPreprocessor',
'TokenClassifcationPreprocessor', 'NLIPreprocessor',
'SentimentClassificationPreprocessor', 'FillMaskPreprocessor'
'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor',
'NLIPreprocessor', 'SentimentClassificationPreprocessor',
'FillMaskPreprocessor'
]


@@ -372,50 +372,6 @@ class FillMaskPreprocessor(Preprocessor):
return {k: torch.tensor(v) for k, v in rst.items()}


@PREPROCESSORS.register_module(
Fields.nlp, module_name=Preprocessors.zero_shot_cls_tokenizer)
class ZeroShotClassificationPreprocessor(Preprocessor):

def __init__(self, model_dir: str, *args, **kwargs):
"""preprocess the data via the vocab.txt from the `model_dir` path

Args:
model_dir (str): model path
"""

super().__init__(*args, **kwargs)

from sofa import SbertTokenizer
self.model_dir: str = model_dir
self.sequence_length = kwargs.pop('sequence_length', 512)
self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)

@type_assert(object, str)
def __call__(self, data: str, hypothesis_template: str,
candidate_labels: list) -> Dict[str, Any]:
"""process the raw input data

Args:
data (str): a sentence
Example:
'you are so handsome.'

Returns:
Dict[str, Any]: the preprocessed data
"""
pairs = [[data, hypothesis_template.format(label)]
for label in candidate_labels]

features = self.tokenizer(
pairs,
padding=True,
truncation=True,
max_length=self.sequence_length,
return_tensors='pt',
truncation_strategy='only_first')
return features


@PREPROCESSORS.register_module(
Fields.nlp, module_name=Preprocessors.token_cls_tokenizer)
class TokenClassifcationPreprocessor(Preprocessor):


+ 0
- 1
modelscope/utils/constant.py View File

@@ -32,7 +32,6 @@ class Tasks(object):
action_recognition = 'action-recognition'

# nlp tasks
zero_shot_classification = 'zero-shot-classification'
word_segmentation = 'word-segmentation'
nli = 'nli'
sentiment_classification = 'sentiment-classification'


+ 0
- 64
tests/pipelines/test_zero_shot_classification.py View File

@@ -1,64 +0,0 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.models.nlp import SbertForZeroShotClassification
from modelscope.pipelines import ZeroShotClassificationPipeline, pipeline
from modelscope.preprocessors import ZeroShotClassificationPreprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level


class ZeroShotClassificationTest(unittest.TestCase):
model_id = 'damo/nlp_structbert_zero-shot-classification_chinese-base'
sentence = '全新突破 解放军运20版空中加油机曝光'
labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事']
template = '这篇文章的标题是{}'

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_direct_file_download(self):
cache_path = snapshot_download(self.model_id)
tokenizer = ZeroShotClassificationPreprocessor(cache_path)
model = SbertForZeroShotClassification(cache_path, tokenizer=tokenizer)
pipeline1 = ZeroShotClassificationPipeline(
model, preprocessor=tokenizer)
pipeline2 = pipeline(
Tasks.zero_shot_classification,
model=model,
preprocessor=tokenizer)

print(
f'sentence: {self.sentence}\n'
f'pipeline1:{pipeline1(input=self.sentence,candidate_labels=self.labels)}'
)
print()
print(
f'sentence: {self.sentence}\n'
f'pipeline2: {pipeline2(self.sentence,candidate_labels=self.labels,hypothesis_template=self.template)}'
)

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_from_modelhub(self):
model = Model.from_pretrained(self.model_id)
tokenizer = ZeroShotClassificationPreprocessor(model.model_dir)
pipeline_ins = pipeline(
task=Tasks.zero_shot_classification,
model=model,
preprocessor=tokenizer)
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name(self):
pipeline_ins = pipeline(
task=Tasks.zero_shot_classification, model=self.model_id)
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_default_model(self):
pipeline_ins = pipeline(task=Tasks.zero_shot_classification)
print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))


if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save