From 08e03ca9b071338d958e24f8d639f0ef71fea2d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=99=BA=E4=B8=9E?=
Date: Thu, 23 Jun 2022 19:53:29 +0800
Subject: [PATCH] remove zeroshot

---
 modelscope/metainfo.py                        |  2 -
 modelscope/models/__init__.py                 |  4 +-
 modelscope/models/nlp/__init__.py             |  1 -
 .../nlp/sbert_for_zero_shot_classification.py | 50 ----------
 modelscope/pipelines/builder.py               |  3 -
 modelscope/pipelines/nlp/__init__.py          |  1 -
 .../nlp/zero_shot_classification_pipeline.py  | 98 -------------------
 modelscope/preprocessors/nlp.py               | 50 +---------
 modelscope/utils/constant.py                  |  1 -
 .../test_zero_shot_classification.py          | 64 ------------
 10 files changed, 5 insertions(+), 269 deletions(-)
 delete mode 100644 modelscope/models/nlp/sbert_for_zero_shot_classification.py
 delete mode 100644 modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
 delete mode 100644 tests/pipelines/test_zero_shot_classification.py

diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
index 388d8397..13028278 100644
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -48,7 +48,6 @@ class Pipelines(object):
     text_generation = 'text-generation'
     sentiment_analysis = 'sentiment-analysis'
     sentiment_classification = 'sentiment-classification'
-    zero_shot_classification = 'zero-shot-classification'
     fill_mask = 'fill-mask'
     nli = 'nli'
     dialog_intent_prediction = 'dialog-intent-prediction'
@@ -95,7 +94,6 @@ class Preprocessors(object):
     token_cls_tokenizer = 'token-cls-tokenizer'
     nli_tokenizer = 'nli-tokenizer'
     sen_cls_tokenizer = 'sen-cls-tokenizer'
-    zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'
 
     # audio preprocessor
     linear_aec_fbank = 'linear-aec-fbank'
diff --git a/modelscope/models/__init__.py b/modelscope/models/__init__.py
index 629f2270..6e769d8d 100644
--- a/modelscope/models/__init__.py
+++ b/modelscope/models/__init__.py
@@ -7,5 +7,5 @@ from .builder import MODELS, build_model
 from .multi_model import OfaForImageCaptioning
 from .nlp import (BertForSequenceClassification, SbertForNLI,
                   SbertForSentenceSimilarity, SbertForSentimentClassification,
-                  SbertForTokenClassification, SbertForZeroShotClassification,
-                  StructBertForMaskedLM, VecoForMaskedLM)
+                  SbertForTokenClassification, StructBertForMaskedLM,
+                  VecoForMaskedLM)
diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py
index 78b087e6..399aa63f 100644
--- a/modelscope/models/nlp/__init__.py
+++ b/modelscope/models/nlp/__init__.py
@@ -5,6 +5,5 @@ from .sbert_for_nli import *  # noqa F403
 from .sbert_for_sentence_similarity import *  # noqa F403
 from .sbert_for_sentiment_classification import *  # noqa F403
 from .sbert_for_token_classification import *  # noqa F403
-from .sbert_for_zero_shot_classification import *  # noqa F403
 from .space.dialog_intent_prediction_model import *  # noqa F403
 from .space.dialog_modeling_model import *  # noqa F403
diff --git a/modelscope/models/nlp/sbert_for_zero_shot_classification.py b/modelscope/models/nlp/sbert_for_zero_shot_classification.py
deleted file mode 100644
index 837bb41e..00000000
--- a/modelscope/models/nlp/sbert_for_zero_shot_classification.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from typing import Any, Dict
-
-import numpy as np
-
-from modelscope.utils.constant import Tasks
-from ...metainfo import Models
-from ..base import Model
-from ..builder import MODELS
-
-__all__ = ['SbertForZeroShotClassification']
-
-
-@MODELS.register_module(
-    Tasks.zero_shot_classification, module_name=Models.structbert)
-class SbertForZeroShotClassification(Model):
-
-    def __init__(self, model_dir: str, *args, **kwargs):
-        """initialize the zero shot classification model from the `model_dir` path.
-
-        Args:
-            model_dir (str): the model path.
-        """
-
-        super().__init__(model_dir, *args, **kwargs)
-        from sofa import SbertForSequenceClassification
-        self.model = SbertForSequenceClassification.from_pretrained(model_dir)
-
-    def train(self):
-        return self.model.train()
-
-    def eval(self):
-        return self.model.eval()
-
-    def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]:
-        """return the result by the model
-
-        Args:
-            input (Dict[str, Any]): the preprocessed data
-
-        Returns:
-            Dict[str, np.ndarray]: results
-                Example:
-                    {
-                        'logits': array([[-0.53860897,  1.5029076 ]], dtype=float32)  # true value
-                    }
-        """
-        outputs = self.model(**input)
-        logits = outputs['logits'].numpy()
-        res = {'logits': logits}
-        return res
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index b0f2955c..ebbdf01b 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -31,9 +31,6 @@ DEFAULT_MODEL_FOR_PIPELINE = {
      'damo/nlp_structbert_sentiment-classification_chinese-base'),
     Tasks.text_classification: ('bert-sentiment-analysis',
                                 'damo/bert-base-sst2'),
-    Tasks.zero_shot_classification:
-    (Pipelines.zero_shot_classification,
-     'damo/nlp_structbert_zero-shot-classification_chinese-base'),
     Tasks.image_matting: (Pipelines.image_matting,
                           'damo/cv_unet_image-matting'),
     Tasks.text_classification: (Pipelines.sentiment_analysis,
diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py
index 7aebf3e8..76e0ff4e 100644
--- a/modelscope/pipelines/nlp/__init__.py
+++ b/modelscope/pipelines/nlp/__init__.py
@@ -7,4 +7,3 @@ from .sentiment_classification_pipeline import *  # noqa F403
 from .sequence_classification_pipeline import *  # noqa F403
 from .text_generation_pipeline import *  # noqa F403
 from .word_segmentation_pipeline import *  # noqa F403
-from .zero_shot_classification_pipeline import *  # noqa F403
diff --git a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
deleted file mode 100644
index 13ac5d52..00000000
--- a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import os
-import uuid
-from typing import Any, Dict, Union
-
-import json
-import numpy as np
-import torch
-from scipy.special import softmax
-
-from ...metainfo import Pipelines
-from ...models import Model
-from ...models.nlp import SbertForZeroShotClassification
-from ...preprocessors import ZeroShotClassificationPreprocessor
-from ...utils.constant import Tasks
-from ..base import Input, Pipeline
-from ..builder import PIPELINES
-
-__all__ = ['ZeroShotClassificationPipeline']
-
-
-@PIPELINES.register_module(
-    Tasks.zero_shot_classification,
-    module_name=Pipelines.zero_shot_classification)
-class ZeroShotClassificationPipeline(Pipeline):
-
-    def __init__(self,
-                 model: Union[SbertForZeroShotClassification, str],
-                 preprocessor: ZeroShotClassificationPreprocessor = None,
-                 **kwargs):
-        """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction
-
-        Args:
-            model (SbertForSentimentClassification): a model instance
-            preprocessor (SentimentClassificationPreprocessor): a preprocessor instance
-        """
-        assert isinstance(model, str) or isinstance(model, SbertForZeroShotClassification), \
-            'model must be a single str or SbertForZeroShotClassification'
-        sc_model = model if isinstance(
-            model,
-            SbertForZeroShotClassification) else Model.from_pretrained(model)
-
-        self.entailment_id = 0
-        self.contradiction_id = 2
-
-        if preprocessor is None:
-            preprocessor = ZeroShotClassificationPreprocessor(
-                sc_model.model_dir)
-        sc_model.eval()
-        super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs)
-
-    def _sanitize_parameters(self, **kwargs):
-        preprocess_params = {}
-        postprocess_params = {}
-
-        if 'candidate_labels' in kwargs:
-            candidate_labels = kwargs.pop('candidate_labels')
-            preprocess_params['candidate_labels'] = candidate_labels
-            postprocess_params['candidate_labels'] = candidate_labels
-        else:
-            raise ValueError('You must include at least one label.')
-        preprocess_params['hypothesis_template'] = kwargs.pop(
-            'hypothesis_template', '{}')
-
-        postprocess_params['multi_label'] = kwargs.pop('multi_label', False)
-        return preprocess_params, {}, postprocess_params
-
-    def forward(self, inputs: Dict[str, Any],
-                **forward_params) -> Dict[str, Any]:
-        with torch.no_grad():
-            return super().forward(inputs, **forward_params)
-
-    def postprocess(self,
-                    inputs: Dict[str, Any],
-                    candidate_labels,
-                    multi_label=False) -> Dict[str, Any]:
-        """process the prediction results
-
-        Args:
-            inputs (Dict[str, Any]): _description_
-
-        Returns:
-            Dict[str, Any]: the prediction results
-        """
-
-        logits = inputs['logits']
-        if multi_label or len(candidate_labels) == 1:
-            logits = logits[..., [self.contradiction_id, self.entailment_id]]
-            scores = softmax(logits, axis=-1)[..., 1]
-        else:
-            logits = logits[..., self.entailment_id]
-            scores = softmax(logits, axis=-1)
-
-        reversed_index = list(reversed(scores.argsort()))
-        result = {
-            'labels': [candidate_labels[i] for i in reversed_index],
-            'scores': [scores[i].item() for i in reversed_index],
-        }
-        return result
diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py
index 8346402c..5cd9463d 100644
--- a/modelscope/preprocessors/nlp.py
+++ b/modelscope/preprocessors/nlp.py
@@ -13,9 +13,9 @@ from .builder import PREPROCESSORS
 
 __all__ = [
     'Tokenize', 'SequenceClassificationPreprocessor',
-    'TextGenerationPreprocessor', 'ZeroShotClassificationPreprocessor',
-    'TokenClassifcationPreprocessor', 'NLIPreprocessor',
-    'SentimentClassificationPreprocessor', 'FillMaskPreprocessor'
+    'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor',
+    'NLIPreprocessor', 'SentimentClassificationPreprocessor',
+    'FillMaskPreprocessor'
 ]
 
 
@@ -372,50 +372,6 @@ class FillMaskPreprocessor(Preprocessor):
         return {k: torch.tensor(v) for k, v in rst.items()}
 
 
-@PREPROCESSORS.register_module(
-    Fields.nlp, module_name=Preprocessors.zero_shot_cls_tokenizer)
-class ZeroShotClassificationPreprocessor(Preprocessor):
-
-    def __init__(self, model_dir: str, *args, **kwargs):
-        """preprocess the data via the vocab.txt from the `model_dir` path
-
-        Args:
-            model_dir (str): model path
-        """
-
-        super().__init__(*args, **kwargs)
-
-        from sofa import SbertTokenizer
-        self.model_dir: str = model_dir
-        self.sequence_length = kwargs.pop('sequence_length', 512)
-        self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir)
-
-    @type_assert(object, str)
-    def __call__(self, data: str, hypothesis_template: str,
-                 candidate_labels: list) -> Dict[str, Any]:
-        """process the raw input data
-
-        Args:
-            data (str): a sentence
-                Example:
-                    'you are so handsome.'
-
-        Returns:
-            Dict[str, Any]: the preprocessed data
-        """
-        pairs = [[data, hypothesis_template.format(label)]
-                 for label in candidate_labels]
-
-        features = self.tokenizer(
-            pairs,
-            padding=True,
-            truncation=True,
-            max_length=self.sequence_length,
-            return_tensors='pt',
-            truncation_strategy='only_first')
-        return features
-
-
 @PREPROCESSORS.register_module(
     Fields.nlp, module_name=Preprocessors.token_cls_tokenizer)
 class TokenClassifcationPreprocessor(Preprocessor):
diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index 75e2d04d..85559917 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -32,7 +32,6 @@ class Tasks(object):
     action_recognition = 'action-recognition'
 
     # nlp tasks
-    zero_shot_classification = 'zero-shot-classification'
     word_segmentation = 'word-segmentation'
     nli = 'nli'
     sentiment_classification = 'sentiment-classification'
diff --git a/tests/pipelines/test_zero_shot_classification.py b/tests/pipelines/test_zero_shot_classification.py
deleted file mode 100644
index 236013aa..00000000
--- a/tests/pipelines/test_zero_shot_classification.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) Alibaba, Inc. and its affiliates.
-import unittest
-
-from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.models import Model
-from modelscope.models.nlp import SbertForZeroShotClassification
-from modelscope.pipelines import ZeroShotClassificationPipeline, pipeline
-from modelscope.preprocessors import ZeroShotClassificationPreprocessor
-from modelscope.utils.constant import Tasks
-from modelscope.utils.test_utils import test_level
-
-
-class ZeroShotClassificationTest(unittest.TestCase):
-    model_id = 'damo/nlp_structbert_zero-shot-classification_chinese-base'
-    sentence = '全新突破 解放军运20版空中加油机曝光'
-    labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事']
-    template = '这篇文章的标题是{}'
-
-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
-    def test_run_with_direct_file_download(self):
-        cache_path = snapshot_download(self.model_id)
-        tokenizer = ZeroShotClassificationPreprocessor(cache_path)
-        model = SbertForZeroShotClassification(cache_path, tokenizer=tokenizer)
-        pipeline1 = ZeroShotClassificationPipeline(
-            model, preprocessor=tokenizer)
-        pipeline2 = pipeline(
-            Tasks.zero_shot_classification,
-            model=model,
-            preprocessor=tokenizer)
-
-        print(
-            f'sentence: {self.sentence}\n'
-            f'pipeline1:{pipeline1(input=self.sentence,candidate_labels=self.labels)}'
-        )
-        print()
-        print(
-            f'sentence: {self.sentence}\n'
-            f'pipeline2: {pipeline2(self.sentence,candidate_labels=self.labels,hypothesis_template=self.template)}'
-        )
-
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_model_from_modelhub(self):
-        model = Model.from_pretrained(self.model_id)
-        tokenizer = ZeroShotClassificationPreprocessor(model.model_dir)
-        pipeline_ins = pipeline(
-            task=Tasks.zero_shot_classification,
-            model=model,
-            preprocessor=tokenizer)
-        print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))
-
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_model_name(self):
-        pipeline_ins = pipeline(
-            task=Tasks.zero_shot_classification, model=self.model_id)
-        print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))
-
-    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
-    def test_run_with_default_model(self):
-        pipeline_ins = pipeline(task=Tasks.zero_shot_classification)
-        print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))
-
-
-if __name__ == '__main__':
-    unittest.main()
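Context for reviewers: the deleted ZeroShotClassificationPipeline implemented NLI-based zero-shot classification. Each candidate label is formatted into a hypothesis via hypothesis_template, paired with the input sentence, and scored through the NLI head; labels are then ranked by entailment probability. The sketch below mirrors only the deleted postprocess step in isolation, using fabricated logits for illustration; the entailment/contradiction indices (0 and 2) follow the deleted code, and score_labels is a hypothetical helper name, not part of the modelscope API.

    # Minimal standalone sketch of the removed label-scoring step.
    import numpy as np
    from scipy.special import softmax

    ENTAILMENT_ID = 0      # index of the entailment logit (as in the deleted code)
    CONTRADICTION_ID = 2   # index of the contradiction logit

    def score_labels(logits, candidate_labels, multi_label=False):
        """Rank candidate labels from per-pair NLI logits of shape (n_labels, 3)."""
        if multi_label or len(candidate_labels) == 1:
            # Score each label independently: softmax over
            # [contradiction, entailment], keep the entailment probability.
            pair = logits[..., [CONTRADICTION_ID, ENTAILMENT_ID]]
            scores = softmax(pair, axis=-1)[..., 1]
        else:
            # Single-label case: softmax of the entailment logits across labels.
            scores = softmax(logits[..., ENTAILMENT_ID], axis=-1)
        order = list(reversed(scores.argsort()))  # highest score first
        return {
            'labels': [candidate_labels[i] for i in order],
            'scores': [scores[i].item() for i in order],
        }

    # Fabricated logits for three (sentence, hypothesis) pairs, for demonstration.
    logits = np.array([[2.1, 0.3, -1.0], [0.2, 0.1, 1.5], [-0.5, 0.0, 0.9]])
    print(score_labels(logits, ['sports', 'finance', 'tech']))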