diff --git a/modelscope/models/multi_modal/ofa/utils/constant.py b/modelscope/models/multi_modal/ofa/utils/constant.py index eec2cc6c..d3257383 100644 --- a/modelscope/models/multi_modal/ofa/utils/constant.py +++ b/modelscope/models/multi_modal/ofa/utils/constant.py @@ -3,9 +3,9 @@ from modelscope.outputs import OutputKeys from modelscope.utils.constant import Tasks OFA_TASK_KEY_MAPPING = { - Tasks.ofa_ocr_recognition: OutputKeys.TEXT, + Tasks.ocr_recognition: OutputKeys.TEXT, Tasks.image_captioning: OutputKeys.CAPTION, - Tasks.summarization: OutputKeys.TEXT, + Tasks.text_summarization: OutputKeys.TEXT, Tasks.visual_question_answering: OutputKeys.TEXT, Tasks.visual_grounding: OutputKeys.BOXES, Tasks.text_classification: (OutputKeys.SCORES, OutputKeys.LABELS), diff --git a/modelscope/models/multi_modal/ofa_for_all_tasks.py b/modelscope/models/multi_modal/ofa_for_all_tasks.py index 20cab6a6..6e331228 100644 --- a/modelscope/models/multi_modal/ofa_for_all_tasks.py +++ b/modelscope/models/multi_modal/ofa_for_all_tasks.py @@ -27,13 +27,13 @@ __all__ = ['OfaForAllTasks'] @MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa) -@MODELS.register_module(Tasks.ofa_ocr_recognition, module_name=Models.ofa) +@MODELS.register_module(Tasks.ocr_recognition, module_name=Models.ofa) @MODELS.register_module(Tasks.visual_grounding, module_name=Models.ofa) @MODELS.register_module( Tasks.visual_question_answering, module_name=Models.ofa) @MODELS.register_module(Tasks.visual_entailment, module_name=Models.ofa) @MODELS.register_module(Tasks.image_classification, module_name=Models.ofa) -@MODELS.register_module(Tasks.summarization, module_name=Models.ofa) +@MODELS.register_module(Tasks.text_summarization, module_name=Models.ofa) @MODELS.register_module(Tasks.text_classification, module_name=Models.ofa) class OfaForAllTasks(TorchModel): @@ -97,9 +97,9 @@ class OfaForAllTasks(TorchModel): 'traverse': self._traverse_inference, } self.task_inference_mapping = { - Tasks.ofa_ocr_recognition: self._text_gen_inference, + Tasks.ocr_recognition: self._text_gen_inference, Tasks.image_captioning: self._text_gen_inference, - Tasks.summarization: self._text_gen_inference, + Tasks.text_summarization: self._text_gen_inference, Tasks.visual_grounding: self._visual_grounding_inference, Tasks.visual_entailment: inference_d[self.gen_type], Tasks.visual_question_answering: inference_d[self.gen_type], diff --git a/modelscope/outputs.py b/modelscope/outputs.py index 365e2bf9..af37eb84 100644 --- a/modelscope/outputs.py +++ b/modelscope/outputs.py @@ -661,7 +661,7 @@ TASK_OUTPUTS = { # "caption": "this is an image caption text." # } Tasks.image_captioning: [OutputKeys.CAPTION], - Tasks.ofa_ocr_recognition: [OutputKeys.TEXT], + Tasks.ocr_recognition: [OutputKeys.TEXT], # visual grounding result for single sample # { diff --git a/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py b/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py index 76011be0..d3f15c23 100644 --- a/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py +++ b/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py @@ -11,6 +11,8 @@ from modelscope.utils.logger import get_logger logger = get_logger() +@PIPELINES.register_module( + Tasks.image_text_retrieval, module_name=Pipelines.multi_modal_embedding) @PIPELINES.register_module( Tasks.multi_modal_embedding, module_name=Pipelines.multi_modal_embedding) class MultiModalEmbeddingPipeline(Pipeline): diff --git a/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py b/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py index 9cd63b6c..c61b38f3 100644 --- a/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py +++ b/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py @@ -16,7 +16,7 @@ logger = get_logger() @PIPELINES.register_module( - Tasks.ofa_ocr_recognition, module_name=Pipelines.ofa_ocr_recognition) + Tasks.ocr_recognition, module_name=Pipelines.ofa_ocr_recognition) class OcrRecognitionPipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/nlp/summarization_pipeline.py b/modelscope/pipelines/nlp/summarization_pipeline.py index 7a91eff1..30dd4b30 100644 --- a/modelscope/pipelines/nlp/summarization_pipeline.py +++ b/modelscope/pipelines/nlp/summarization_pipeline.py @@ -13,7 +13,7 @@ logger = get_logger() @PIPELINES.register_module( - Tasks.summarization, module_name=Pipelines.text_generation) + Tasks.text_summarization, module_name=Pipelines.text_generation) class SummarizationPipeline(Pipeline): def __init__(self, diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py index 6f3245c3..4427c096 100644 --- a/modelscope/preprocessors/multi_modal.py +++ b/modelscope/preprocessors/multi_modal.py @@ -34,7 +34,7 @@ class OfaPreprocessor(Preprocessor): """ super().__init__(*args, **kwargs) preprocess_mapping = { - Tasks.ofa_ocr_recognition: OfaOcrRecognitionPreprocessor, + Tasks.ocr_recognition: OfaOcrRecognitionPreprocessor, Tasks.image_captioning: OfaImageCaptioningPreprocessor, Tasks.visual_grounding: OfaVisualGroundingPreprocessor, Tasks.visual_question_answering: @@ -42,14 +42,14 @@ class OfaPreprocessor(Preprocessor): Tasks.visual_entailment: OfaVisualEntailmentPreprocessor, Tasks.image_classification: OfaImageClassificationPreprocessor, Tasks.text_classification: OfaTextClassificationPreprocessor, - Tasks.summarization: OfaSummarizationPreprocessor, + Tasks.text_summarization: OfaSummarizationPreprocessor, Tasks.text_to_image_synthesis: OfaTextToImageSynthesisPreprocessor } input_key_mapping = { - Tasks.ofa_ocr_recognition: ['image'], + Tasks.ocr_recognition: ['image'], Tasks.image_captioning: ['image'], Tasks.image_classification: ['image'], - Tasks.summarization: ['text'], + Tasks.text_summarization: ['text'], Tasks.text_classification: ['text', 'text2'], Tasks.visual_grounding: ['image', 'text'], Tasks.visual_question_answering: ['image', 'text'], diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 865e1d4f..8e986b61 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -117,7 +117,7 @@ class NLPTasks(object): table_question_answering = 'table-question-answering' sentence_embedding = 'sentence-embedding' fill_mask = 'fill-mask' - summarization = 'summarization' + text_summarization = 'text-summarization' question_answering = 'question-answering' zero_shot_classification = 'zero-shot-classification' backbone = 'backbone' @@ -151,7 +151,6 @@ class MultiModalTasks(object): visual_entailment = 'visual-entailment' video_multi_modal_embedding = 'video-multi-modal-embedding' image_text_retrieval = 'image-text-retrieval' - ofa_ocr_recognition = 'ofa-ocr-recognition' class TasksIODescriptions(object): diff --git a/tests/pipelines/test_ofa_tasks.py b/tests/pipelines/test_ofa_tasks.py index 05ecc719..57dcb0c3 100644 --- a/tests/pipelines/test_ofa_tasks.py +++ b/tests/pipelines/test_ofa_tasks.py @@ -48,7 +48,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_ocr_recognize_with_name(self): ocr_recognize = pipeline( - Tasks.ofa_ocr_recognition, + Tasks.ocr_recognition, model='damo/ofa_ocr-recognition_scene_base_zh') result = ocr_recognize('data/test/images/image_ocr_recognition.jpg') print(result[OutputKeys.TEXT]) @@ -75,7 +75,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): def test_run_with_summarization_with_model(self): model = Model.from_pretrained( 'damo/ofa_summarization_gigaword_large_en') - ofa_pipe = pipeline(Tasks.summarization, model=model) + ofa_pipe = pipeline(Tasks.text_summarization, model=model) text = 'five-time world champion michelle kwan withdrew' + \ 'from the #### us figure skating championships on wednesday ,' + \ ' but will petition us skating officials for the chance to ' + \ @@ -87,7 +87,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_summarization_with_name(self): ofa_pipe = pipeline( - Tasks.summarization, + Tasks.text_summarization, model='damo/ofa_summarization_gigaword_large_en') text = 'five-time world champion michelle kwan withdrew' + \ 'from the #### us figure skating championships on wednesday ,' + \