Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10490937 (branch: master)
| @@ -3,9 +3,9 @@ from modelscope.outputs import OutputKeys | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| OFA_TASK_KEY_MAPPING = { | OFA_TASK_KEY_MAPPING = { | ||||
| Tasks.ofa_ocr_recognition: OutputKeys.TEXT, | |||||
| Tasks.ocr_recognition: OutputKeys.TEXT, | |||||
| Tasks.image_captioning: OutputKeys.CAPTION, | Tasks.image_captioning: OutputKeys.CAPTION, | ||||
| Tasks.summarization: OutputKeys.TEXT, | |||||
| Tasks.text_summarization: OutputKeys.TEXT, | |||||
| Tasks.visual_question_answering: OutputKeys.TEXT, | Tasks.visual_question_answering: OutputKeys.TEXT, | ||||
| Tasks.visual_grounding: OutputKeys.BOXES, | Tasks.visual_grounding: OutputKeys.BOXES, | ||||
| Tasks.text_classification: (OutputKeys.SCORES, OutputKeys.LABELS), | Tasks.text_classification: (OutputKeys.SCORES, OutputKeys.LABELS), | ||||
| @@ -27,13 +27,13 @@ __all__ = ['OfaForAllTasks'] | |||||
| @MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa) | @MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa) | ||||
| @MODELS.register_module(Tasks.ofa_ocr_recognition, module_name=Models.ofa) | |||||
| @MODELS.register_module(Tasks.ocr_recognition, module_name=Models.ofa) | |||||
| @MODELS.register_module(Tasks.visual_grounding, module_name=Models.ofa) | @MODELS.register_module(Tasks.visual_grounding, module_name=Models.ofa) | ||||
| @MODELS.register_module( | @MODELS.register_module( | ||||
| Tasks.visual_question_answering, module_name=Models.ofa) | Tasks.visual_question_answering, module_name=Models.ofa) | ||||
| @MODELS.register_module(Tasks.visual_entailment, module_name=Models.ofa) | @MODELS.register_module(Tasks.visual_entailment, module_name=Models.ofa) | ||||
| @MODELS.register_module(Tasks.image_classification, module_name=Models.ofa) | @MODELS.register_module(Tasks.image_classification, module_name=Models.ofa) | ||||
| @MODELS.register_module(Tasks.summarization, module_name=Models.ofa) | |||||
| @MODELS.register_module(Tasks.text_summarization, module_name=Models.ofa) | |||||
| @MODELS.register_module(Tasks.text_classification, module_name=Models.ofa) | @MODELS.register_module(Tasks.text_classification, module_name=Models.ofa) | ||||
| class OfaForAllTasks(TorchModel): | class OfaForAllTasks(TorchModel): | ||||
| @@ -97,9 +97,9 @@ class OfaForAllTasks(TorchModel): | |||||
| 'traverse': self._traverse_inference, | 'traverse': self._traverse_inference, | ||||
| } | } | ||||
| self.task_inference_mapping = { | self.task_inference_mapping = { | ||||
| Tasks.ofa_ocr_recognition: self._text_gen_inference, | |||||
| Tasks.ocr_recognition: self._text_gen_inference, | |||||
| Tasks.image_captioning: self._text_gen_inference, | Tasks.image_captioning: self._text_gen_inference, | ||||
| Tasks.summarization: self._text_gen_inference, | |||||
| Tasks.text_summarization: self._text_gen_inference, | |||||
| Tasks.visual_grounding: self._visual_grounding_inference, | Tasks.visual_grounding: self._visual_grounding_inference, | ||||
| Tasks.visual_entailment: inference_d[self.gen_type], | Tasks.visual_entailment: inference_d[self.gen_type], | ||||
| Tasks.visual_question_answering: inference_d[self.gen_type], | Tasks.visual_question_answering: inference_d[self.gen_type], | ||||
| @@ -661,7 +661,7 @@ TASK_OUTPUTS = { | |||||
| # "caption": "this is an image caption text." | # "caption": "this is an image caption text." | ||||
| # } | # } | ||||
| Tasks.image_captioning: [OutputKeys.CAPTION], | Tasks.image_captioning: [OutputKeys.CAPTION], | ||||
| Tasks.ofa_ocr_recognition: [OutputKeys.TEXT], | |||||
| Tasks.ocr_recognition: [OutputKeys.TEXT], | |||||
| # visual grounding result for single sample | # visual grounding result for single sample | ||||
| # { | # { | ||||
| @@ -11,6 +11,8 @@ from modelscope.utils.logger import get_logger | |||||
| logger = get_logger() | logger = get_logger() | ||||
| @PIPELINES.register_module( | |||||
| Tasks.image_text_retrieval, module_name=Pipelines.multi_modal_embedding) | |||||
| @PIPELINES.register_module( | @PIPELINES.register_module( | ||||
| Tasks.multi_modal_embedding, module_name=Pipelines.multi_modal_embedding) | Tasks.multi_modal_embedding, module_name=Pipelines.multi_modal_embedding) | ||||
| class MultiModalEmbeddingPipeline(Pipeline): | class MultiModalEmbeddingPipeline(Pipeline): | ||||
| @@ -16,7 +16,7 @@ logger = get_logger() | |||||
| @PIPELINES.register_module( | @PIPELINES.register_module( | ||||
| Tasks.ofa_ocr_recognition, module_name=Pipelines.ofa_ocr_recognition) | |||||
| Tasks.ocr_recognition, module_name=Pipelines.ofa_ocr_recognition) | |||||
| class OcrRecognitionPipeline(Pipeline): | class OcrRecognitionPipeline(Pipeline): | ||||
| def __init__(self, | def __init__(self, | ||||
| @@ -13,7 +13,7 @@ logger = get_logger() | |||||
| @PIPELINES.register_module( | @PIPELINES.register_module( | ||||
| Tasks.summarization, module_name=Pipelines.text_generation) | |||||
| Tasks.text_summarization, module_name=Pipelines.text_generation) | |||||
| class SummarizationPipeline(Pipeline): | class SummarizationPipeline(Pipeline): | ||||
| def __init__(self, | def __init__(self, | ||||
| @@ -34,7 +34,7 @@ class OfaPreprocessor(Preprocessor): | |||||
| """ | """ | ||||
| super().__init__(*args, **kwargs) | super().__init__(*args, **kwargs) | ||||
| preprocess_mapping = { | preprocess_mapping = { | ||||
| Tasks.ofa_ocr_recognition: OfaOcrRecognitionPreprocessor, | |||||
| Tasks.ocr_recognition: OfaOcrRecognitionPreprocessor, | |||||
| Tasks.image_captioning: OfaImageCaptioningPreprocessor, | Tasks.image_captioning: OfaImageCaptioningPreprocessor, | ||||
| Tasks.visual_grounding: OfaVisualGroundingPreprocessor, | Tasks.visual_grounding: OfaVisualGroundingPreprocessor, | ||||
| Tasks.visual_question_answering: | Tasks.visual_question_answering: | ||||
| @@ -42,14 +42,14 @@ class OfaPreprocessor(Preprocessor): | |||||
| Tasks.visual_entailment: OfaVisualEntailmentPreprocessor, | Tasks.visual_entailment: OfaVisualEntailmentPreprocessor, | ||||
| Tasks.image_classification: OfaImageClassificationPreprocessor, | Tasks.image_classification: OfaImageClassificationPreprocessor, | ||||
| Tasks.text_classification: OfaTextClassificationPreprocessor, | Tasks.text_classification: OfaTextClassificationPreprocessor, | ||||
| Tasks.summarization: OfaSummarizationPreprocessor, | |||||
| Tasks.text_summarization: OfaSummarizationPreprocessor, | |||||
| Tasks.text_to_image_synthesis: OfaTextToImageSynthesisPreprocessor | Tasks.text_to_image_synthesis: OfaTextToImageSynthesisPreprocessor | ||||
| } | } | ||||
| input_key_mapping = { | input_key_mapping = { | ||||
| Tasks.ofa_ocr_recognition: ['image'], | |||||
| Tasks.ocr_recognition: ['image'], | |||||
| Tasks.image_captioning: ['image'], | Tasks.image_captioning: ['image'], | ||||
| Tasks.image_classification: ['image'], | Tasks.image_classification: ['image'], | ||||
| Tasks.summarization: ['text'], | |||||
| Tasks.text_summarization: ['text'], | |||||
| Tasks.text_classification: ['text', 'text2'], | Tasks.text_classification: ['text', 'text2'], | ||||
| Tasks.visual_grounding: ['image', 'text'], | Tasks.visual_grounding: ['image', 'text'], | ||||
| Tasks.visual_question_answering: ['image', 'text'], | Tasks.visual_question_answering: ['image', 'text'], | ||||
| @@ -117,7 +117,7 @@ class NLPTasks(object): | |||||
| table_question_answering = 'table-question-answering' | table_question_answering = 'table-question-answering' | ||||
| sentence_embedding = 'sentence-embedding' | sentence_embedding = 'sentence-embedding' | ||||
| fill_mask = 'fill-mask' | fill_mask = 'fill-mask' | ||||
| summarization = 'summarization' | |||||
| text_summarization = 'text-summarization' | |||||
| question_answering = 'question-answering' | question_answering = 'question-answering' | ||||
| zero_shot_classification = 'zero-shot-classification' | zero_shot_classification = 'zero-shot-classification' | ||||
| backbone = 'backbone' | backbone = 'backbone' | ||||
| @@ -151,7 +151,6 @@ class MultiModalTasks(object): | |||||
| visual_entailment = 'visual-entailment' | visual_entailment = 'visual-entailment' | ||||
| video_multi_modal_embedding = 'video-multi-modal-embedding' | video_multi_modal_embedding = 'video-multi-modal-embedding' | ||||
| image_text_retrieval = 'image-text-retrieval' | image_text_retrieval = 'image-text-retrieval' | ||||
| ofa_ocr_recognition = 'ofa-ocr-recognition' | |||||
| class TasksIODescriptions(object): | class TasksIODescriptions(object): | ||||
| @@ -48,7 +48,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| def test_run_with_ocr_recognize_with_name(self): | def test_run_with_ocr_recognize_with_name(self): | ||||
| ocr_recognize = pipeline( | ocr_recognize = pipeline( | ||||
| Tasks.ofa_ocr_recognition, | |||||
| Tasks.ocr_recognition, | |||||
| model='damo/ofa_ocr-recognition_scene_base_zh') | model='damo/ofa_ocr-recognition_scene_base_zh') | ||||
| result = ocr_recognize('data/test/images/image_ocr_recognition.jpg') | result = ocr_recognize('data/test/images/image_ocr_recognition.jpg') | ||||
| print(result[OutputKeys.TEXT]) | print(result[OutputKeys.TEXT]) | ||||
| @@ -75,7 +75,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| def test_run_with_summarization_with_model(self): | def test_run_with_summarization_with_model(self): | ||||
| model = Model.from_pretrained( | model = Model.from_pretrained( | ||||
| 'damo/ofa_summarization_gigaword_large_en') | 'damo/ofa_summarization_gigaword_large_en') | ||||
| ofa_pipe = pipeline(Tasks.summarization, model=model) | |||||
| ofa_pipe = pipeline(Tasks.text_summarization, model=model) | |||||
| text = 'five-time world champion michelle kwan withdrew' + \ | text = 'five-time world champion michelle kwan withdrew' + \ | ||||
| 'from the #### us figure skating championships on wednesday ,' + \ | 'from the #### us figure skating championships on wednesday ,' + \ | ||||
| ' but will petition us skating officials for the chance to ' + \ | ' but will petition us skating officials for the chance to ' + \ | ||||
| @@ -87,7 +87,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| def test_run_with_summarization_with_name(self): | def test_run_with_summarization_with_name(self): | ||||
| ofa_pipe = pipeline( | ofa_pipe = pipeline( | ||||
| Tasks.summarization, | |||||
| Tasks.text_summarization, | |||||
| model='damo/ofa_summarization_gigaword_large_en') | model='damo/ofa_summarization_gigaword_large_en') | ||||
| text = 'five-time world champion michelle kwan withdrew' + \ | text = 'five-time world champion michelle kwan withdrew' + \ | ||||
| 'from the #### us figure skating championships on wednesday ,' + \ | 'from the #### us figure skating championships on wednesday ,' + \ | ||||