Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10319306 (target branch: master)

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76bf84536edbaf192a8a699efc62ba2b06056bac12c426ecfcc2e003d91fbd32
+size 53219

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:379e11d7fc3734d3ec95afd0d86460b4653fbf4bb1f57f993610d6a6fd30fd3d
-size 1702339

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dec0fbb931cb609bf481e56b89cd2fbbab79839f22832c3bbe69a8fae2769cdd
+size 167407

@@ -40,6 +40,7 @@ class Models(object):
     mtcnn = 'mtcnn'
     ulfd = 'ulfd'
     video_inpainting = 'video-inpainting'
+    human_wholebody_keypoint = 'human-wholebody-keypoint'
     hand_static = 'hand-static'
     face_human_hand_detection = 'face-human-hand-detection'
     face_emotion = 'face-emotion'
@@ -49,6 +50,7 @@ class Models(object):
     # EasyCV models
     yolox = 'YOLOX'
     segformer = 'Segformer'
+    image_object_detection_auto = 'image-object-detection-auto'

     # nlp models
     bert = 'bert'
@@ -170,6 +172,7 @@ class Pipelines(object):
     ocr_recognition = 'convnextTiny-ocr-recognition'
     image_portrait_enhancement = 'gpen-image-portrait-enhancement'
     image_to_image_generation = 'image-to-image-generation'
+    image_object_detection_auto = 'yolox_image-object-detection-auto'
     skin_retouching = 'unet-skin-retouching'
     tinynas_classification = 'tinynas-classification'
     tinynas_detection = 'tinynas-detection'
@@ -185,6 +188,7 @@ class Pipelines(object):
     movie_scene_segmentation = 'resnet50-bert-movie-scene-segmentation'
     shop_segmentation = 'shop-segmentation'
     video_inpainting = 'video-inpainting'
+    human_wholebody_keypoint = 'hrnetw48_human-wholebody-keypoint_image'
     pst_action_recognition = 'patchshift-action-recognition'
     hand_static = 'hand-static'
     face_human_hand_detection = 'face-human-hand-detection'
@@ -427,6 +431,7 @@ class Datasets(object):
     """
     ClsDataset = 'ClsDataset'
     Face2dKeypointsDataset = 'Face2dKeypointsDataset'
+    HumanWholeBodyKeypointDataset = 'HumanWholeBodyKeypointDataset'
    SegDataset = 'SegDataset'
     DetDataset = 'DetDataset'
     DetImagesMixDataset = 'DetImagesMixDataset'

@@ -4,15 +4,15 @@
 from . import (action_recognition, animal_recognition, body_2d_keypoints,
                body_3d_keypoints, cartoon, cmdssl_video_embedding,
                crowd_counting, face_2d_keypoints, face_detection,
-               face_generation, image_classification, image_color_enhance,
-               image_colorization, image_denoise, image_inpainting,
-               image_instance_segmentation, image_panoptic_segmentation,
-               image_portrait_enhancement, image_reid_person,
-               image_semantic_segmentation, image_to_image_generation,
-               image_to_image_translation, movie_scene_segmentation,
-               object_detection, product_retrieval_embedding,
-               realtime_object_detection, salient_detection, shop_segmentation,
-               super_resolution, video_single_object_tracking,
-               video_summarization, virual_tryon)
+               face_generation, human_wholebody_keypoint, image_classification,
+               image_color_enhance, image_colorization, image_denoise,
+               image_inpainting, image_instance_segmentation,
+               image_panoptic_segmentation, image_portrait_enhancement,
+               image_reid_person, image_semantic_segmentation,
+               image_to_image_generation, image_to_image_translation,
+               movie_scene_segmentation, object_detection,
+               product_retrieval_embedding, realtime_object_detection,
+               salient_detection, shop_segmentation, super_resolution,
+               video_single_object_tracking, video_summarization, virual_tryon)

 # yapf: enable

@@ -0,0 +1,22 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import TYPE_CHECKING
+
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+    from .human_wholebody_keypoint import HumanWholeBodyKeypoint
+
+else:
+    _import_structure = {
+        'human_wholebody_keypoint': ['HumanWholeBodyKeypoint']
+    }
+
+    import sys
+
+    sys.modules[__name__] = LazyImportModule(
+        __name__,
+        globals()['__file__'],
+        _import_structure,
+        module_spec=__spec__,
+        extra_objects={},
+    )

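Reviewer note: this __init__ repeats the lazy-import boilerplate used across modelscope subpackages. The module swaps itself in sys.modules for a LazyImportModule, so the EasyCV import chain (and torch behind it) only runs when HumanWholeBodyKeypoint is first touched. A minimal sketch of the mechanism, not the real implementation in modelscope.utils.import_utils:

# Hypothetical illustration of the lazy-import behaviour.
import importlib
import types


class LazyModuleSketch(types.ModuleType):
    """Maps symbol names to submodules and imports them on first access."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # Invert {'submodule': ['Symbol', ...]} into {'Symbol': 'submodule'}.
        self._symbol_to_submodule = {
            symbol: submodule
            for submodule, symbols in import_structure.items()
            for symbol in symbols
        }

    def __getattr__(self, item):
        if item not in self._symbol_to_submodule:
            raise AttributeError(item)
        submodule = importlib.import_module(
            f'{self.__name__}.{self._symbol_to_submodule[item]}')
        value = getattr(submodule, item)
        setattr(self, item, value)  # cache for subsequent lookups
        return value
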
@@ -0,0 +1,17 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from easycv.models.pose.top_down import TopDown
+
+from modelscope.metainfo import Models
+from modelscope.models.builder import MODELS
+from modelscope.models.cv.easycv_base import EasyCVBaseModel
+from modelscope.utils.constant import Tasks
+
+
+@MODELS.register_module(
+    group_key=Tasks.human_wholebody_keypoint,
+    module_name=Models.human_wholebody_keypoint)
+class HumanWholeBodyKeypoint(EasyCVBaseModel, TopDown):
+
+    def __init__(self, model_dir=None, *args, **kwargs):
+        EasyCVBaseModel.__init__(self, model_dir, args, kwargs)
+        TopDown.__init__(self, *args, **kwargs)

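Reviewer note: the decorator registers EasyCV's TopDown pose model under the new task and model keys, so the generic registry builder can resolve it from a config dict. A hedged sketch of that resolution (the model directory is a placeholder, and real configs would also carry the EasyCV TopDown kwargs such as backbone and keypoint head):

from modelscope.metainfo import Models
from modelscope.models.builder import build_model
from modelscope.utils.constant import Tasks

# Placeholder model_dir; remaining cfg keys are forwarded to __init__.
cfg = dict(
    type=Models.human_wholebody_keypoint,
    model_dir='/path/to/downloaded/model')
model = build_model(cfg, task_name=Tasks.human_wholebody_keypoint)
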
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
 else:
     _import_structure = {
         'mmdet_model': ['DetectionModel'],
-        'yolox_pai': ['YOLOX']
+        'yolox_pai': ['YOLOX'],
     }

     import sys

@@ -9,6 +9,9 @@ from modelscope.utils.constant import Tasks

 @MODELS.register_module(
     group_key=Tasks.image_object_detection, module_name=Models.yolox)
+@MODELS.register_module(
+    group_key=Tasks.image_object_detection,
+    module_name=Models.image_object_detection_auto)
 class YOLOX(EasyCVBaseModel, _YOLOX):

     def __init__(self, model_dir=None, *args, **kwargs):

@@ -0,0 +1,22 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import TYPE_CHECKING
+
+from modelscope.utils.import_utils import LazyImportModule
+
+if TYPE_CHECKING:
+    from .human_wholebody_keypoint_dataset import WholeBodyCocoTopDownDataset
+
+else:
+    _import_structure = {
+        'human_wholebody_keypoint_dataset': ['WholeBodyCocoTopDownDataset']
+    }
+
+    import sys
+
+    sys.modules[__name__] = LazyImportModule(
+        __name__,
+        globals()['__file__'],
+        _import_structure,
+        module_spec=__spec__,
+        extra_objects={},
+    )

@@ -0,0 +1,39 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from easycv.datasets.pose import \
+    WholeBodyCocoTopDownDataset as _WholeBodyCocoTopDownDataset
+
+from modelscope.metainfo import Datasets
+from modelscope.msdatasets.cv.easycv_base import EasyCVBaseDataset
+from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS
+from modelscope.utils.constant import Tasks
+
+
+@TASK_DATASETS.register_module(
+    group_key=Tasks.human_wholebody_keypoint,
+    module_name=Datasets.HumanWholeBodyKeypointDataset)
+class WholeBodyCocoTopDownDataset(EasyCVBaseDataset,
+                                  _WholeBodyCocoTopDownDataset):
+    """EasyCV dataset for human whole body 2d keypoints.
+
+    Args:
+        split_config (dict): Dataset root path from MSDataset, e.g.
+            {"train": "local cache path"} or {"evaluation": "local cache path"}.
+        preprocessor (Preprocessor): An optional preprocessor instance; please
+            make sure the preprocessor fits the model if supplied. Not
+            supported yet.
+        mode: Training or Evaluation.
+    """
+
+    def __init__(self,
+                 split_config=None,
+                 preprocessor=None,
+                 mode=None,
+                 *args,
+                 **kwargs) -> None:
+        EasyCVBaseDataset.__init__(
+            self,
+            split_config=split_config,
+            preprocessor=preprocessor,
+            mode=mode,
+            args=args,
+            kwargs=kwargs)
+        _WholeBodyCocoTopDownDataset.__init__(self, *args, **kwargs)

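Reviewer note: once registered, the dataset should be constructible through the task-dataset registry for trainers. A hedged sketch; the build_task_dataset entry point and the kwargs forwarded to EasyCV are assumptions, and the cache path is hypothetical:

from modelscope.metainfo import Datasets
from modelscope.msdatasets.task_datasets.builder import build_task_dataset
from modelscope.utils.constant import Tasks

# Remaining kwargs would be forwarded to easycv's
# WholeBodyCocoTopDownDataset.__init__.
dataset = build_task_dataset(
    dict(
        type=Datasets.HumanWholeBodyKeypointDataset,
        split_config={'train': '/path/to/local/cache'},
        mode='train'),
    task_name=Tasks.human_wholebody_keypoint)
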
@@ -203,7 +203,7 @@ TASK_OUTPUTS = {

     # human body keypoints detection result for single sample
     # {
-    #   "poses": [
+    #   "keypoints": [
     #     [[x, y]*15],
     #     [[x, y]*15],
     #     [[x, y]*15]
@@ -220,7 +220,7 @@ TASK_OUTPUTS = {
     #   ]
     # }
     Tasks.body_2d_keypoints:
-    [OutputKeys.POSES, OutputKeys.SCORES, OutputKeys.BOXES],
+    [OutputKeys.KEYPOINTS, OutputKeys.SCORES, OutputKeys.BOXES],

     # 3D human body keypoints detection result for single sample
     # {
@@ -339,6 +339,21 @@ TASK_OUTPUTS = {
         OutputKeys.SCENE_META_LIST
     ],

+    # human whole body keypoints detection result for single sample
+    # {
+    #   "keypoints": [
+    #     [[x, y]*133],
+    #     [[x, y]*133],
+    #     [[x, y]*133]
+    #   ]
+    #   "boxes": [
+    #     [x1, y1, x2, y2],
+    #     [x1, y1, x2, y2],
+    #     [x1, y1, x2, y2],
+    #   ]
+    # }
+    Tasks.human_wholebody_keypoint: [OutputKeys.KEYPOINTS, OutputKeys.BOXES],
+
     # video summarization result for a single video
     # {
     #   "output":

@@ -75,8 +75,6 @@ DEFAULT_MODEL_FOR_PIPELINE = {
                                   'damo/nlp_bart_text-error-correction_chinese'),
     Tasks.image_captioning: (Pipelines.image_captioning,
                              'damo/ofa_image-caption_coco_large_en'),
-    Tasks.image_body_reshaping: (Pipelines.image_body_reshaping,
-                                 'damo/cv_flow-based-body-reshaping_damo'),
     Tasks.image_portrait_stylization:
     (Pipelines.person_image_cartoon,
      'damo/cv_unet_person-image-cartoon_compound-models'),
@@ -159,6 +157,9 @@ DEFAULT_MODEL_FOR_PIPELINE = {
     Tasks.image_classification:
     (Pipelines.daily_image_classification,
      'damo/cv_vit-base_image-classification_Dailylife-labels'),
+    Tasks.image_object_detection:
+    (Pipelines.image_object_detection_auto,
+     'damo/cv_yolox_image-object-detection-auto'),
     Tasks.ocr_recognition:
     (Pipelines.ocr_recognition,
      'damo/cv_convnextTiny_ocr-recognition-general_damo'),
@@ -186,6 +187,9 @@ DEFAULT_MODEL_FOR_PIPELINE = {
                              'damo/cv_fft_inpainting_lama'),
     Tasks.video_inpainting: (Pipelines.video_inpainting,
                              'damo/cv_video-inpainting'),
+    Tasks.human_wholebody_keypoint:
+    (Pipelines.human_wholebody_keypoint,
+     'damo/cv_hrnetw48_human-wholebody-keypoint_image'),
     Tasks.hand_static: (Pipelines.hand_static,
                         'damo/cv_mobileface_hand-static'),
     Tasks.face_human_hand_detection:

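Reviewer note: these DEFAULT_MODEL_FOR_PIPELINE entries make the model id optional for both new tasks. A usage sketch for the wholebody task (the image path is the test asset added in this review; the detection task gets the same treatment, see the sketch after the detection pipeline below):

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# With the mapping above, omitting `model` falls back to
# 'damo/cv_hrnetw48_human-wholebody-keypoint_image'.
wholebody = pipeline(Tasks.human_wholebody_keypoint)
result = wholebody(
    'data/test/images/keypoints_detect/img_test_wholebody.jpg')[0]
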
@@ -46,7 +46,10 @@ if TYPE_CHECKING:
     from .video_category_pipeline import VideoCategoryPipeline
     from .virtual_try_on_pipeline import VirtualTryonPipeline
     from .shop_segmentation_pipleline import ShopSegmentationPipeline
-    from .easycv_pipelines import EasyCVDetectionPipeline, EasyCVSegmentationPipeline, Face2DKeypointsPipeline
+    from .easycv_pipelines import (EasyCVDetectionPipeline,
+                                   EasyCVSegmentationPipeline,
+                                   Face2DKeypointsPipeline,
+                                   HumanWholebodyKeypointsPipeline)
     from .text_driven_segmentation_pipleline import TextDrivenSegmentationPipeline
     from .movie_scene_segmentation_pipeline import MovieSceneSegmentationPipeline
     from .mog_face_detection_pipeline import MogFaceDetectionPipeline
@@ -109,8 +112,10 @@ else:
         'virtual_try_on_pipeline': ['VirtualTryonPipeline'],
         'shop_segmentation_pipleline': ['ShopSegmentationPipeline'],
         'easycv_pipeline': [
-            'EasyCVDetectionPipeline', 'EasyCVSegmentationPipeline',
-            'Face2DKeypointsPipeline'
+            'EasyCVDetectionPipeline',
+            'EasyCVSegmentationPipeline',
+            'Face2DKeypointsPipeline',
+            'HumanWholebodyKeypointsPipeline',
         ],
         'text_driven_segmentation_pipeline':
         ['TextDrivenSegmentationPipeline'],

@@ -73,7 +73,7 @@ class Body2DKeypointsPipeline(Pipeline):
         if input[0] is None or input[1] is None:
             return {
                 OutputKeys.BOXES: [],
-                OutputKeys.POSES: [],
+                OutputKeys.KEYPOINTS: [],
                 OutputKeys.SCORES: []
             }

@@ -83,7 +83,7 @@ class Body2DKeypointsPipeline(Pipeline):
             result_boxes.append([box[0][0], box[0][1], box[1][0], box[1][1]])
         return {
             OutputKeys.BOXES: result_boxes,
-            OutputKeys.POSES: poses,
+            OutputKeys.KEYPOINTS: poses,
             OutputKeys.SCORES: scores
         }

@@ -145,7 +145,7 @@ class Body3DKeypointsPipeline(Pipeline):
             kps_2d = self.human_body_2d_kps_detector(frame)
             box = kps_2d['boxes'][
                 0]  # box: [[[x1, y1], [x2, y2]]], N human boxes per frame, [0] represent using first detected bbox
-            pose = kps_2d['poses'][0]  # keypoints: [15, 2]
+            pose = kps_2d['keypoints'][0]  # keypoints: [15, 2]
             score = kps_2d['scores'][0]  # keypoints: [15, 2]
             all_2d_poses.append(pose)
             all_boxes_with_socre.append(

@@ -7,11 +7,14 @@ if TYPE_CHECKING:
     from .detection_pipeline import EasyCVDetectionPipeline
     from .segmentation_pipeline import EasyCVSegmentationPipeline
     from .face_2d_keypoints_pipeline import Face2DKeypointsPipeline
+    from .human_wholebody_keypoint_pipeline import HumanWholebodyKeypointsPipeline
 else:
     _import_structure = {
         'detection_pipeline': ['EasyCVDetectionPipeline'],
         'segmentation_pipeline': ['EasyCVSegmentationPipeline'],
-        'face_2d_keypoints_pipeline': ['Face2DKeypointsPipeline']
+        'face_2d_keypoints_pipeline': ['Face2DKeypointsPipeline'],
+        'human_wholebody_keypoint_pipeline':
+        ['HumanWholebodyKeypointsPipeline'],
     }

     import sys

@@ -1,16 +1,28 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import Any
+
 from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
 from modelscope.pipelines.builder import PIPELINES
-from modelscope.utils.constant import Tasks
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.cv.image_utils import \
+    show_image_object_detection_auto_result
 from .base import EasyCVPipeline


 @PIPELINES.register_module(
     Tasks.image_object_detection, module_name=Pipelines.easycv_detection)
+@PIPELINES.register_module(
+    Tasks.image_object_detection,
+    module_name=Pipelines.image_object_detection_auto)
 class EasyCVDetectionPipeline(EasyCVPipeline):
     """Pipeline for easycv detection task."""

-    def __init__(self, model: str, model_file_pattern='*.pt', *args, **kwargs):
+    def __init__(self,
+                 model: str,
+                 model_file_pattern=ModelFile.TORCH_MODEL_FILE,
+                 *args,
+                 **kwargs):
         """
         model (str): model id on modelscope hub or local model path.
         model_file_pattern (str): model file pattern.
@@ -21,3 +33,28 @@ class EasyCVDetectionPipeline(EasyCVPipeline):
             model_file_pattern=model_file_pattern,
             *args,
             **kwargs)
+
+    def show_result(self, img_path, result, save_path=None):
+        show_image_object_detection_auto_result(img_path, result, save_path)
+
+    def __call__(self, inputs) -> Any:
+        outputs = self.predict_op(inputs)
+
+        # Build one result dict per input; accumulating the lists across
+        # all outputs would duplicate every detection into each result.
+        results = []
+        for output in outputs:
+            scores = []
+            labels = []
+            boxes = []
+            for score, label, box in zip(output['detection_scores'],
+                                         output['detection_classes'],
+                                         output['detection_boxes']):
+                scores.append(score)
+                labels.append(self.cfg.CLASSES[label])
+                boxes.append([b for b in box])
+            results.append({
+                OutputKeys.SCORES: scores,
+                OutputKeys.LABELS: labels,
+                OutputKeys.BOXES: boxes
+            })
+
+        return results

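Reviewer note: with the double registration above, the existing YOLOX-based EasyCVDetectionPipeline also serves the new auto-detection model. A usage sketch mirroring the test added at the end of this review:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

detector = pipeline(
    Tasks.image_object_detection,
    model='damo/cv_yolox_image-object-detection-auto')
result = detector('data/test/images/auto_demo.jpg')[0]

# Render boxes, labels and scores onto the image and save it.
detector.show_result('data/test/images/auto_demo.jpg', result,
                     'auto_demo_ret.jpg')
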
@@ -0,0 +1,65 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os.path
+from typing import Any
+
+from modelscope.metainfo import Pipelines
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.utils.constant import ModelFile, Tasks
+from .base import EasyCVPipeline
+
+
+@PIPELINES.register_module(
+    Tasks.human_wholebody_keypoint,
+    module_name=Pipelines.human_wholebody_keypoint)
+class HumanWholebodyKeypointsPipeline(EasyCVPipeline):
+    """Pipeline for human wholebody 2d keypoints detection."""
+
+    def __init__(self,
+                 model: str,
+                 model_file_pattern=ModelFile.TORCH_MODEL_FILE,
+                 *args,
+                 **kwargs):
+        """
+        model (str): model id on modelscope hub or local model path.
+        model_file_pattern (str): model file pattern.
+        """
+        self.model_dir = model
+        super(HumanWholebodyKeypointsPipeline, self).__init__(
+            model=model,
+            model_file_pattern=model_file_pattern,
+            *args,
+            **kwargs)
+
+    def _build_predict_op(self, **kwargs):
+        """Build EasyCV predictor."""
+        from easycv.predictors.builder import build_predictor
+        detection_predictor_type = self.cfg['DETECTION']['type']
+        detection_model_path = os.path.join(
+            self.model_dir, self.cfg['DETECTION']['model_path'])
+        detection_cfg_file = os.path.join(self.model_dir,
+                                          self.cfg['DETECTION']['config_file'])
+        detection_score_threshold = self.cfg['DETECTION']['score_threshold']
+        self.cfg.pipeline.predictor_config[
+            'detection_predictor_config'] = dict(
+                type=detection_predictor_type,
+                model_path=detection_model_path,
+                config_file=detection_cfg_file,
+                score_threshold=detection_score_threshold)
+
+        easycv_config = self._to_easycv_config()
+        pipeline_op = build_predictor(self.cfg.pipeline.predictor_config, {
+            'model_path': self.model_path,
+            'config_file': easycv_config,
+            **kwargs
+        })
+        return pipeline_op
+
+    def __call__(self, inputs) -> Any:
+        outputs = self.predict_op(inputs)
+
+        results = [{
+            OutputKeys.KEYPOINTS: output['keypoints'],
+            OutputKeys.BOXES: output['boxes']
+        } for output in outputs]
+
+        return results

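Reviewer note: _build_predict_op chains two stages: it injects a person-detector predictor config into the EasyCV wholebody predictor config before building it, so detection crops feed the top-down pose model. A sketch of the assembled config under assumed values; only the keys come from the code above, the type names, file names and threshold are hypothetical:

# Hypothetical shape of self.cfg.pipeline.predictor_config after
# _build_predict_op runs.
predictor_config = dict(
    type='WholeBodyKeyPointsPredictor',        # assumed pose predictor type
    detection_predictor_config=dict(
        type='YoloXPredictor',                 # cfg['DETECTION']['type']
        model_path='<model_dir>/detection.pt',  # joined with self.model_dir
        config_file='<model_dir>/detection.py',  # joined with self.model_dir
        score_threshold=0.5,                   # cfg['DETECTION']['score_threshold']
    ),
)
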
@@ -29,6 +29,7 @@ class CVTasks(object):
     body_3d_keypoints = 'body-3d-keypoints'
     hand_2d_keypoints = 'hand-2d-keypoints'
     general_recognition = 'general-recognition'
+    human_wholebody_keypoint = 'human-wholebody-keypoint'

     image_classification = 'image-classification'
     image_multilabel_classification = 'image-multilabel-classification'

@@ -80,7 +80,7 @@ def realtime_object_detection_bbox_vis(image, bboxes):


 def draw_keypoints(output, original_image):
-    poses = np.array(output[OutputKeys.POSES])
+    poses = np.array(output[OutputKeys.KEYPOINTS])
     scores = np.array(output[OutputKeys.SCORES])
     boxes = np.array(output[OutputKeys.BOXES])
     assert len(poses) == len(scores) and len(poses) == len(boxes)
@@ -234,3 +234,35 @@ def show_video_summarization_result(video_in_path, result, video_save_path):
         video_writer.write(frame)
     video_writer.release()
     cap.release()
+
+
+def show_image_object_detection_auto_result(img_path,
+                                            detection_result,
+                                            save_path=None):
+    scores = detection_result[OutputKeys.SCORES]
+    labels = detection_result[OutputKeys.LABELS]
+    bboxes = detection_result[OutputKeys.BOXES]
+    img = cv2.imread(img_path)
+    assert img is not None, f"Can't read img: {img_path}"
+
+    for (score, label, box) in zip(scores, labels, bboxes):
+        cv2.rectangle(img, (int(box[0]), int(box[1])),
+                      (int(box[2]), int(box[3])), (0, 0, 255), 2)
+        cv2.putText(
+            img,
+            f'{score:.2f}', (int(box[0]), int(box[1])),
+            1,
+            1.0, (0, 255, 0),
+            thickness=1,
+            lineType=8)
+        cv2.putText(
+            img,
+            label, (int((box[0] + box[2]) * 0.5), int(box[1])),
+            1,
+            1.0, (0, 255, 0),
+            thickness=1,
+            lineType=8)
+
+    if save_path is not None:
+        cv2.imwrite(save_path, img)
+
+    return img

@@ -0,0 +1,40 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+import cv2
+
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class HumanWholebodyKeypointsPipelineTest(unittest.TestCase):
+
+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_human_wholebody_keypoint(self):
+        img_path = 'data/test/images/keypoints_detect/img_test_wholebody.jpg'
+        model_id = 'damo/cv_hrnetw48_human-wholebody-keypoint_image'
+
+        human_wholebody_keypoint_pipeline = pipeline(
+            task=Tasks.human_wholebody_keypoint, model=model_id)
+        output = human_wholebody_keypoint_pipeline(img_path)[0]
+
+        output_keypoints = output[OutputKeys.KEYPOINTS]
+        output_pose = output[OutputKeys.BOXES]
+
+        human_wholebody_keypoint_pipeline.predict_op.show_result(
+            img_path,
+            output_keypoints,
+            output_pose,
+            scale=1,
+            save_path='human_wholebody_keypoint_ret.jpg')
+
+        for keypoint in output_keypoints:
+            self.assertEqual(keypoint.shape[0], 133)
+        for box in output_pose:
+            self.assertEqual(box.shape[0], 4)
+
+
+if __name__ == '__main__':
+    unittest.main()

@@ -59,6 +59,18 @@ class ObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck):
     def test_demo_compatibility(self):
         self.compatibility_check()

+    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
+    def test_image_object_detection_auto_pipeline(self):
+        model_id = 'damo/cv_yolox_image-object-detection-auto'
+        test_image = 'data/test/images/auto_demo.jpg'
+        image_object_detection_auto = pipeline(
+            Tasks.image_object_detection, model=model_id)
+        result = image_object_detection_auto(test_image)[0]
+
+        image_object_detection_auto.show_result(test_image, result,
+                                                'auto_demo_ret.jpg')
+

 if __name__ == '__main__':
     unittest.main()