
[to #42322933]add image_body_reshaping code

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10217723

    * add image_body_reshaping code
master
ryan.yy yingda.chen 3 years ago
parent commit ff69439c4f
17 changed files with 1737 additions and 1 deletion
  1. +3 -0 data/test/images/image_body_reshaping.jpg
  2. +2 -0 modelscope/metainfo.py
  3. +20 -0 modelscope/models/cv/image_body_reshaping/__init__.py
  4. +128 -0 modelscope/models/cv/image_body_reshaping/image_body_reshaping.py
  5. +189 -0 modelscope/models/cv/image_body_reshaping/model.py
  6. +339 -0 modelscope/models/cv/image_body_reshaping/person_info.py
  7. +0 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/__init__.py
  8. +272 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/body.py
  9. +141 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/model.py
  10. +33 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/util.py
  11. +507 -0 modelscope/models/cv/image_body_reshaping/slim_utils.py
  12. +1 -0 modelscope/outputs.py
  13. +2 -0 modelscope/pipelines/builder.py
  14. +40 -0 modelscope/pipelines/cv/image_body_reshaping_pipeline.py
  15. +1 -1 modelscope/utils/constant.py
  16. +1 -0 requirements/cv.txt
  17. +58 -0 tests/pipelines/test_image_body_reshaping.py

+3 -0 data/test/images/image_body_reshaping.jpg

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b2c1119e3d521cf2e583b1e85fc9c9afd1d44954b433135039a98050a730932d
size 1127557

+2 -0 modelscope/metainfo.py

@@ -43,6 +43,7 @@ class Models(object):
face_human_hand_detection = 'face-human-hand-detection'
face_emotion = 'face-emotion'
product_segmentation = 'product-segmentation'
image_body_reshaping = 'image-body-reshaping'


# EasyCV models
yolox = 'YOLOX'
@@ -187,6 +188,7 @@ class Pipelines(object):
face_human_hand_detection = 'face-human-hand-detection'
face_emotion = 'face-emotion'
product_segmentation = 'product-segmentation'
image_body_reshaping = 'flow-based-body-reshaping'


# nlp tasks
automatic_post_editing = 'automatic-post-editing'


+20 -0 modelscope/models/cv/image_body_reshaping/__init__.py

@@ -0,0 +1,20 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .image_body_reshaping import ImageBodyReshaping

else:
_import_structure = {'image_body_reshaping': ['ImageBodyReshaping']}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)
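For reference, a minimal sketch of what the lazy registration buys (assumption: a working modelscope install; the class and its torch/cv2 dependencies are only imported on first access):

# The module installed above is a LazyImportModule, so this import
# resolves the image_body_reshaping submodule on demand.
from modelscope.models.cv.image_body_reshaping import ImageBodyReshaping
print(ImageBodyReshaping)  # the real class, loaded lazily at this point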

+128 -0 modelscope/models/cv/image_body_reshaping/image_body_reshaping.py

@@ -0,0 +1,128 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict

import cv2
import numpy as np
import torch

from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger
from .model import FlowGenerator
from .person_info import PersonInfo
from .pose_estimator.body import Body
from .slim_utils import image_warp_grid1, resize_on_long_side

logger = get_logger()

__all__ = ['ImageBodyReshaping']


@MODELS.register_module(
Tasks.image_body_reshaping, module_name=Models.image_body_reshaping)
class ImageBodyReshaping(TorchModel):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the image body reshaping model from the `model_dir` path.

Args:
model_dir (str): the model path.
"""
super().__init__(model_dir, *args, **kwargs)

if torch.cuda.is_available():
self.device = torch.device('cuda')
else:
self.device = torch.device('cpu')

self.degree = 1.0
self.reshape_model = FlowGenerator(n_channels=16).to(self.device)
model_path = os.path.join(model_dir, ModelFile.TORCH_MODEL_FILE)
checkpoints = torch.load(model_path, map_location=torch.device('cpu'))
self.reshape_model.load_state_dict(
checkpoints['state_dict'], strict=True)
self.reshape_model.eval()
logger.info('load body reshaping model done')

pose_model_ckpt = os.path.join(model_dir, 'body_pose_model.pth')
self.pose_esti = Body(pose_model_ckpt, self.device)
logger.info('load pose model done')

def pred_joints(self, img):
if img is None:
return None
small_src, resize_scale = resize_on_long_side(img, 300)
body_joints = self.pose_esti(small_src)

if body_joints.shape[0] >= 1:
body_joints[:, :, :2] = body_joints[:, :, :2] / resize_scale

return body_joints

def pred_flow(self, img):

body_joints = self.pred_joints(img)
small_size = 1200

if img.shape[0] > small_size or img.shape[1] > small_size:
_img, _scale = resize_on_long_side(img, small_size)
body_joints[:, :, :2] = body_joints[:, :, :2] * _scale
else:
_img = img

# We only reshape one person
if body_joints.shape[0] != 1:
return None

person = PersonInfo(body_joints[0])

with torch.no_grad():
person_pred = person.pred_flow(_img, self.reshape_model,
self.device)

flow = np.dstack((person_pred['rDx'], person_pred['rDy']))

scale = img.shape[0] * 1.0 / flow.shape[0]

flow = cv2.resize(flow, (img.shape[1], img.shape[0]))
flow *= scale

return flow

def warp(self, src_img, flow):

X_flow = flow[..., 0]
Y_flow = flow[..., 1]

X_flow = np.ascontiguousarray(X_flow)
Y_flow = np.ascontiguousarray(Y_flow)

pred = image_warp_grid1(X_flow, Y_flow, src_img, 1.0, 0, 0)
return pred

def inference(self, img):
img = img.cpu().numpy()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
flow = self.pred_flow(img)

if flow is None:
return img

assert flow.shape[:2] == img.shape[:2]

mag, ang = cv2.cartToPolar(flow[..., 0] + 1e-8, flow[..., 1] + 1e-8)
mag -= 3
mag[mag <= 0] = 0

x, y = cv2.polarToCart(mag, ang, angleInDegrees=False)
flow = np.dstack((x, y))

flow *= self.degree
pred = self.warp(img, flow)
out_img = np.clip(pred, 0, 255)
logger.info('model inference done')

return out_img.astype(np.uint8)
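A minimal direct-invocation sketch (assumptions: `model_dir` already holds the downloaded checkpoints, and `inference` receives a torch tensor wrapping a BGR image, as implied by the `img.cpu().numpy()` call and BGR-to-RGB conversion above):

import cv2
import torch
from modelscope.models.cv.image_body_reshaping import ImageBodyReshaping

model = ImageBodyReshaping('/path/to/model_dir')  # illustrative path
bgr = cv2.imread('data/test/images/image_body_reshaping.jpg')
out = model.inference(torch.from_numpy(bgr))  # uint8 array, RGB per the conversion above
cv2.imwrite('reshaped.png', cv2.cvtColor(out, cv2.COLOR_RGB2BGR))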

+189 -0 modelscope/models/cv/image_body_reshaping/model.py

@@ -0,0 +1,189 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvLayer(nn.Module):

def __init__(self, in_ch, out_ch):
super(ConvLayer, self).__init__()

self.conv = nn.Sequential(
nn.ReflectionPad2d(1),
nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=0),
nn.BatchNorm2d(out_ch), nn.ReLU(inplace=True))

def forward(self, x):
x = self.conv(x)
return x


class SASA(nn.Module):

def __init__(self, in_dim):
super(SASA, self).__init__()
self.chanel_in = in_dim

self.query_conv = nn.Conv2d(
in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
self.key_conv = nn.Conv2d(
in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1)
self.value_conv = nn.Conv2d(
in_channels=in_dim, out_channels=in_dim, kernel_size=1)
self.mag_conv = nn.Conv2d(
in_channels=5, out_channels=in_dim // 32, kernel_size=1)

self.gamma = nn.Parameter(torch.zeros(1))

self.softmax = nn.Softmax(dim=-1)
self.sigmoid = nn.Sigmoid()

def structure_encoder(self, paf_mag, target_height, target_width):
torso_mask = torch.sum(paf_mag[:, 1:3, :, :], dim=1, keepdim=True)
torso_mask = torch.clamp(torso_mask, 0, 1)

arms_mask = torch.sum(paf_mag[:, 4:8, :, :], dim=1, keepdim=True)
arms_mask = torch.clamp(arms_mask, 0, 1)

legs_mask = torch.sum(paf_mag[:, 8:12, :, :], dim=1, keepdim=True)
legs_mask = torch.clamp(legs_mask, 0, 1)

fg_mask = paf_mag[:, 12, :, :].unsqueeze(1)
bg_mask = 1 - fg_mask
Y = torch.cat((arms_mask, torso_mask, legs_mask, fg_mask, bg_mask),
dim=1)
Y = F.interpolate(Y, size=(target_height, target_width), mode='area')
return Y

def forward(self, X, PAF_mag):
"""extract self-attention features.
Args:
X : input feature maps (B x C x H x W)
PAF_mag : PAF magnitude maps (B x C x H x W); 1 denotes connectivity, 0 denotes non-connectivity

Returns:
out : self-attention value + input feature
Y : structure encoding maps (B x 5 x H x W)
"""

m_batchsize, C, height, width = X.size()

Y = self.structure_encoder(PAF_mag, height, width)

connectivity_mask_vec = self.mag_conv(Y).view(m_batchsize, -1,
width * height)
affinity = torch.bmm(
connectivity_mask_vec.permute(0, 2, 1), connectivity_mask_vec)
affinity_centered = affinity - torch.mean(affinity)
affinity_sigmoid = self.sigmoid(affinity_centered)

proj_query = self.query_conv(X).view(m_batchsize, -1,
width * height).permute(0, 2, 1)
proj_key = self.key_conv(X).view(m_batchsize, -1, width * height)
selfatten_map = torch.bmm(proj_query, proj_key)
selfatten_centered = selfatten_map - torch.mean(
selfatten_map) # centering
selfatten_sigmoid = self.sigmoid(selfatten_centered)

SASA_map = selfatten_sigmoid * affinity_sigmoid

proj_value = self.value_conv(X).view(m_batchsize, -1, width * height)

out = torch.bmm(proj_value, SASA_map.permute(0, 2, 1))
out = out.view(m_batchsize, C, height, width)

out = self.gamma * out + X
return out, Y


class FlowGenerator(nn.Module):

def __init__(self, n_channels, deep_supervision=False):
super(FlowGenerator, self).__init__()
self.deep_supervision = deep_supervision

self.Encoder = nn.Sequential(
ConvLayer(n_channels, 64),
ConvLayer(64, 64),
nn.MaxPool2d(2),
ConvLayer(64, 128),
ConvLayer(128, 128),
nn.MaxPool2d(2),
ConvLayer(128, 256),
ConvLayer(256, 256),
nn.MaxPool2d(2),
ConvLayer(256, 512),
ConvLayer(512, 512),
nn.MaxPool2d(2),
ConvLayer(512, 1024),
ConvLayer(1024, 1024),
ConvLayer(1024, 1024),
ConvLayer(1024, 1024),
ConvLayer(1024, 1024),
)

self.SASA = SASA(in_dim=1024)

self.Decoder = nn.Sequential(
ConvLayer(1024, 1024),
nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
ConvLayer(1024, 512),
ConvLayer(512, 512),
nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
ConvLayer(512, 256),
ConvLayer(256, 256),
ConvLayer(256, 128),
ConvLayer(128, 64),
ConvLayer(64, 32),
nn.Conv2d(32, 2, kernel_size=1, padding=0),
nn.Tanh(),
nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True),
)

dilation_ksize = 17
self.dilation = torch.nn.MaxPool2d(
kernel_size=dilation_ksize,
stride=1,
padding=int((dilation_ksize - 1) / 2))

def warp(self, x, flow, mode='bilinear', padding_mode='zeros', coff=0.2):
n, c, h, w = x.size()
yv, xv = torch.meshgrid([torch.arange(h), torch.arange(w)])
xv = xv.float() / (w - 1) * 2.0 - 1
yv = yv.float() / (h - 1) * 2.0 - 1
grid = torch.cat((xv.unsqueeze(-1), yv.unsqueeze(-1)), -1).unsqueeze(0)
grid = grid.to(flow.device)
grid_x = grid + 2 * flow * coff
warp_x = F.grid_sample(x, grid_x, mode=mode, padding_mode=padding_mode)
return warp_x

def forward(self, img, skeleton_map, coef=0.2):
"""extract self-attention features.
Args:
img : input numpy image
skeleton_map : skeleton map of input image
coef: warp degree

Returns:
warp_x : warped image
flow: predicted flow
"""

img_concat = torch.cat((img, skeleton_map), dim=1)
X = self.Encoder(img_concat)

_, _, height, width = X.size()

# directly get PAF magnitude from skeleton maps via dilation
PAF_mag = self.dilation((skeleton_map + 1.0) * 0.5)

out, Y = self.SASA(X, PAF_mag)
flow = self.Decoder(out)

flow = flow.permute(0, 2, 3, 1) # [n, 2, h, w] ==> [n, h, w, 2]

warp_x = self.warp(img, flow, coff=coef)
warp_x = torch.clamp(warp_x, min=-1.0, max=1.0)

return warp_x, flow
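A shape sanity check for the generator (assumptions: the 16 input channels are 3 image channels plus the 13-channel skeleton map produced by gen_skeleton_map, and 256x256 matches PersonInfo's network input size):

import torch
from modelscope.models.cv.image_body_reshaping.model import FlowGenerator

net = FlowGenerator(n_channels=16).eval()
img = torch.randn(1, 3, 256, 256)    # normalized image crop in [-1, 1]
skel = torch.randn(1, 13, 256, 256)  # skeleton map channels
with torch.no_grad():
    warp_x, flow = net(img, skel)
print(warp_x.shape, flow.shape)  # (1, 3, 256, 256) and (1, 256, 256, 2)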

+339 -0 modelscope/models/cv/image_body_reshaping/person_info.py

@@ -0,0 +1,339 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import copy

import cv2
import numpy as np
import torch

from .slim_utils import (enlarge_box_tblr, gen_skeleton_map,
get_map_fusion_map_cuda, get_mask_bbox,
resize_on_long_side)


class PersonInfo(object):

def __init__(self, joints):
self.joints = joints
self.flow = None
self.pad_boder = False
self.height_expand = 0
self.width_expand = 0
self.coeff = 0.2
self.network_input_W = 256
self.network_input_H = 256
self.divider = 20
self.flow_scales = ['upper_2']

def update_attribute(self, pad_boder, height_expand, width_expand):
self.pad_boder = pad_boder
self.height_expand = height_expand
self.width_expand = width_expand
if pad_boder:
self.joints[:, 0] += width_expand
self.joints[:, 1] += height_expand

def pred_flow(self, img, flow_net, device):
with torch.no_grad():
if img is None:
print('image is none')
self.flow = None
return None

if len(img.shape) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

if self.pad_boder:
height_expand = self.height_expand
width_expand = self.width_expand
pad_img = cv2.copyMakeBorder(
img,
height_expand,
height_expand,
width_expand,
width_expand,
cv2.BORDER_CONSTANT,
value=(127, 127, 127))

else:
height_expand = 0
width_expand = 0
pad_img = img.copy()

canvas = np.zeros(
shape=(pad_img.shape[0], pad_img.shape[1]), dtype=np.float32)

self.human_joint_box = self.__joint_to_body_box()

self.human_box = enlarge_box_tblr(
self.human_joint_box, pad_img, ratio=0.25)
human_box_height = self.human_box[1] - self.human_box[0]
human_box_width = self.human_box[3] - self.human_box[2]

self.leg_joint_box = self.__joint_to_leg_box()
self.leg_box = enlarge_box_tblr(
self.leg_joint_box, pad_img, ratio=0.25)

self.arm_joint_box = self.__joint_to_arm_box()
self.arm_box = enlarge_box_tblr(
self.arm_joint_box, pad_img, ratio=0.1)

x_flows = []
y_flows = []
multi_bbox = []

for scale in self.flow_scales: # better for metric
scale_value = float(scale.split('_')[-1])

arm_box = copy.deepcopy(self.arm_box)

if arm_box[0] is None:
arm_box = self.human_box

arm_box_height = arm_box[1] - arm_box[0]
arm_box_width = arm_box[3] - arm_box[2]

roi_bbox = None

if arm_box_width < human_box_width * 0.1 or arm_box_height < human_box_height * 0.1:
roi_bbox = self.human_box
else:
arm_box = enlarge_box_tblr(
arm_box, pad_img, ratio=scale_value)
if scale == 'upper_0.2':
arm_box[0] = min(arm_box[0], int(self.joints[0][1]))
if scale.startswith('upper'):
roi_bbox = [
max(self.human_box[0], arm_box[0]),
min(self.human_box[1], arm_box[1]),
max(self.human_box[2], arm_box[2]),
min(self.human_box[3], arm_box[3])
]
if roi_bbox[1] - roi_bbox[0] < 1 or roi_bbox[
3] - roi_bbox[2] < 1:
continue

elif scale.startswith('lower'):
roi_bbox = [
max(self.human_box[0], self.leg_box[0]),
min(self.human_box[1], self.leg_box[1]),
max(self.human_box[2], self.leg_box[2]),
min(self.human_box[3], self.leg_box[3])
]

if roi_bbox[1] - roi_bbox[0] < 1 or roi_bbox[
3] - roi_bbox[2] < 1:
continue

skel_map, roi_bbox = gen_skeleton_map(
self.joints, 'depth', input_roi_box=roi_bbox)

if roi_bbox is None:
continue

if skel_map.dtype != np.float32:
skel_map = skel_map.astype(np.float32)

skel_map -= 1.0 # [0,2] ->[-1,1]

multi_bbox.append(roi_bbox)

roi_bbox_height = roi_bbox[1] - roi_bbox[0]
roi_bbox_width = roi_bbox[3] - roi_bbox[2]

assert skel_map.shape[0] == roi_bbox_height
assert skel_map.shape[1] == roi_bbox_width
roi_height_pad = roi_bbox_height // self.divider
roi_width_pad = roi_bbox_width // self.divider
paded_roi_h = roi_bbox_height + 2 * roi_height_pad
paded_roi_w = roi_bbox_width + 2 * roi_width_pad

roi_height_pad_joint = skel_map.shape[0] // self.divider
roi_width_pad_joint = skel_map.shape[1] // self.divider
skel_map = np.pad(
skel_map,
((roi_height_pad_joint, roi_height_pad_joint),
(roi_width_pad_joint, roi_width_pad_joint), (0, 0)),
'constant',
constant_values=-1)

skel_map_resized = cv2.resize(
skel_map, (self.network_input_W, self.network_input_H))

skel_map_resized[skel_map_resized < 0] = -1.0
skel_map_resized[skel_map_resized > -0.5] = 1.0
skel_map_transformed = torch.from_numpy(
skel_map_resized.transpose((2, 0, 1)))

roi_npy = pad_img[roi_bbox[0]:roi_bbox[1],
roi_bbox[2]:roi_bbox[3], :].copy()
if roi_npy.dtype != np.float32:
roi_npy = roi_npy.astype(np.float32)

roi_npy = np.pad(roi_npy,
((roi_height_pad, roi_height_pad),
(roi_width_pad, roi_width_pad), (0, 0)),
'edge')

roi_npy = roi_npy[:, :, ::-1]

roi_npy = cv2.resize(
roi_npy, (self.network_input_W, self.network_input_H))

roi_npy *= 1.0 / 255
roi_npy -= 0.5
roi_npy *= 2

rgb_tensor = torch.from_numpy(roi_npy.transpose((2, 0, 1)))

rgb_tensor = rgb_tensor.unsqueeze(0).to(device)
skel_map_tensor = skel_map_transformed.unsqueeze(0).to(device)
warped_img_val, flow_field_val = flow_net(
rgb_tensor, skel_map_tensor
) # inference, connectivity_mask [1,12,16,16]
flow_field_val = flow_field_val.detach().squeeze().cpu().numpy(
)

flow_field_val = cv2.resize(
flow_field_val, (paded_roi_w, paded_roi_h),
interpolation=cv2.INTER_LINEAR)
flow_field_val[..., 0] = flow_field_val[
..., 0] * paded_roi_w * 0.5 * 2 * self.coeff
flow_field_val[..., 1] = flow_field_val[
..., 1] * paded_roi_h * 0.5 * 2 * self.coeff

# remove pad areas
flow_field_val = flow_field_val[
roi_height_pad:flow_field_val.shape[0] - roi_height_pad,
roi_width_pad:flow_field_val.shape[1] - roi_width_pad, :]

diffuse_width = max(roi_bbox_width // 3, 1)
diffuse_height = max(roi_bbox_height // 3, 1)
assert roi_bbox_width == flow_field_val.shape[1]
assert roi_bbox_height == flow_field_val.shape[0]

origin_flow = np.zeros(
(pad_img.shape[0] + 2 * diffuse_height,
pad_img.shape[1] + 2 * diffuse_width, 2),
dtype=np.float32)

flow_field_val = np.pad(flow_field_val,
((diffuse_height, diffuse_height),
(diffuse_width, diffuse_width),
(0, 0)), 'linear_ramp')

origin_flow[roi_bbox[0]:roi_bbox[1] + 2 * diffuse_height,
roi_bbox[2]:roi_bbox[3]
+ 2 * diffuse_width] = flow_field_val

origin_flow = origin_flow[diffuse_height:-diffuse_height,
diffuse_width:-diffuse_width, :]

x_flows.append(origin_flow[..., 0])
y_flows.append(origin_flow[..., 1])

if len(x_flows) == 0:
return {
'rDx': np.zeros(canvas.shape[:2], dtype=np.float32),
'rDy': np.zeros(canvas.shape[:2], dtype=np.float32),
'multi_bbox': multi_bbox,
'x_fusion_map':
np.ones(canvas.shape[:2], dtype=np.float32),
'y_fusion_map':
np.ones(canvas.shape[:2], dtype=np.float32)
}
else:
origin_rDx, origin_rDy, x_fusion_map, y_fusion_map = self.blend_multiscale_flow(
x_flows, y_flows, device=device)

return {
'rDx': origin_rDx,
'rDy': origin_rDy,
'multi_bbox': multi_bbox,
'x_fusion_map': x_fusion_map,
'y_fusion_map': y_fusion_map
}

@staticmethod
def blend_multiscale_flow(x_flows, y_flows, device=None):
scale_num = len(x_flows)
if scale_num == 1:
return x_flows[0], y_flows[0], np.ones_like(
x_flows[0]), np.ones_like(x_flows[0])

origin_rDx = np.zeros((x_flows[0].shape[0], x_flows[0].shape[1]),
dtype=np.float32)
origin_rDy = np.zeros((y_flows[0].shape[0], y_flows[0].shape[1]),
dtype=np.float32)

x_fusion_map, x_acc_map = get_map_fusion_map_cuda(
x_flows, 1, device=device)
y_fusion_map, y_acc_map = get_map_fusion_map_cuda(
y_flows, 1, device=device)

x_flow_map = 1.0 / x_fusion_map
y_flow_map = 1.0 / y_fusion_map

all_acc_map = x_acc_map + y_acc_map
all_acc_map = all_acc_map.astype(np.uint8)
roi_box = get_mask_bbox(all_acc_map, threshold=1)

if roi_box[0] is None or roi_box[1] - roi_box[0] <= 0 or roi_box[
3] - roi_box[2] <= 0:
roi_box = [0, x_flow_map.shape[0], 0, x_flow_map.shape[1]]

roi_x_flow_map = x_flow_map[roi_box[0]:roi_box[1],
roi_box[2]:roi_box[3]]
roi_y_flow_map = y_flow_map[roi_box[0]:roi_box[1],
roi_box[2]:roi_box[3]]

roi_width = roi_x_flow_map.shape[1]
roi_height = roi_x_flow_map.shape[0]

roi_x_flow_map, scale = resize_on_long_side(roi_x_flow_map, 320)
roi_y_flow_map, scale = resize_on_long_side(roi_y_flow_map, 320)

roi_x_flow_map = cv2.blur(roi_x_flow_map, (55, 55))
roi_y_flow_map = cv2.blur(roi_y_flow_map, (55, 55))

roi_x_flow_map = cv2.resize(roi_x_flow_map, (roi_width, roi_height))
roi_y_flow_map = cv2.resize(roi_y_flow_map, (roi_width, roi_height))

x_flow_map[roi_box[0]:roi_box[1],
roi_box[2]:roi_box[3]] = roi_x_flow_map
y_flow_map[roi_box[0]:roi_box[1],
roi_box[2]:roi_box[3]] = roi_y_flow_map

for i in range(scale_num):
origin_rDx += x_flows[i]
origin_rDy += y_flows[i]

origin_rDx *= x_flow_map
origin_rDy *= y_flow_map

return origin_rDx, origin_rDy, x_flow_map, y_flow_map

def __joint_to_body_box(self):
joint_left = int(np.min(self.joints, axis=0)[0])
joint_right = int(np.max(self.joints, axis=0)[0])
joint_top = int(np.min(self.joints, axis=0)[1])
joint_bottom = int(np.max(self.joints, axis=0)[1])
return [joint_top, joint_bottom, joint_left, joint_right]

def __joint_to_leg_box(self):
leg_joints = self.joints[8:, :]
if np.max(leg_joints, axis=0)[2] < 0.05:
return [0, 0, 0, 0]
joint_left = int(np.min(leg_joints, axis=0)[0])
joint_right = int(np.max(leg_joints, axis=0)[0])
joint_top = int(np.min(leg_joints, axis=0)[1])
joint_bottom = int(np.max(leg_joints, axis=0)[1])
return [joint_top, joint_bottom, joint_left, joint_right]

def __joint_to_arm_box(self):
arm_joints = self.joints[2:8, :]
if np.max(arm_joints, axis=0)[2] < 0.05:
return [0, 0, 0, 0]
joint_left = int(np.min(arm_joints, axis=0)[0])
joint_right = int(np.max(arm_joints, axis=0)[0])
joint_top = int(np.min(arm_joints, axis=0)[1])
joint_bottom = int(np.max(arm_joints, axis=0)[1])
return [joint_top, joint_bottom, joint_left, joint_right]
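Sketch of the intended call pattern (assumptions: `body_joints` is the (N, 18, 3) array of x, y, confidence values from the pose estimator, and `flow_net` is the FlowGenerator above):

# person-level flow prediction for a single detected body
person = PersonInfo(body_joints[0])             # one row of (N, 18, 3)
pred = person.pred_flow(img, flow_net, device)  # img: HxWx3 uint8 image
dx, dy = pred['rDx'], pred['rDy']               # per-pixel displacement fields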

+0 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/__init__.py


+272 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/body.py

@@ -0,0 +1,272 @@
# The implementation is based on openpose, available at https://github.com/Hzzone/pytorch-openpose.

import math

import cv2
import numpy as np
import torch
from scipy.ndimage.filters import gaussian_filter

from .model import BodyposeModel
from .util import pad_rightdown_corner, transfer


class Body(object):

def __init__(self, model_path, device):
self.model = BodyposeModel().to(device)
model_dict = transfer(self.model, torch.load(model_path))
self.model.load_state_dict(model_dict)
self.model.eval()

def __call__(self, oriImg):
scale_search = [0.5]
boxsize = 368
stride = 8
padValue = 128
thre1 = 0.1
thre2 = 0.05
bodyparts = 18
multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

for m in range(len(multiplier)):
scale = multiplier[m]
imageToTest = cv2.resize(
oriImg, (0, 0),
fx=scale,
fy=scale,
interpolation=cv2.INTER_CUBIC)
imageToTest_padded, pad = pad_rightdown_corner(
imageToTest, stride, padValue)
im = np.transpose(
np.float32(imageToTest_padded[:, :, :, np.newaxis]),
(3, 2, 0, 1)) / 256 - 0.5
im = np.ascontiguousarray(im)

data = torch.from_numpy(im).float()
if torch.cuda.is_available():
data = data.cuda()
with torch.no_grad():
Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

# extract outputs, resize, and remove padding
heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2),
(1, 2, 0)) # output 1 is heatmaps
heatmap = cv2.resize(
heatmap, (0, 0),
fx=stride,
fy=stride,
interpolation=cv2.INTER_CUBIC)
heatmap = heatmap[:imageToTest_padded.shape[0]
- pad[2], :imageToTest_padded.shape[1]
- pad[3], :]
heatmap = cv2.resize(
heatmap, (oriImg.shape[1], oriImg.shape[0]),
interpolation=cv2.INTER_CUBIC)

paf = np.transpose(np.squeeze(Mconv7_stage6_L1),
(1, 2, 0)) # output 0 is PAFs
paf = cv2.resize(
paf, (0, 0),
fx=stride,
fy=stride,
interpolation=cv2.INTER_CUBIC)
paf = paf[:imageToTest_padded.shape[0]
- pad[2], :imageToTest_padded.shape[1] - pad[3], :]
paf = cv2.resize(
paf, (oriImg.shape[1], oriImg.shape[0]),
interpolation=cv2.INTER_CUBIC)

heatmap_avg += heatmap / len(multiplier)
paf_avg += paf / len(multiplier)

all_peaks = []
peak_counter = 0

for part in range(bodyparts):
map_ori = heatmap_avg[:, :, part]
one_heatmap = gaussian_filter(map_ori, sigma=3)

map_left = np.zeros(one_heatmap.shape)
map_left[1:, :] = one_heatmap[:-1, :]
map_right = np.zeros(one_heatmap.shape)
map_right[:-1, :] = one_heatmap[1:, :]
map_up = np.zeros(one_heatmap.shape)
map_up[:, 1:] = one_heatmap[:, :-1]
map_down = np.zeros(one_heatmap.shape)
map_down[:, :-1] = one_heatmap[:, 1:]

peaks_binary = np.logical_and.reduce(
(one_heatmap >= map_left, one_heatmap >= map_right,
one_heatmap >= map_up, one_heatmap >= map_down,
one_heatmap > thre1))
peaks = list(
zip(np.nonzero(peaks_binary)[1],
np.nonzero(peaks_binary)[0])) # note reverse
peaks_with_score = [x + (map_ori[x[1], x[0]], ) for x in peaks]
peak_id = range(peak_counter, peak_counter + len(peaks))
peaks_with_score_and_id = [
peaks_with_score[i] + (peak_id[i], )
for i in range(len(peak_id))
]

all_peaks.append(peaks_with_score_and_id)
peak_counter += len(peaks)

# find connection in the specified sequence, center 29 is in the position 15
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9],
[9, 10], [10, 11], [2, 12], [12, 13], [13, 14], [2, 1],
[1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]]
# the middle joints heatmap correspondence
mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
[19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
[47, 48], [49, 50], [53, 54], [51, 52], [55, 56], [37, 38],
[45, 46]]

connection_all = []
special_k = []
mid_num = 10

for k in range(len(mapIdx)):
score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
candA = all_peaks[limbSeq[k][0] - 1]
candB = all_peaks[limbSeq[k][1] - 1]
nA = len(candA)
nB = len(candB)
if (nA != 0 and nB != 0):
connection_candidate = []
for i in range(nA):
for j in range(nB):
vec = np.subtract(candB[j][:2], candA[i][:2])
norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
norm = max(0.001, norm)
vec = np.divide(vec, norm)

startend = list(
zip(
np.linspace(
candA[i][0], candB[j][0], num=mid_num),
np.linspace(
candA[i][1], candB[j][1], num=mid_num)))

vec_x = np.array([
score_mid[int(round(startend[item][1])),
int(round(startend[item][0])), 0]
for item in range(len(startend))
])
vec_y = np.array([
score_mid[int(round(startend[item][1])),
int(round(startend[item][0])), 1]
for item in range(len(startend))
])

score_midpts = np.multiply(
vec_x, vec[0]) + np.multiply(vec_y, vec[1])
temp1 = sum(score_midpts) / len(score_midpts)
temp2 = min(0.5 * oriImg.shape[0] / norm - 1, 0)
score_with_dist_prior = temp1 + temp2
criterion1 = len(np.nonzero(
score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
criterion2 = score_with_dist_prior > 0
if criterion1 and criterion2:
connection_candidate.append([
i, j, score_with_dist_prior,
score_with_dist_prior + candA[i][2]
+ candB[j][2]
])

connection_candidate = sorted(
connection_candidate, key=lambda x: x[2], reverse=True)
connection = np.zeros((0, 5))
for c in range(len(connection_candidate)):
i, j, s = connection_candidate[c][0:3]
if (i not in connection[:, 3]
and j not in connection[:, 4]):
connection = np.vstack(
[connection, [candA[i][3], candB[j][3], s, i, j]])
if (len(connection) >= min(nA, nB)):
break

connection_all.append(connection)
else:
special_k.append(k)
connection_all.append([])

# last number in each row is the total parts number of that person
# the second last number in each row is the score of the overall configuration
subset = -1 * np.ones((0, 20))
candidate = np.array(
[item for sublist in all_peaks for item in sublist])

for k in range(len(mapIdx)):
if k not in special_k:
partAs = connection_all[k][:, 0]
partBs = connection_all[k][:, 1]
indexA, indexB = np.array(limbSeq[k]) - 1

for i in range(len(connection_all[k])): # = 1:size(temp,1)
found = 0
subset_idx = [-1, -1]
for j in range(len(subset)): # 1:size(subset,1):
if subset[j][indexA] == partAs[i] or subset[j][
indexB] == partBs[i]:
subset_idx[found] = j
found += 1

if found == 1:
j = subset_idx[0]
if subset[j][indexB] != partBs[i]:
subset[j][indexB] = partBs[i]
subset[j][-1] += 1
subset[j][-2] += candidate[
partBs[i].astype(int),
2] + connection_all[k][i][2]
elif found == 2: # if found 2 and disjoint, merge them
j1, j2 = subset_idx
tmp1 = (subset[j1] >= 0).astype(int)
tmp2 = (subset[j2] >= 0).astype(int)
membership = (tmp1 + tmp2)[:-2]
if len(np.nonzero(membership == 2)[0]) == 0: # merge
subset[j1][:-2] += (subset[j2][:-2] + 1)
subset[j1][-2:] += subset[j2][-2:]
subset[j1][-2] += connection_all[k][i][2]
subset = np.delete(subset, j2, 0)
else: # as like found == 1
subset[j1][indexB] = partBs[i]
subset[j1][-1] += 1
subset[j1][-2] += candidate[
partBs[i].astype(int),
2] + connection_all[k][i][2]

# if find no partA in the subset, create a new subset
elif not found and k < 17:
row = -1 * np.ones(20)
row[indexA] = partAs[i]
row[indexB] = partBs[i]
row[-1] = 2
row[-2] = sum(
candidate[connection_all[k][i, :2].astype(int),
2]) + connection_all[k][i][2]
subset = np.vstack([subset, row])
# delete rows of subset which have too few parts
deleteIdx = []
for i in range(len(subset)):
if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
deleteIdx.append(i)
subset = np.delete(subset, deleteIdx, axis=0)

# subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
# candidate: x, y, score, id
count = subset.shape[0]
joints = np.zeros(shape=(count, bodyparts, 3))

for i in range(count):
for j in range(bodyparts):
joints[i, j, :3] = candidate[int(subset[i, j]), :3]
confidence = 1.0 if subset[i, j] >= 0 else 0.0
joints[i, j, 2] *= confidence
return joints
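A usage sketch for the estimator (the checkpoint name follows the loading code in image_body_reshaping.py above; the device choice mirrors the cuda check inside __call__):

import cv2
import torch
from modelscope.models.cv.image_body_reshaping.pose_estimator.body import Body

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
body = Body('body_pose_model.pth', device)  # path is illustrative
img = cv2.imread('data/test/images/image_body_reshaping.jpg')
joints = body(img)   # (num_people, 18, 3): x, y, confidence per joint
print(joints.shape)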

+141 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/model.py

@@ -0,0 +1,141 @@
# The implementation is based on openpose, available at https://github.com/Hzzone/pytorch-openpose.

from collections import OrderedDict

import torch
import torch.nn as nn


def make_layers(block, no_relu_layers):
layers = []
for layer_name, v in block.items():
if 'pool' in layer_name:
layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])
layers.append((layer_name, layer))
else:
conv2d = nn.Conv2d(
in_channels=v[0],
out_channels=v[1],
kernel_size=v[2],
stride=v[3],
padding=v[4])
layers.append((layer_name, conv2d))
if layer_name not in no_relu_layers:
layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))

return nn.Sequential(OrderedDict(layers))


class BodyposeModel(nn.Module):

def __init__(self):
super(BodyposeModel, self).__init__()

# these layers have no relu layer
no_relu_layers = [
'conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2'
]
blocks = {}
block0 = OrderedDict([('conv1_1', [3, 64, 3, 1, 1]),
('conv1_2', [64, 64, 3, 1, 1]),
('pool1_stage1', [2, 2, 0]),
('conv2_1', [64, 128, 3, 1, 1]),
('conv2_2', [128, 128, 3, 1, 1]),
('pool2_stage1', [2, 2, 0]),
('conv3_1', [128, 256, 3, 1, 1]),
('conv3_2', [256, 256, 3, 1, 1]),
('conv3_3', [256, 256, 3, 1, 1]),
('conv3_4', [256, 256, 3, 1, 1]),
('pool3_stage1', [2, 2, 0]),
('conv4_1', [256, 512, 3, 1, 1]),
('conv4_2', [512, 512, 3, 1, 1]),
('conv4_3_CPM', [512, 256, 3, 1, 1]),
('conv4_4_CPM', [256, 128, 3, 1, 1])])

# Stage 1
block1_1 = OrderedDict([('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
('conv5_5_CPM_L1', [512, 38, 1, 1, 0])])

block1_2 = OrderedDict([('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
('conv5_5_CPM_L2', [512, 19, 1, 1, 0])])
blocks['block1_1'] = block1_1
blocks['block1_2'] = block1_2

self.model0 = make_layers(block0, no_relu_layers)

# Stages 2 - 6
for i in range(2, 7):
blocks['block%d_1' % i] = OrderedDict([
('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
])

blocks['block%d_2' % i] = OrderedDict([
('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
])

for k in blocks.keys():
blocks[k] = make_layers(blocks[k], no_relu_layers)

self.model1_1 = blocks['block1_1']
self.model2_1 = blocks['block2_1']
self.model3_1 = blocks['block3_1']
self.model4_1 = blocks['block4_1']
self.model5_1 = blocks['block5_1']
self.model6_1 = blocks['block6_1']

self.model1_2 = blocks['block1_2']
self.model2_2 = blocks['block2_2']
self.model3_2 = blocks['block3_2']
self.model4_2 = blocks['block4_2']
self.model5_2 = blocks['block5_2']
self.model6_2 = blocks['block6_2']

def forward(self, x):

out1 = self.model0(x)

out1_1 = self.model1_1(out1)
out1_2 = self.model1_2(out1)
out2 = torch.cat([out1_1, out1_2, out1], 1)

out2_1 = self.model2_1(out2)
out2_2 = self.model2_2(out2)
out3 = torch.cat([out2_1, out2_2, out1], 1)

out3_1 = self.model3_1(out3)
out3_2 = self.model3_2(out3)
out4 = torch.cat([out3_1, out3_2, out1], 1)

out4_1 = self.model4_1(out4)
out4_2 = self.model4_2(out4)
out5 = torch.cat([out4_1, out4_2, out1], 1)

out5_1 = self.model5_1(out5)
out5_2 = self.model5_2(out5)
out6 = torch.cat([out5_1, out5_2, out1], 1)

out6_1 = self.model6_1(out6)
out6_2 = self.model6_2(out6)

return out6_1, out6_2
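A shape sketch for the two-branch network (following the inference settings in body.py: boxsize 368 and stride 8, so a 368x368 input yields 46x46 maps):

import torch
from modelscope.models.cv.image_body_reshaping.pose_estimator.model import BodyposeModel

net = BodyposeModel().eval()
x = torch.randn(1, 3, 368, 368)
with torch.no_grad():
    paf, heatmap = net(x)   # L1 branch: PAFs, L2 branch: heatmaps
print(paf.shape, heatmap.shape)  # (1, 38, 46, 46) and (1, 19, 46, 46)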

+33 -0 modelscope/models/cv/image_body_reshaping/pose_estimator/util.py

@@ -0,0 +1,33 @@
# The implementation is based on openpose, available at https://github.com/Hzzone/pytorch-openpose.
import numpy as np


def pad_rightdown_corner(img, stride, padValue):
h = img.shape[0]
w = img.shape[1]

pad = 4 * [None]
pad[0] = 0 # up
pad[1] = 0 # left
pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right

img_padded = img
pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
img_padded = np.concatenate((pad_up, img_padded), axis=0)
pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
img_padded = np.concatenate((pad_left, img_padded), axis=1)
pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
img_padded = np.concatenate((img_padded, pad_down), axis=0)
pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
img_padded = np.concatenate((img_padded, pad_right), axis=1)

return img_padded, pad


def transfer(model, model_weights):
transfered_model_weights = {}
for weights_name in model.state_dict().keys():
transfered_model_weights[weights_name] = model_weights['.'.join(
weights_name.split('.')[1:])]
return transfered_model_weights
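A quick check of the padding helper: it pads only the bottom and right so both spatial dimensions become multiples of `stride`:

import numpy as np

img = np.zeros((100, 75, 3), dtype=np.uint8)
img_padded, pad = pad_rightdown_corner(img, stride=8, padValue=128)
print(img_padded.shape, pad)  # (104, 80, 3) [0, 0, 4, 5]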

+507 -0 modelscope/models/cv/image_body_reshaping/slim_utils.py

@@ -0,0 +1,507 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import math
import os
import random

import cv2
import numba
import numpy as np
import torch


def resize_on_long_side(img, long_side=800):
src_height = img.shape[0]
src_width = img.shape[1]

if src_height > src_width:
scale = long_side * 1.0 / src_height
_img = cv2.resize(
img, (int(src_width * scale), long_side),
interpolation=cv2.INTER_LINEAR)
else:
scale = long_side * 1.0 / src_width
_img = cv2.resize(
img, (long_side, int(src_height * scale)),
interpolation=cv2.INTER_LINEAR)

return _img, scale


def point_in_box(pt, box):
pt_x = pt[0]
pt_y = pt[1]

if pt_x >= box[0] and pt_x <= box[0] + box[2] and pt_y >= box[
1] and pt_y <= box[1] + box[3]:
return True
else:
return False


def enlarge_box_tblr(roi_bbox, mask, ratio=0.4, use_long_side=True):
if roi_bbox is None or None in roi_bbox:
return [None, None, None, None]

top = roi_bbox[0]
bottom = roi_bbox[1]
left = roi_bbox[2]
right = roi_bbox[3]

roi_width = roi_bbox[3] - roi_bbox[2]
roi_height = roi_bbox[1] - roi_bbox[0]
right = left + roi_width
bottom = top + roi_height

long_side = roi_width if roi_width > roi_height else roi_height

if use_long_side:
new_left = left - int(long_side * ratio)
else:
new_left = left - int(roi_width * ratio)
new_left = 1 if new_left < 0 else new_left

if use_long_side:
new_top = top - int(long_side * ratio)
else:
new_top = top - int(roi_height * ratio)
new_top = 1 if new_top < 0 else new_top

if use_long_side:
new_right = right + int(long_side * ratio)
else:
new_right = right + int(roi_width * ratio)
new_right = mask.shape[1] - 2 if new_right > mask.shape[1] else new_right

if use_long_side:
new_bottom = bottom + int(long_side * ratio)
else:
new_bottom = bottom + int(roi_height * ratio)
new_bottom = mask.shape[0] - 2 if new_bottom > mask.shape[0] else new_bottom

bbox = [new_top, new_bottom, new_left, new_right]
return bbox


def gen_PAF(image, joints):

assert joints.shape[0] == 18
assert joints.shape[1] == 3

org_h = image.shape[0]
org_w = image.shape[1]
small_image, resize_scale = resize_on_long_side(image, 120)

joints[:, :2] = joints[:, :2] * resize_scale

joint_left = int(np.min(joints, axis=0)[0])
joint_right = int(np.max(joints, axis=0)[0])
joint_top = int(np.min(joints, axis=0)[1])
joint_bottom = int(np.max(joints, axis=0)[1])

limb_width = min(
abs(joint_right - joint_left), abs(joint_bottom - joint_top)) // 6

if limb_width % 2 == 0:
limb_width += 1
kernel_size = limb_width

part_orders = [(5, 11), (2, 8), (5, 6), (6, 7), (2, 3), (3, 4), (11, 12),
(12, 13), (8, 9), (9, 10)]

map_list = []
mask_list = []
PAF_all = np.zeros(
shape=(small_image.shape[0], small_image.shape[1], 2),
dtype=np.float32)
for c, pair in enumerate(part_orders):
idx_a_name = pair[0]
idx_b_name = pair[1]

jointa = joints[idx_a_name]
jointb = joints[idx_b_name]

confidence_threshold = 0.05
if jointa[2] > confidence_threshold and jointb[
2] > confidence_threshold:
canvas = np.zeros(
shape=(small_image.shape[0], small_image.shape[1]),
dtype=np.uint8)

canvas = cv2.line(canvas, (int(jointa[0]), int(jointa[1])),
(int(jointb[0]), int(jointb[1])),
(255, 255, 255), 5)

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
(kernel_size, kernel_size))

canvas = cv2.dilate(canvas, kernel, 1)
canvas = cv2.GaussianBlur(canvas, (kernel_size, kernel_size), 0)
canvas = canvas.astype(np.float32) / 255
PAF = np.zeros(
shape=(small_image.shape[0], small_image.shape[1], 2),
dtype=np.float32)
PAF[..., 0] = jointb[0] - jointa[0]
PAF[..., 1] = jointb[1] - jointa[1]
mag, ang = cv2.cartToPolar(PAF[..., 0], PAF[..., 1])
PAF /= (np.dstack((mag, mag)) + 1e-5)

single_PAF = PAF * np.dstack((canvas, canvas))
map_list.append(
cv2.GaussianBlur(single_PAF,
(kernel_size * 3, kernel_size * 3), 0))

mask_list.append(
cv2.GaussianBlur(canvas.copy(),
(kernel_size * 3, kernel_size * 3), 0))
PAF_all = PAF_all * (1.0 - np.dstack(
(canvas, canvas))) + single_PAF

PAF_all = cv2.GaussianBlur(PAF_all, (kernel_size * 3, kernel_size * 3), 0)
PAF_all = cv2.resize(
PAF_all, (org_w, org_h), interpolation=cv2.INTER_LINEAR)
map_list.append(PAF_all)
return PAF_all, map_list, mask_list


def gen_skeleton_map(joints, stack_mode='column', input_roi_box=None):
if type(joints) == list:
joints = np.array(joints)
assert stack_mode == 'column' or stack_mode == 'depth'

part_orders = [(2, 5), (5, 11), (2, 8), (8, 11), (5, 6), (6, 7), (2, 3),
(3, 4), (11, 12), (12, 13), (8, 9), (9, 10)]

def link(img, a, b, color, line_width, scale=1.0, x_offset=0, y_offset=0):
jointa = joints[a]
jointb = joints[b]

temp1 = int((jointa[0] - x_offset) * scale)
temp2 = int((jointa[1] - y_offset) * scale)
temp3 = int((jointb[0] - x_offset) * scale)
temp4 = int((jointb[1] - y_offset) * scale)

cv2.line(img, (temp1, temp2), (temp3, temp4), color, line_width)

roi_box = input_roi_box

roi_box_width = roi_box[3] - roi_box[2]
roi_box_height = roi_box[1] - roi_box[0]
short_side_length = min(roi_box_width, roi_box_height)
line_width = short_side_length // 30

line_width = max(line_width, 2)

map_cube = np.zeros(
shape=(roi_box_height, roi_box_width, len(part_orders) + 1),
dtype=np.float32)

use_line_width = min(5, line_width)
fx = use_line_width * 1.0 / line_width  # fx is at most 1.0

if fx < 0.99:
map_cube = cv2.resize(map_cube, (0, 0), fx=fx, fy=fx)

for c, pair in enumerate(part_orders):
tmp = map_cube[..., c].copy()
link(
tmp,
pair[0],
pair[1], (2.0, 2.0, 2.0),
use_line_width,
scale=fx,
x_offset=roi_box[2],
y_offset=roi_box[0])
map_cube[..., c] = tmp

tmp = map_cube[..., -1].copy()
link(
tmp,
pair[0],
pair[1], (2.0, 2.0, 2.0),
use_line_width,
scale=fx,
x_offset=roi_box[2],
y_offset=roi_box[0])
map_cube[..., -1] = tmp

map_cube = cv2.resize(map_cube, (roi_box_width, roi_box_height))

if stack_mode == 'depth':
return map_cube, roi_box
elif stack_mode == 'column':
joint_maps = []
for c in range(len(part_orders) + 1):
joint_maps.append(map_cube[..., c])
joint_map = np.column_stack(joint_maps)

return joint_map, roi_box


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
tl = line_thickness or round(
0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
color = color or [random.randint(0, 255) for _ in range(3)]
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
if label:
tf = max(tl - 1, 1) # font thickness
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
cv2.putText(
img,
label, (c1[0], c1[1] - 2),
0,
tl / 3, [225, 255, 255],
thickness=tf,
lineType=cv2.LINE_AA)


def draw_line(im, points, color, stroke_size=2, closed=False):
points = points.astype(np.int32)
for i in range(len(points) - 1):
cv2.line(im, tuple(points[i]), tuple(points[i + 1]), color,
stroke_size)
if closed:
cv2.line(im, tuple(points[0]), tuple(points[-1]), color, stroke_size)


def enlarged_bbox(bbox, img_width, img_height, enlarge_ratio=0.2):
left = bbox[0]
top = bbox[1]

right = bbox[2]
bottom = bbox[3]

roi_width = right - left
roi_height = bottom - top

new_left = left - int(roi_width * enlarge_ratio)
new_left = 0 if new_left < 0 else new_left

new_top = top - int(roi_height * enlarge_ratio)
new_top = 0 if new_top < 0 else new_top

new_right = right + int(roi_width * enlarge_ratio)
new_right = img_width if new_right > img_width else new_right

new_bottom = bottom + int(roi_height * enlarge_ratio)
new_bottom = img_height if new_bottom > img_height else new_bottom

bbox = [new_left, new_top, new_right, new_bottom]

bbox = [int(x) for x in bbox]

return bbox


def get_map_fusion_map_cuda(map_list, threshold=1, device=torch.device('cpu')):
map_list_cuda = [torch.from_numpy(x).to(device) for x in map_list]
map_concat = torch.stack(tuple(map_list_cuda), dim=-1)

map_concat = torch.abs(map_concat)

map_concat[map_concat < threshold] = 0
map_concat[map_concat > 1e-5] = 1.0

sum_map = torch.sum(map_concat, dim=2)
a = torch.ones_like(sum_map)
acc_map = torch.where(sum_map > 0, a * 2.0, torch.zeros_like(sum_map))

fusion_map = torch.where(sum_map < 0.5, a * 1.5, sum_map)

fusion_map = fusion_map.float()
acc_map = acc_map.float()

fusion_map = fusion_map.cpu().numpy().astype(np.float32)
acc_map = acc_map.cpu().numpy().astype(np.float32)

return fusion_map, acc_map


def gen_border_shade(height, width, height_band, width_band):
height_ratio = height_band * 1.0 / height
width_ratio = width_band * 1.0 / width

_height_band = int(256 * height_ratio)
_width_band = int(256 * width_ratio)

canvas = np.zeros((256, 256), dtype=np.float32)

canvas[_height_band // 2:-_height_band // 2,
_width_band // 2:-_width_band // 2] = 1.0

canvas = cv2.blur(canvas, (_height_band, _width_band))

canvas = cv2.resize(canvas, (width, height))

return canvas


def get_mask_bbox(mask, threshold=127):
ret, mask = cv2.threshold(mask, threshold, 1, 0)

if cv2.countNonZero(mask) == 0:
return [None, None, None, None]

col_acc = np.sum(mask, 0)
row_acc = np.sum(mask, 1)

col_acc = col_acc.tolist()
row_acc = row_acc.tolist()

for x in range(len(col_acc)):
if col_acc[x] > 0:
left = x
break

for x in range(1, len(col_acc)):
if col_acc[-x] > 0:
right = len(col_acc) - x
break

for x in range(len(row_acc)):
if row_acc[x] > 0:
top = x
break

for x in range(1, len(row_acc)):
if row_acc[-x] > 0:
bottom = len(row_acc) - x
break
return [top, bottom, left, right]


def visualize_flow(flow):
h, w = flow.shape[:2]
hsv = np.zeros((h, w, 3), np.uint8)
mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])

hsv[..., 0] = ang * 180 / np.pi / 2
hsv[..., 1] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
hsv[..., 2] = 255
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
bgr = bgr * 1.0 / 255
return bgr.astype(np.float32)


def vis_joints(image, joints, color, show_text=True, confidence_threshold=0.1):

part_orders = [(2, 5), (5, 11), (2, 8), (8, 11), (5, 6), (6, 7), (2, 3),
(3, 4), (11, 12), (12, 13), (8, 9), (9, 10)]

abandon_idxs = [0, 1, 14, 15, 16, 17]
# draw joints
for i, joint in enumerate(joints):
if i in abandon_idxs:
continue
if joint[-1] > confidence_threshold:

cv2.circle(image, (int(joint[0]), int(joint[1])), 1, color, 2)
if show_text:
cv2.putText(image,
str(i) + '[{:.2f}]'.format(joint[-1]),
(int(joint[0]), int(joint[1])),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# draw link
for pair in part_orders:
if joints[pair[0]][-1] > confidence_threshold and joints[
pair[1]][-1] > confidence_threshold:
cv2.line(image, (int(joints[pair[0]][0]), int(joints[pair[0]][1])),
(int(joints[pair[1]][0]), int(joints[pair[1]][1])), color,
2)
return image


def get_heatmap_cv(img, magn, max_flow_mag):
min_flow_mag = .5
cv_magn = np.clip(
255 * (magn - min_flow_mag) / (max_flow_mag - min_flow_mag + 1e-7),
a_min=0,
a_max=255).astype(np.uint8)
if img.dtype != np.uint8:
img = (255 * img).astype(np.uint8)

heatmap_img = cv2.applyColorMap(cv_magn, cv2.COLORMAP_JET)
heatmap_img = heatmap_img[..., ::-1]

h, w = magn.shape
img_alpha = np.ones((h, w), dtype=np.double)[:, :, None]
heatmap_alpha = np.clip(
magn / (max_flow_mag + 1e-7), a_min=1e-7, a_max=1)[:, :, None]**.7
heatmap_alpha[heatmap_alpha < .2] **= .5
pm_hm = heatmap_img * heatmap_alpha
pm_img = img * img_alpha
cv_out = pm_hm + pm_img * (1 - heatmap_alpha)
cv_out = np.clip(cv_out, a_min=0, a_max=255).astype(np.uint8)

return cv_out


def save_heatmap_cv(img, flow, supression=2):

flow_magn = np.sqrt(flow[:, :, 0]**2 + flow[:, :, 1]**2)
flow_magn -= supression
flow_magn[flow_magn <= 0] = 0
cv_out = get_heatmap_cv(img, flow_magn, np.max(flow_magn) * 1.3)
return cv_out


@numba.jit(nopython=True, parallel=False)
def bilinear_interp(x, y, v11, v12, v21, v22):
temp1 = (v11 * (1 - y) + v12 * y) * (1 - x)
temp2 = (v21 * (1 - y) + v22 * y) * x
result = temp1 + temp2
return result


@numba.jit(nopython=True, parallel=False)
def image_warp_grid1(rDx, rDy, oriImg, transRatio, width_expand,
height_expand):
srcW = oriImg.shape[1]
srcH = oriImg.shape[0]

newImg = oriImg.copy()

for i in range(srcH):
for j in range(srcW):
_i = i
_j = j

deltaX = rDx[_i, _j]
deltaY = rDy[_i, _j]

nx = _j + deltaX * transRatio
ny = _i + deltaY * transRatio

if nx >= srcW - width_expand - 1:
if nx > srcW - 1:
nx = srcW - 1

if ny >= srcH - height_expand - 1:
if ny > srcH - 1:
ny = srcH - 1

if nx < width_expand:
if nx < 0:
nx = 0

if ny < height_expand:
if ny < 0:
ny = 0

nxi = int(math.floor(nx))
nyi = int(math.floor(ny))
nxi1 = int(math.ceil(nx))
nyi1 = int(math.ceil(ny))

for ll in range(3):
newImg[_i, _j,
ll] = bilinear_interp(ny - nyi, nx - nxi,
oriImg[nyi, nxi,
ll], oriImg[nyi, nxi1, ll],
oriImg[nyi1, nxi,
ll], oriImg[nyi1, nxi1,
ll])
return newImg
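A zero-flow sanity check for the numba-compiled warp: with rDx = rDy = 0 every pixel maps to itself, so the warped image equals the input:

import numpy as np

h, w = 64, 48
img = np.random.randint(0, 255, size=(h, w, 3)).astype(np.uint8)
zero_flow = np.zeros((h, w), dtype=np.float32)
out = image_warp_grid1(zero_flow, zero_flow, img, 1.0, 0, 0)
assert np.array_equal(out, img)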

+1 -0 modelscope/outputs.py

@@ -184,6 +184,7 @@ TASK_OUTPUTS = {
Tasks.image_to_image_translation: [OutputKeys.OUTPUT_IMG],
Tasks.image_style_transfer: [OutputKeys.OUTPUT_IMG],
Tasks.image_portrait_stylization: [OutputKeys.OUTPUT_IMG],
Tasks.image_body_reshaping: [OutputKeys.OUTPUT_IMG],


# live category recognition result for single video
# {


+2 -0 modelscope/pipelines/builder.py

@@ -75,6 +75,8 @@ DEFAULT_MODEL_FOR_PIPELINE = {
'damo/nlp_bart_text-error-correction_chinese'),
Tasks.image_captioning: (Pipelines.image_captioning,
'damo/ofa_image-caption_coco_large_en'),
Tasks.image_body_reshaping: (Pipelines.image_body_reshaping,
'damo/cv_flow-based-body-reshaping_damo'),
Tasks.image_portrait_stylization:
(Pipelines.person_image_cartoon,
'damo/cv_unet_person-image-cartoon_compound-models'),


+40 -0 modelscope/pipelines/cv/image_body_reshaping_pipeline.py

@@ -0,0 +1,40 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict

from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import LoadImage
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()


@PIPELINES.register_module(
Tasks.image_body_reshaping, module_name=Pipelines.image_body_reshaping)
class ImageBodyReshapingPipeline(Pipeline):

def __init__(self, model: str, **kwargs):
"""
Use `model` to create an image body reshaping pipeline for prediction.
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model, **kwargs)
logger.info('body reshaping model init done')

def preprocess(self, input: Input) -> Dict[str, Any]:
img = LoadImage.convert_to_ndarray(input)
result = {'img': img}
return result

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
output = self.model.inference(input['img'])
result = {'outputs': output}
return result

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
output_img = inputs['outputs']
return {OutputKeys.OUTPUT_IMG: output_img}
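End-to-end usage mirrors the test below; a minimal sketch using the default model registered in builder.py:

import cv2
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

reshaper = pipeline(Tasks.image_body_reshaping,
                    model='damo/cv_flow-based-body-reshaping_damo')
result = reshaper('data/test/images/image_body_reshaping.jpg')
cv2.imwrite('result_body_reshaping.png', result[OutputKeys.OUTPUT_IMG])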

+1 -1 modelscope/utils/constant.py

@@ -60,7 +60,7 @@ class CVTasks(object):
image_to_image_generation = 'image-to-image-generation'
image_style_transfer = 'image-style-transfer'
image_portrait_stylization = 'image-portrait-stylization'
image_body_reshaping = 'image-body-reshaping'
image_embedding = 'image-embedding'


product_retrieval_embedding = 'product-retrieval-embedding'


+1 -0 requirements/cv.txt

@@ -13,6 +13,7 @@ ml_collections
mmcls>=0.21.0
mmdet>=2.25.0
networkx>=2.5
numba
onnxruntime>=1.10
pai-easycv>=0.6.3.6
pandas


+58 -0 tests/pipelines/test_image_body_reshaping.py

@@ -0,0 +1,58 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp
import unittest

import cv2

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level


class ImageBodyReshapingTest(unittest.TestCase, DemoCompatibilityCheck):

def setUp(self) -> None:
self.task = Tasks.image_body_reshaping
self.model_id = 'damo/cv_flow-based-body-reshaping_damo'
self.test_image = 'data/test/images/image_body_reshaping.jpg'

def pipeline_inference(self, pipeline: Pipeline, input_location: str):
result = pipeline(input_location)
if result is not None:
cv2.imwrite('result_body_reshaping.png',
result[OutputKeys.OUTPUT_IMG])
print(
f'Output written to {osp.abspath("result_body_reshaping.png")}'
)
else:
raise Exception('Testing failed: invalid output')

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_by_direct_model_download(self):
model_dir = snapshot_download(self.model_id)
image_body_reshaping = pipeline(
Tasks.image_body_reshaping, model=model_dir)
self.pipeline_inference(image_body_reshaping, self.test_image)

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_modelhub(self):
image_body_reshaping = pipeline(
Tasks.image_body_reshaping, model=self.model_id)
self.pipeline_inference(image_body_reshaping, self.test_image)

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_modelhub_default_model(self):
image_body_reshaping = pipeline(Tasks.image_body_reshaping)
self.pipeline_inference(image_body_reshaping, self.test_image)

@unittest.skip('demo compatibility test is only enabled on a needed-basis')
def test_demo_compatibility(self):
self.compatibility_check()


if __name__ == '__main__':
unittest.main()
