diff --git a/modelscope/models/cv/face_emotion/emotion_infer.py b/modelscope/models/cv/face_emotion/emotion_infer.py
index e3398592..618822ff 100644
--- a/modelscope/models/cv/face_emotion/emotion_infer.py
+++ b/modelscope/models/cv/face_emotion/emotion_infer.py
@@ -25,9 +25,9 @@ emotion_list = [
 ]
 
 
-def inference(image_path, model, face_model, score_thre=0.5, GPU=0):
-    image = Image.open(image_path).convert('RGB')
-
+def inference(image, model, face_model, score_thre=0.5, GPU=0):
+    image = image.cpu().numpy()
+    image = Image.fromarray(image)
     face, bbox = face_detection_PIL_v2(image, face_model)
     if bbox is None:
         logger.warn('no face detected!')
diff --git a/modelscope/models/cv/face_human_hand_detection/det_infer.py b/modelscope/models/cv/face_human_hand_detection/det_infer.py
index 7a7225ee..6822bd9f 100644
--- a/modelscope/models/cv/face_human_hand_detection/det_infer.py
+++ b/modelscope/models/cv/face_human_hand_detection/det_infer.py
@@ -115,9 +115,9 @@ std = [57.375, 57.12, 58.395]
 class_names = ['person', 'face', 'hand']
 
 
-def inference(model, device, img_path):
+def inference(model, device, img):
+    img = img.cpu().numpy()
     img_info = {'id': 0}
-    img = cv2.imread(img_path)
     height, width = img.shape[:2]
     img_info['height'] = height
     img_info['width'] = width
@@ -130,4 +130,9 @@ def inference(model, device, img_path):
     with torch.no_grad():
         res = model(meta)
     result = overlay_bbox_cv(res[0], class_names, score_thresh=0.35)
-    return result
+    cls_list, bbox_list, score_list = [], [], []
+    for pred in result:
+        cls_list.append(pred[0])
+        bbox_list.append([pred[1], pred[2], pred[3], pred[4]])
+        score_list.append(pred[5])
+    return cls_list, bbox_list, score_list
diff --git a/modelscope/models/cv/hand_static/hand_model.py b/modelscope/models/cv/hand_static/hand_model.py
index 38517307..7a8a323e 100644
--- a/modelscope/models/cv/hand_static/hand_model.py
+++ b/modelscope/models/cv/hand_static/hand_model.py
@@ -8,7 +8,7 @@ import torch
 import torch.nn.functional as F
 from PIL import Image
 from torch import nn
-from torchvision.transforms import transforms
+from torchvision import transforms
 
 from modelscope.metainfo import Models
 from modelscope.models.base import TorchModel
@@ -80,9 +80,9 @@ class HandStatic(TorchModel):
         return pred_result
 
 
-def infer(img_path, model, device):
-
-    img = Image.open(img_path)
+def infer(img, model, device):
+    img = img.cpu().numpy()
+    img = Image.fromarray(img)
     clip = spatial_transform(img)
     clip = clip.unsqueeze(0).to(device).float()
     outputs = model(clip)
diff --git a/modelscope/models/cv/product_segmentation/seg_infer.py b/modelscope/models/cv/product_segmentation/seg_infer.py
index 876fac66..8814d619 100644
--- a/modelscope/models/cv/product_segmentation/seg_infer.py
+++ b/modelscope/models/cv/product_segmentation/seg_infer.py
@@ -59,9 +59,8 @@ mean, std = np.array([[[124.55, 118.90,
                         102.94]]]), np.array([[[56.77, 55.97, 57.50]]])
 
 
-def inference(model, device, input_path):
-    img = Image.open(input_path)
-    img = np.array(img.convert('RGB')).astype(np.float32)
+def inference(model, device, img):
+    img = img.cpu().numpy()
     img = (img - mean) / std
     img = cv2.resize(img, dsize=(448, 448), interpolation=cv2.INTER_LINEAR)
     img = torch.from_numpy(img)
diff --git a/modelscope/outputs/outputs.py b/modelscope/outputs/outputs.py
index 721fb271..cbdeede4 100644
--- a/modelscope/outputs/outputs.py
+++ b/modelscope/outputs/outputs.py
@@ -762,12 +762,13 @@ TASK_OUTPUTS = {
     # }
     Tasks.hand_static: [OutputKeys.OUTPUT],
 
-    # 'output': [
-    #     [2, 75, 287, 240, 510, 0.8335018754005432],
-    #     [1, 127, 83, 332, 366, 0.9175254702568054],
-    #     [0, 0, 0, 367, 639, 0.9693422317504883]]
+    # {'labels': [2, 1, 0],
+    #  'boxes': [[78, 282, 240, 504], [127, 87, 332, 370], [0, 0, 367, 639]],
+    #  'scores': [0.8202137351036072, 0.8987470269203186, 0.9679114818572998]
     # }
-    Tasks.face_human_hand_detection: [OutputKeys.OUTPUT],
+    Tasks.face_human_hand_detection: [
+        OutputKeys.LABELS, OutputKeys.BOXES, OutputKeys.SCORES
+    ],
 
     # {
     # {'output': 'Happiness', 'boxes': (203, 104, 663, 564)}
diff --git a/modelscope/pipelines/cv/face_emotion_pipeline.py b/modelscope/pipelines/cv/face_emotion_pipeline.py
index 249493b6..9d9aa6ee 100644
--- a/modelscope/pipelines/cv/face_emotion_pipeline.py
+++ b/modelscope/pipelines/cv/face_emotion_pipeline.py
@@ -1,11 +1,14 @@
 # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
 from typing import Any, Dict
 
+import numpy as np
+
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.face_emotion import emotion_infer
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Input, Pipeline
 from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage
 from modelscope.utils.constant import ModelFile, Tasks
 from modelscope.utils.logger import get_logger
 
@@ -28,10 +31,11 @@ class FaceEmotionPipeline(Pipeline):
         logger.info('load model done')
 
     def preprocess(self, input: Input) -> Dict[str, Any]:
-        return input
+        img = LoadImage.convert_to_ndarray(input['img_path'])
+        return img
 
     def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
-        result, bbox = emotion_infer.inference(input['img_path'], self.model,
+        result, bbox = emotion_infer.inference(input, self.model,
                                                self.face_model)
         return {OutputKeys.OUTPUT: result, OutputKeys.BOXES: bbox}
 
diff --git a/modelscope/pipelines/cv/face_human_hand_detection_pipeline.py b/modelscope/pipelines/cv/face_human_hand_detection_pipeline.py
index d9f214c9..d41a14dd 100644
--- a/modelscope/pipelines/cv/face_human_hand_detection_pipeline.py
+++ b/modelscope/pipelines/cv/face_human_hand_detection_pipeline.py
@@ -2,11 +2,14 @@
 
 from typing import Any, Dict
 
+import numpy as np
+
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.face_human_hand_detection import det_infer
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Input, Pipeline
 from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage
 from modelscope.utils.constant import Tasks
 from modelscope.utils.logger import get_logger
 
@@ -29,14 +32,19 @@ class NanoDettForFaceHumanHandDetectionPipeline(Pipeline):
         logger.info('load model done')
 
     def preprocess(self, input: Input) -> Dict[str, Any]:
-        return input
+        img = LoadImage.convert_to_ndarray(input['input_path'])
+        return img
 
     def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
-        result = det_infer.inference(self.model, self.device,
-                                     input['input_path'])
-        logger.info(result)
-        return {OutputKeys.OUTPUT: result}
+        cls_list, bbox_list, score_list = det_infer.inference(
+            self.model, self.device, input)
+        logger.info('%s %s %s', cls_list, bbox_list, score_list)
+        return {
+            OutputKeys.LABELS: cls_list,
+            OutputKeys.BOXES: bbox_list,
+            OutputKeys.SCORES: score_list
+        }
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         return inputs
diff --git a/modelscope/pipelines/cv/hand_static_pipeline.py b/modelscope/pipelines/cv/hand_static_pipeline.py
index 1219c873..c020b7aa 100644
--- a/modelscope/pipelines/cv/hand_static_pipeline.py
+++ b/modelscope/pipelines/cv/hand_static_pipeline.py
@@ -1,11 +1,14 @@
 # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
 from typing import Any, Dict
 
+import numpy as np
+
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.hand_static import hand_model
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Input, Pipeline
 from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage
 from modelscope.utils.constant import Tasks
 from modelscope.utils.logger import get_logger
 
@@ -27,10 +30,11 @@ class HandStaticPipeline(Pipeline):
         logger.info('load model done')
 
     def preprocess(self, input: Input) -> Dict[str, Any]:
-        return input
+        img = LoadImage.convert_to_ndarray(input['img_path'])
+        return img
 
     def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
-        result = hand_model.infer(input['img_path'], self.model, self.device)
+        result = hand_model.infer(input, self.model, self.device)
         return {OutputKeys.OUTPUT: result}
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/modelscope/pipelines/cv/product_segmentation_pipeline.py b/modelscope/pipelines/cv/product_segmentation_pipeline.py
index 244b01d7..3b1b2381 100644
--- a/modelscope/pipelines/cv/product_segmentation_pipeline.py
+++ b/modelscope/pipelines/cv/product_segmentation_pipeline.py
@@ -2,11 +2,14 @@
 
 from typing import Any, Dict
 
+import numpy as np
+
 from modelscope.metainfo import Pipelines
 from modelscope.models.cv.product_segmentation import seg_infer
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines.base import Input, Pipeline
 from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage
 from modelscope.utils.constant import Tasks
 from modelscope.utils.logger import get_logger
 
@@ -28,12 +31,13 @@ class F3NetForProductSegmentationPipeline(Pipeline):
         logger.info('load model done')
 
     def preprocess(self, input: Input) -> Dict[str, Any]:
-        return input
+        img = LoadImage.convert_to_ndarray(input['input_path'])
+        img = img.astype(np.float32)
+        return img
 
     def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
-        mask = seg_infer.inference(self.model, self.device,
-                                   input['input_path'])
+        mask = seg_infer.inference(self.model, self.device, input)
        return {OutputKeys.MASKS: mask}
 
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
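Usage note (not part of the patch): a minimal sketch of how the face-human-hand detection pipeline could be called after this change. The model id and the local image path are placeholders; the `'input_path'` key mirrors what the new `preprocess` reads, and the output keys are the `LABELS`/`BOXES`/`SCORES` introduced above.

```python
# Minimal sketch, assuming a registered face-human-hand detection model.
# The model id and image path below are placeholders, not values from this PR.
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

detector = pipeline(
    Tasks.face_human_hand_detection,
    model='damo/cv_nanodet_face-human-hand-detection')  # hypothetical model id

result = detector({'input_path': 'data/test/images/test.jpg'})  # placeholder path

# class_names = ['person', 'face', 'hand'], so e.g. labels [2, 1, 0]
# correspond to hand, face, person.
print(result[OutputKeys.LABELS])   # per-detection class ids
print(result[OutputKeys.BOXES])    # [[x1, y1, x2, y2], ...]
print(result[OutputKeys.SCORES])   # per-detection confidence
```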