Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9477652 (master)
@@ -76,7 +76,8 @@ class Pipelines(object):
     person_image_cartoon = 'unet-person-image-cartoon'
     ocr_detection = 'resnet18-ocr-detection'
     action_recognition = 'TAdaConv_action-recognition'
-    animal_recognation = 'resnet101-animal_recog'
+    animal_recognition = 'resnet101-animal-recognition'
+    general_recognition = 'resnet101-general-recognition'
     cmdssl_video_embedding = 'cmdssl-r2p1d_video_embedding'
     body_2d_keypoints = 'hrnetv2w32_body-2d-keypoints_image'
     human_detection = 'resnet18-human-detection'
@@ -81,8 +81,7 @@ class Bottleneck(nn.Module):
                 norm_layer=norm_layer,
                 dropblock_prob=dropblock_prob)
         elif rectified_conv:
-            from rfconv import RFConv2d
-            self.conv2 = RFConv2d(
+            self.conv2 = nn.Conv2d(
                 group_width,
                 group_width,
                 kernel_size=3,
@@ -90,8 +89,7 @@ class Bottleneck(nn.Module):
                 padding=dilation,
                 dilation=dilation,
                 groups=cardinality,
-                bias=False,
-                average_mode=rectify_avg)
+                bias=False)
             self.bn2 = norm_layer(group_width)
         else:
             self.conv2 = nn.Conv2d(
@@ -190,8 +188,7 @@ class ResNet(nn.Module):
         self.rectified_conv = rectified_conv
         self.rectify_avg = rectify_avg
         if rectified_conv:
-            from rfconv import RFConv2d
-            conv_layer = RFConv2d
+            conv_layer = nn.Conv2d
         else:
             conv_layer = nn.Conv2d
         conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}
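Note on the hunk above: with `RFConv2d` gone, the `conv_kwargs` context line still builds `{'average_mode': rectify_avg}` whenever `rectified_conv` is set, and `nn.Conv2d` rejects that keyword at call time. A sketch of the one-line follow-up this would need (not part of this hunk):

```python
# nn.Conv2d has no `average_mode` parameter, so once RFConv2d is removed
# the rectified branch must stop forwarding it:
conv_kwargs = {}
```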
@@ -39,8 +39,7 @@ class SplAtConv2d(Module):
         self.channels = channels
         self.dropblock_prob = dropblock_prob
         if self.rectify:
-            from rfconv import RFConv2d
-            self.conv = RFConv2d(
+            self.conv = Conv2d(
                 in_channels,
                 channels * radix,
                 kernel_size,
@@ -49,7 +48,6 @@ class SplAtConv2d(Module):
                 dilation,
                 groups=groups * radix,
                 bias=bias,
-                average_mode=rectify_avg,
                 **kwargs)
         else:
             self.conv = Conv2d(
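Taken together, these hunks remove the hard dependency on the external `rfconv` package by always constructing plain `nn.Conv2d` layers. If rectified convolutions ever need to come back as an optional feature, a guarded import keeps the module importable without `rfconv`. A minimal sketch, assuming `RFConv2d` still accepts the `average_mode` keyword the removed code used (the helper name is hypothetical):

```python
import torch.nn as nn

# Sketch only: optional rfconv dependency with a plain-Conv2d fallback.
try:
    from rfconv import RFConv2d  # rectified convolution, optional extra
except ImportError:
    RFConv2d = None


def make_conv3x3(in_ch, out_ch, rectified=False, rectify_avg=False, **kw):
    """Build a 3x3 conv, using RFConv2d only when it is installed."""
    if rectified and RFConv2d is not None:
        return RFConv2d(
            in_ch, out_ch, kernel_size=3, average_mode=rectify_avg, **kw)
    return nn.Conv2d(in_ch, out_ch, kernel_size=3, **kw)
```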
@@ -10,8 +10,9 @@ if TYPE_CHECKING:
     from .cmdssl_video_embedding_pipeline import CMDSSLVideoEmbeddingPipeline
     from .image_detection_pipeline import ImageDetectionPipeline
     from .face_detection_pipeline import FaceDetectionPipeline
-    from .face_recognition_pipeline import FaceRecognitionPipeline
     from .face_image_generation_pipeline import FaceImageGenerationPipeline
+    from .face_recognition_pipeline import FaceRecognitionPipeline
+    from .general_recognition_pipeline import GeneralRecognitionPipeline
     from .image_cartoon_pipeline import ImageCartoonPipeline
     from .image_classification_pipeline import GeneralImageClassificationPipeline
     from .image_color_enhance_pipeline import ImageColorEnhancePipeline
@@ -23,7 +24,7 @@ if TYPE_CHECKING:
     from .image_portrait_enhancement_pipeline import ImagePortraitEnhancementPipeline
     from .image_style_transfer_pipeline import ImageStyleTransferPipeline
     from .image_super_resolution_pipeline import ImageSuperResolutionPipeline
-    from .image_to_image_generate_pipeline import Image2ImageGenerationePipeline
+    from .image_to_image_generate_pipeline import Image2ImageGenerationPipeline
     from .image_to_image_translation_pipeline import Image2ImageTranslationPipeline
     from .product_retrieval_embedding_pipeline import ProductRetrievalEmbeddingPipeline
     from .live_category_pipeline import LiveCategoryPipeline
@@ -41,6 +42,7 @@ else:
         'face_detection_pipeline': ['FaceDetectionPipeline'],
         'face_image_generation_pipeline': ['FaceImageGenerationPipeline'],
         'face_recognition_pipeline': ['FaceRecognitionPipeline'],
+        'general_recognition_pipeline': ['GeneralRecognitionPipeline'],
         'image_classification_pipeline':
         ['GeneralImageClassificationPipeline', 'ImageClassificationPipeline'],
         'image_cartoon_pipeline': ['ImageCartoonPipeline'],
@@ -60,7 +62,7 @@ else:
         ['ProductRetrievalEmbeddingPipeline'],
         'live_category_pipeline': ['LiveCategoryPipeline'],
         'image_to_image_generation_pipeline':
-        ['Image2ImageGenerationePipeline'],
+        ['Image2ImageGenerationPipeline'],
         'ocr_detection_pipeline': ['OCRDetectionPipeline'],
         'skin_retouching_pipeline': ['SkinRetouchingPipeline'],
         'video_category_pipeline': ['VideoCategoryPipeline'],
@@ -21,7 +21,7 @@ logger = get_logger()


 @PIPELINES.register_module(
-    Tasks.image_classification, module_name=Pipelines.animal_recognation)
+    Tasks.animal_recognition, module_name=Pipelines.animal_recognition)
 class AnimalRecognitionPipeline(Pipeline):

     def __init__(self, model: str, **kwargs):
@@ -0,0 +1,121 @@
+import os.path as osp
+from typing import Any, Dict
+
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from torchvision import transforms
+
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.metainfo import Pipelines
+from modelscope.models.cv.animal_recognition import resnet
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import Input, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage, load_image
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+    Tasks.general_recognition, module_name=Pipelines.general_recognition)
+class GeneralRecognitionPipeline(Pipeline):
+
+    def __init__(self, model: str, device: str = 'cpu'):
+        """
+        Use `model` to create a general recognition pipeline for prediction.
+        Args:
+            model: model id on modelscope hub.
+            device: device the checkpoint is mapped to, 'cpu' by default.
+        """
+        super().__init__(model=model)
+
+        def resnest101(**kwargs):
+            model = resnet.ResNet(
+                resnet.Bottleneck, [3, 4, 23, 3],
+                radix=2,
+                groups=1,
+                bottleneck_width=64,
+                deep_stem=True,
+                stem_width=64,
+                avg_down=True,
+                avd=True,
+                avd_first=False,
+                **kwargs)
+            return model
+
+        def filter_param(src_params, own_state):
+            # Copy only weights whose (possibly 'module.'-prefixed) names
+            # and shapes match the freshly built model.
+            copied_keys = []
+            for name, param in src_params.items():
+                if 'module.' == name[0:7]:
+                    name = name[7:]
+                if '.module.' not in list(own_state.keys())[0]:
+                    name = name.replace('.module.', '.')
+                if (name in own_state) and (own_state[name].shape
+                                            == param.shape):
+                    own_state[name].copy_(param)
+                    copied_keys.append(name)
+
+        def load_pretrained(model, src_params):
+            if 'state_dict' in src_params:
+                src_params = src_params['state_dict']
+            own_state = model.state_dict()
+            filter_param(src_params, own_state)
+            model.load_state_dict(own_state)
+
+        self.model = resnest101(num_classes=54092)
+        if osp.exists(model):
+            local_model_dir = model
+        else:
+            local_model_dir = snapshot_download(model)
+        self.local_path = local_model_dir
+        src_params = torch.load(
+            osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), device)
+        load_pretrained(self.model, src_params)
+        logger.info('load model done')
+
+    def preprocess(self, input: Input) -> Dict[str, Any]:
+        img = LoadImage.convert_to_img(input)
+        # Standard ImageNet normalization.
+        normalize = transforms.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        transform = transforms.Compose([
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(), normalize
+        ])
+        img = transform(img)
+        result = {'img': img}
+        return result
+
+    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+        # Inference only: disable dropout/batchnorm updates and autograd.
+        self.model.eval()
+        input_img = torch.unsqueeze(input['img'], 0)
+        with torch.no_grad():
+            outputs = self.model(input_img)
+        return {'outputs': outputs}
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        label_mapping_path = osp.join(self.local_path, 'meta_info.txt')
+        with open(label_mapping_path, 'r') as f:
+            label_mapping = f.readlines()
+        score = torch.max(inputs['outputs'])
+        inputs = {
+            OutputKeys.SCORES:
+            score.item(),
+            OutputKeys.LABELS:
+            label_mapping[inputs['outputs'].argmax()].split('\t')[1]
+        }
+        return inputs
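One reviewer-style note on `postprocess` above: it returns the raw maximum logit as the score, so values are unbounded rather than probabilities. If calibrated scores are wanted later, a softmax over the logits would give them; a sketch of a possible follow-up, not part of this PR:

```python
import torch

# `outputs` is the [1, num_classes] logit tensor from forward().
probs = torch.softmax(outputs, dim=1)
score, idx = probs.max(dim=1)  # probability and index of the top class
```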
@@ -32,7 +32,7 @@ logger = get_logger()
 @PIPELINES.register_module(
     Tasks.image_to_image_generation,
     module_name=Pipelines.image_to_image_generation)
-class Image2ImageGenerationePipeline(Pipeline):
+class Image2ImageGenerationPipeline(Pipeline):

     def __init__(self, model: str, **kwargs):
         """
@@ -17,12 +17,14 @@ class CVTasks(object):
     ocr_recognition = 'ocr-recognition'

     # human face body related
+    animal_recognition = 'animal-recognition'
     face_detection = 'face-detection'
     face_recognition = 'face-recognition'
     human_detection = 'human-detection'
     human_object_interaction = 'human-object-interaction'
     face_image_generation = 'face-image-generation'
     body_2d_keypoints = 'body-2d-keypoints'
+    general_recognition = 'general-recognition'
     image_classification = 'image-classification'
     image_multilabel_classification = 'image-multilabel-classification'
@@ -5,14 +5,14 @@ from modelscope.utils.constant import Tasks
 from modelscope.utils.test_utils import test_level


-class MultiModalFeatureTest(unittest.TestCase):
+class AnimalRecognitionTest(unittest.TestCase):

     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run(self):
-        animal_recog = pipeline(
-            Tasks.image_classification,
+        animal_recognition = pipeline(
+            Tasks.animal_recognition,
             model='damo/cv_resnest101_animal_recognition')
-        result = animal_recog('data/test/images/dogs.jpg')
+        result = animal_recognition('data/test/images/dogs.jpg')
         print(result)
@@ -0,0 +1,20 @@
+import unittest
+
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class GeneralRecognitionTest(unittest.TestCase):
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_run(self):
+        general_recognition = pipeline(
+            Tasks.general_recognition,
+            model='damo/cv_resnest101_general_recognition')
+        result = general_recognition('data/test/images/dogs.jpg')
+        print(result)
+
+
+if __name__ == '__main__':
+    unittest.main()