diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
index 91d0a4b6..b32fed0d 100644
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -76,7 +76,8 @@ class Pipelines(object):
     person_image_cartoon = 'unet-person-image-cartoon'
     ocr_detection = 'resnet18-ocr-detection'
     action_recognition = 'TAdaConv_action-recognition'
-    animal_recognation = 'resnet101-animal_recog'
+    animal_recognition = 'resnet101-animal-recognition'
+    general_recognition = 'resnet101-general-recognition'
     cmdssl_video_embedding = 'cmdssl-r2p1d_video_embedding'
     body_2d_keypoints = 'hrnetv2w32_body-2d-keypoints_image'
     human_detection = 'resnet18-human-detection'
diff --git a/modelscope/models/cv/animal_recognition/resnet.py b/modelscope/models/cv/animal_recognition/resnet.py
index 1fd4b93e..73953de4 100644
--- a/modelscope/models/cv/animal_recognition/resnet.py
+++ b/modelscope/models/cv/animal_recognition/resnet.py
@@ -81,8 +81,7 @@ class Bottleneck(nn.Module):
                 norm_layer=norm_layer,
                 dropblock_prob=dropblock_prob)
         elif rectified_conv:
-            from rfconv import RFConv2d
-            self.conv2 = RFConv2d(
+            self.conv2 = nn.Conv2d(
                 group_width,
                 group_width,
                 kernel_size=3,
@@ -90,8 +89,7 @@
                 padding=dilation,
                 dilation=dilation,
                 groups=cardinality,
-                bias=False,
-                average_mode=rectify_avg)
+                bias=False)
             self.bn2 = norm_layer(group_width)
         else:
             self.conv2 = nn.Conv2d(
@@ -190,8 +188,7 @@ class ResNet(nn.Module):
         self.rectified_conv = rectified_conv
         self.rectify_avg = rectify_avg
         if rectified_conv:
-            from rfconv import RFConv2d
-            conv_layer = RFConv2d
+            conv_layer = nn.Conv2d
         else:
             conv_layer = nn.Conv2d
-        conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}
+        conv_kwargs = {}
diff --git a/modelscope/models/cv/animal_recognition/splat.py b/modelscope/models/cv/animal_recognition/splat.py
index b12bf154..0aab555e 100644
--- a/modelscope/models/cv/animal_recognition/splat.py
+++ b/modelscope/models/cv/animal_recognition/splat.py
@@ -39,8 +39,7 @@ class SplAtConv2d(Module):
         self.channels = channels
         self.dropblock_prob = dropblock_prob
         if self.rectify:
-            from rfconv import RFConv2d
-            self.conv = RFConv2d(
+            self.conv = Conv2d(
                 in_channels,
                 channels * radix,
                 kernel_size,
@@ -49,7 +48,6 @@
                 dilation,
                 groups=groups * radix,
                 bias=bias,
-                average_mode=rectify_avg,
                 **kwargs)
         else:
             self.conv = Conv2d(
diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py
index d7a8da2c..6027923e 100644
--- a/modelscope/pipelines/cv/__init__.py
+++ b/modelscope/pipelines/cv/__init__.py
@@ -10,8 +10,9 @@ if TYPE_CHECKING:
     from .cmdssl_video_embedding_pipeline import CMDSSLVideoEmbeddingPipeline
     from .image_detection_pipeline import ImageDetectionPipeline
     from .face_detection_pipeline import FaceDetectionPipeline
-    from .face_recognition_pipeline import FaceRecognitionPipeline
     from .face_image_generation_pipeline import FaceImageGenerationPipeline
+    from .face_recognition_pipeline import FaceRecognitionPipeline
+    from .general_recognition_pipeline import GeneralRecognitionPipeline
     from .image_cartoon_pipeline import ImageCartoonPipeline
     from .image_classification_pipeline import GeneralImageClassificationPipeline
     from .image_color_enhance_pipeline import ImageColorEnhancePipeline
@@ -23,7 +24,7 @@ if TYPE_CHECKING:
     from .image_portrait_enhancement_pipeline import ImagePortraitEnhancementPipeline
     from .image_style_transfer_pipeline import ImageStyleTransferPipeline
     from .image_super_resolution_pipeline import ImageSuperResolutionPipeline
-    from .image_to_image_generate_pipeline import Image2ImageGenerationePipeline
+    from .image_to_image_generate_pipeline import Image2ImageGenerationPipeline
     from .image_to_image_translation_pipeline import Image2ImageTranslationPipeline
     from .product_retrieval_embedding_pipeline import ProductRetrievalEmbeddingPipeline
     from .live_category_pipeline import LiveCategoryPipeline
@@ -41,6 +42,7 @@ else:
         'face_detection_pipeline': ['FaceDetectionPipeline'],
         'face_image_generation_pipeline': ['FaceImageGenerationPipeline'],
         'face_recognition_pipeline': ['FaceRecognitionPipeline'],
+        'general_recognition_pipeline': ['GeneralRecognitionPipeline'],
         'image_classification_pipeline':
         ['GeneralImageClassificationPipeline', 'ImageClassificationPipeline'],
         'image_cartoon_pipeline': ['ImageCartoonPipeline'],
@@ -60,7 +62,7 @@ else:
             ['ProductRetrievalEmbeddingPipeline'],
         'live_category_pipeline': ['LiveCategoryPipeline'],
         'image_to_image_generation_pipeline':
-        ['Image2ImageGenerationePipeline'],
+        ['Image2ImageGenerationPipeline'],
         'ocr_detection_pipeline': ['OCRDetectionPipeline'],
         'skin_retouching_pipeline': ['SkinRetouchingPipeline'],
         'video_category_pipeline': ['VideoCategoryPipeline'],
diff --git a/modelscope/pipelines/cv/animal_recognition_pipeline.py b/modelscope/pipelines/cv/animal_recognition_pipeline.py
index ab0232bd..18cba92c 100644
--- a/modelscope/pipelines/cv/animal_recognition_pipeline.py
+++ b/modelscope/pipelines/cv/animal_recognition_pipeline.py
@@ -21,7 +21,7 @@ logger = get_logger()


 @PIPELINES.register_module(
-    Tasks.image_classification, module_name=Pipelines.animal_recognation)
+    Tasks.animal_recognition, module_name=Pipelines.animal_recognition)
 class AnimalRecognitionPipeline(Pipeline):

     def __init__(self, model: str, **kwargs):
diff --git a/modelscope/pipelines/cv/general_recognition_pipeline.py b/modelscope/pipelines/cv/general_recognition_pipeline.py
new file mode 100644
index 00000000..9ba5117b
--- /dev/null
+++ b/modelscope/pipelines/cv/general_recognition_pipeline.py
@@ -0,0 +1,122 @@
+import os.path as osp
+from typing import Any, Dict
+
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from torchvision import transforms
+
+from modelscope.hub.snapshot_download import snapshot_download
+from modelscope.metainfo import Pipelines
+from modelscope.models.cv.animal_recognition import resnet
+from modelscope.outputs import OutputKeys
+from modelscope.pipelines.base import Input, Pipeline
+from modelscope.pipelines.builder import PIPELINES
+from modelscope.preprocessors import LoadImage, load_image
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+    Tasks.general_recognition, module_name=Pipelines.general_recognition)
+class GeneralRecognitionPipeline(Pipeline):
+
+    def __init__(self, model: str, device: str = 'cpu'):
+        """
+        Use `model` to create a general recognition pipeline for prediction.
+        Args:
+            model: model id on modelscope hub or a local model directory.
+            device: map location for loading the checkpoint, 'cpu' by default.
+        """
+        super().__init__(model=model)
+
+        def resnest101(**kwargs):
+            # ResNeSt-101: ResNet with split-attention bottleneck blocks
+            model = resnet.ResNet(
+                resnet.Bottleneck, [3, 4, 23, 3],
+                radix=2,
+                groups=1,
+                bottleneck_width=64,
+                deep_stem=True,
+                stem_width=64,
+                avg_down=True,
+                avd=True,
+                avd_first=False,
+                **kwargs)
+            return model
+
+        def filter_param(src_params, own_state):
+            # copy only parameters whose names and shapes match, tolerating
+            # a leading 'module.' prefix left behind by DataParallel
+            copied_keys = []
+            for name, param in src_params.items():
+                if 'module.' == name[0:7]:
+                    name = name[7:]
+                if '.module.' not in list(own_state.keys())[0]:
+                    name = name.replace('.module.', '.')
+                if (name in own_state) and (own_state[name].shape
+                                            == param.shape):
+                    own_state[name].copy_(param)
+                    copied_keys.append(name)
+
+        def load_pretrained(model, src_params):
+            if 'state_dict' in src_params:
+                src_params = src_params['state_dict']
+            own_state = model.state_dict()
+            filter_param(src_params, own_state)
+            model.load_state_dict(own_state)
+
+        self.model = resnest101(num_classes=54092)
+        if osp.exists(model):
+            local_model_dir = model
+        else:
+            local_model_dir = snapshot_download(model)
+        self.local_path = local_model_dir
+        src_params = torch.load(
+            osp.join(local_model_dir, ModelFile.TORCH_MODEL_FILE), device)
+        load_pretrained(self.model, src_params)
+        logger.info('load model done')
+
+    def preprocess(self, input: Input) -> Dict[str, Any]:
+        img = LoadImage.convert_to_img(input)
+        normalize = transforms.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        transform = transforms.Compose([
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(), normalize
+        ])
+        img = transform(img)
+        result = {'img': img}
+        return result
+
+    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+
+        def set_phase(model, is_train):
+            if is_train:
+                model.train()
+            else:
+                model.eval()
+
+        is_train = False
+        set_phase(self.model, is_train)
+        img = input['img']
+        input_img = torch.unsqueeze(img, 0)
+        outputs = self.model(input_img)
+        return {'outputs': outputs}
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        label_mapping_path = osp.join(self.local_path, 'meta_info.txt')
+        with open(label_mapping_path, 'r') as f:
+            label_mapping = f.readlines()
+        score = torch.max(inputs['outputs'])
+        inputs = {
+            OutputKeys.SCORES:
+            score.item(),
+            OutputKeys.LABELS:
+            label_mapping[inputs['outputs'].argmax()].split('\t')[1]
+        }
+        return inputs
diff --git a/modelscope/pipelines/cv/image_to_image_generate_pipeline.py b/modelscope/pipelines/cv/image_to_image_generate_pipeline.py
index 6533a14c..2a3881e7 100644
--- a/modelscope/pipelines/cv/image_to_image_generate_pipeline.py
+++ b/modelscope/pipelines/cv/image_to_image_generate_pipeline.py
@@ -32,7 +32,7 @@ logger = get_logger()
 @PIPELINES.register_module(
     Tasks.image_to_image_generation,
     module_name=Pipelines.image_to_image_generation)
-class Image2ImageGenerationePipeline(Pipeline):
+class Image2ImageGenerationPipeline(Pipeline):

     def __init__(self, model: str, **kwargs):
         """
diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py
index 2e49dfc5..4b49efdc 100644
--- a/modelscope/utils/constant.py
+++ b/modelscope/utils/constant.py
@@ -17,12 +17,14 @@ class CVTasks(object):
     ocr_recognition = 'ocr-recognition'

     # human face body related
+    animal_recognition = 'animal-recognition'
     face_detection = 'face-detection'
     face_recognition = 'face-recognition'
     human_detection = 'human-detection'
     human_object_interaction = 'human-object-interaction'
     face_image_generation = 'face-image-generation'
     body_2d_keypoints = 'body-2d-keypoints'
+    general_recognition = 'general-recognition'

     image_classification = 'image-classification'
     image_multilabel_classification = 'image-multilabel-classification'
diff --git a/tests/pipelines/test_animal_recognation.py b/tests/pipelines/test_animal_recognition.py
similarity index 67%
rename from tests/pipelines/test_animal_recognation.py
rename to tests/pipelines/test_animal_recognition.py
index b2f2a8ee..8b856396 100644
--- a/tests/pipelines/test_animal_recognation.py
+++ b/tests/pipelines/test_animal_recognition.py
@@ -5,14 +5,14 @@ from modelscope.utils.constant import Tasks
 from modelscope.utils.test_utils import test_level


-class MultiModalFeatureTest(unittest.TestCase):
+class AnimalRecognitionTest(unittest.TestCase):

     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run(self):
-        animal_recog = pipeline(
-            Tasks.image_classification,
+        animal_recognition = pipeline(
+            Tasks.animal_recognition,
             model='damo/cv_resnest101_animal_recognition')
-        result = animal_recog('data/test/images/dogs.jpg')
+        result = animal_recognition('data/test/images/dogs.jpg')
         print(result)


diff --git a/tests/pipelines/test_general_recognition.py b/tests/pipelines/test_general_recognition.py
new file mode 100644
index 00000000..0e1117d9
--- /dev/null
+++ b/tests/pipelines/test_general_recognition.py
@@ -0,0 +1,20 @@
+import unittest
+
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class GeneralRecognitionTest(unittest.TestCase):
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_run(self):
+        general_recognition = pipeline(
+            Tasks.general_recognition,
+            model='damo/cv_resnest101_general_recognition')
+        result = general_recognition('data/test/images/dogs.jpg')
+        print(result)
+
+
+if __name__ == '__main__':
+    unittest.main()
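
A minimal usage sketch for the pipelines this patch renames and adds; it is illustration only, not part of the patch. It assumes the model ids used in the tests above and a local test image at data/test/images/dogs.jpg.

```python
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# The two registrations introduced above: animal recognition now lives under
# Tasks.animal_recognition (previously Tasks.image_classification), and
# general recognition is a newly added task.
animal_recognition = pipeline(
    Tasks.animal_recognition, model='damo/cv_resnest101_animal_recognition')
general_recognition = pipeline(
    Tasks.general_recognition, model='damo/cv_resnest101_general_recognition')

result = general_recognition('data/test/images/dogs.jpg')
# GeneralRecognitionPipeline.postprocess returns a dict with the top-1 score
# and the corresponding label read from meta_info.txt.
print(result[OutputKeys.SCORES], result[OutputKeys.LABELS])
```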