
Merge branch 'master' into ofa/finetune

# Conflicts:
#	modelscope/preprocessors/multi_modal.py
#	modelscope/trainers/trainer.py
#	tests/pipelines/test_ofa_tasks.py
master · 行嗔 committed 3 years ago · parent · commit d979b90439
100 changed files with 2802 additions and 96 deletions
  1. +1 -0 .dev_scripts/dockerci.sh
  2. +3 -0 data/test/images/mog_face_detection.jpg
  3. +3 -0 data/test/images/mtcnn_face_detection.jpg
  4. +3 -0 data/test/images/multimodal_similarity.jpg
  5. +3 -0 data/test/images/ulfd_face_detection.jpg
  6. +3 -0 data/test/videos/mask_dir/mask_00000_00320.png
  7. +3 -0 data/test/videos/mask_dir/mask_00321_00633.png
  8. +3 -0 data/test/videos/video_inpainting_test.mp4
  9. +3 -1 docker/Dockerfile.ubuntu
  10. +4 -0 modelscope/exporters/__init__.py
  11. +53 -0 modelscope/exporters/base.py
  12. +21 -0 modelscope/exporters/builder.py
  13. +2 -0 modelscope/exporters/nlp/__init__.py
  14. +81 -0 modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py
  15. +247 -0 modelscope/exporters/torch_model_exporter.py
  16. +2 -0 modelscope/fileio/__init__.py
  17. +2 -0 modelscope/fileio/format/__init__.py
  18. +3 -1 modelscope/hub/api.py
  19. +2 -0 modelscope/hub/constants.py
  20. +3 -1 modelscope/hub/errors.py
  21. +2 -0 modelscope/hub/file_download.py
  22. +17 -4 modelscope/hub/git.py
  23. +24 -4 modelscope/hub/repository.py
  24. +2 -0 modelscope/hub/snapshot_download.py
  25. +2 -0 modelscope/hub/utils/caching.py
  26. +2 -0 modelscope/hub/utils/utils.py
  27. +32 -0 modelscope/metainfo.py
  28. +2 -0 modelscope/metrics/audio_noise_metric.py
  29. +2 -2 modelscope/metrics/sequence_classification_metric.py
  30. +2 -0 modelscope/models/audio/aec/layers/activations.py
  31. +2 -0 modelscope/models/audio/aec/layers/affine_transform.py
  32. +2 -0 modelscope/models/audio/aec/layers/deep_fsmn.py
  33. +2 -0 modelscope/models/audio/aec/layers/layer_base.py
  34. +2 -0 modelscope/models/audio/aec/layers/uni_deep_fsmn.py
  35. +2 -0 modelscope/models/audio/aec/network/loss.py
  36. +2 -0 modelscope/models/audio/aec/network/modulation_loss.py
  37. +2 -0 modelscope/models/audio/aec/network/se_net.py
  38. +6 -5 modelscope/models/audio/ans/complex_nn.py
  39. +7 -5 modelscope/models/audio/ans/unet.py
  40. +2 -0 modelscope/models/audio/kws/farfield/fsmn.py
  41. +2 -0 modelscope/models/audio/kws/farfield/fsmn_sele_v2.py
  42. +2 -0 modelscope/models/audio/kws/farfield/model.py
  43. +2 -0 modelscope/models/audio/kws/farfield/model_def.py
  44. +2 -0 modelscope/models/base/__init__.py
  45. +8 -13 modelscope/models/base/base_head.py
  46. +10 -18 modelscope/models/base/base_model.py
  47. +3 -5 modelscope/models/base/base_torch_head.py
  48. +6 -7 modelscope/models/base/base_torch_model.py
  49. +2 -0 modelscope/models/cv/action_detection/action_detection_onnx.py
  50. +6 -1 modelscope/models/cv/face_detection/__init__.py
  51. +2 -3 modelscope/models/cv/face_detection/mmdet_patch/__init__.py
  52. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/core/bbox/__init__.py
  53. +2 -1 modelscope/models/cv/face_detection/mmdet_patch/core/bbox/transforms.py
  54. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/core/post_processing/__init__.py
  55. +2 -1 modelscope/models/cv/face_detection/mmdet_patch/core/post_processing/bbox_nms.py
  56. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/datasets/__init__.py
  57. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/datasets/pipelines/__init__.py
  58. +2 -1 modelscope/models/cv/face_detection/mmdet_patch/datasets/pipelines/transforms.py
  59. +2 -1 modelscope/models/cv/face_detection/mmdet_patch/datasets/retinaface.py
  60. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/__init__.py
  61. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/backbones/__init__.py
  62. +2 -1 modelscope/models/cv/face_detection/mmdet_patch/models/backbones/resnet.py
  63. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/dense_heads/__init__.py
  64. +2 -1 modelscope/models/cv/face_detection/mmdet_patch/models/dense_heads/scrfd_head.py
  65. +4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/detectors/__init__.py
  66. +2 -1 modelscope/models/cv/face_detection/mmdet_patch/models/detectors/scrfd.py
  67. +1 -0 modelscope/models/cv/face_detection/mogface/__init__.py
  68. +0 -0 modelscope/models/cv/face_detection/mogface/models/__init__.py
  69. +96 -0 modelscope/models/cv/face_detection/mogface/models/detectors.py
  70. +135 -0 modelscope/models/cv/face_detection/mogface/models/mogface.py
  71. +164 -0 modelscope/models/cv/face_detection/mogface/models/mogprednet.py
  72. +193 -0 modelscope/models/cv/face_detection/mogface/models/resnet.py
  73. +212 -0 modelscope/models/cv/face_detection/mogface/models/utils.py
  74. +1 -0 modelscope/models/cv/face_detection/mtcnn/__init__.py
  75. +0 -0 modelscope/models/cv/face_detection/mtcnn/models/__init__.py
  76. +240 -0 modelscope/models/cv/face_detection/mtcnn/models/box_utils.py
  77. +149 -0 modelscope/models/cv/face_detection/mtcnn/models/detector.py
  78. +100 -0 modelscope/models/cv/face_detection/mtcnn/models/first_stage.py
  79. +160 -0 modelscope/models/cv/face_detection/mtcnn/models/get_nets.py
  80. +1 -0 modelscope/models/cv/face_detection/ulfd_slim/__init__.py
  81. +44 -0 modelscope/models/cv/face_detection/ulfd_slim/detection.py
  82. +0 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/__init__.py
  83. +124 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/box_utils.py
  84. +49 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py
  85. +0 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/__init__.py
  86. +18 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py
  87. +49 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py
  88. +124 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py
  89. +80 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py
  90. +129 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py
  91. +56 -0 modelscope/models/cv/face_detection/ulfd_slim/vision/transforms.py
  92. +4 -0 modelscope/models/cv/face_recognition/align_face.py
  93. +2 -0 modelscope/models/cv/face_recognition/torchkit/backbone/__init__.py
  94. +2 -0 modelscope/models/cv/face_recognition/torchkit/backbone/common.py
  95. +2 -2 modelscope/models/cv/face_recognition/torchkit/backbone/model_irse.py
  96. +2 -2 modelscope/models/cv/face_recognition/torchkit/backbone/model_resnet.py
  97. +10 -12 modelscope/models/cv/image_instance_segmentation/postprocess_utils.py
  98. +1 -1 modelscope/models/cv/image_reid_person/pass_model.py
  99. +1 -1 modelscope/models/cv/image_reid_person/transreid_model.py
  100. +1 -1 modelscope/models/cv/shop_segmentation/models.py

+1 -0 .dev_scripts/dockerci.sh

@@ -36,6 +36,7 @@ do
-e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
-e TEST_LEVEL=$TEST_LEVEL \
-e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
-e MODEL_TAG_URL=$MODEL_TAG_URL \
--workdir=$CODE_DIR_IN_CONTAINER \
--net host \
${IMAGE_NAME}:${IMAGE_VERSION} \


+3 -0 data/test/images/mog_face_detection.jpg

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9
size 87228

+3 -0 data/test/images/mtcnn_face_detection.jpg

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9
size 87228

+3 -0 data/test/images/multimodal_similarity.jpg

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f24abbba43782d733dedbb0b4f416635af50263862e5632963ac9263e430555
size 88542

+3 -0 data/test/images/ulfd_face_detection.jpg

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9
size 87228

+3 -0 data/test/videos/mask_dir/mask_00000_00320.png

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b158f6029d9763d7f84042f7c5835f398c688fdbb6b3f4fe6431101d4118c66c
size 2766

+3 -0 data/test/videos/mask_dir/mask_00321_00633.png

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0dcf46b93077e2229ab69cd6ddb80e2689546c575ee538bb2033fee1124ef3e3
size 2761

+3 -0 data/test/videos/video_inpainting_test.mp4

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c9870df5a86acaaec67063183dace795479cd0f05296f13058995f475149c56
size 2957783

+3 -1 docker/Dockerfile.ubuntu

@@ -75,7 +75,9 @@ RUN pip install --no-cache-dir --upgrade pip && \
ENV SHELL=/bin/bash

# install special package
RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 numpy==1.18.5 datasets==2.1.0
RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 datasets==2.1.0 ipykernel && \
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip config set install.trusted-host pypi.tuna.tsinghua.edu.cn

RUN if [ "$USE_GPU" = "True" ] ; then \
pip install --no-cache-dir dgl-cu113 dglgo -f https://data.dgl.ai/wheels/repo.html; \


+4 -0 modelscope/exporters/__init__.py

@@ -0,0 +1,4 @@
from .base import Exporter
from .builder import build_exporter
from .nlp import SbertForSequenceClassificationExporter
from .torch_model_exporter import TorchModelExporter

+53 -0 modelscope/exporters/base.py

@@ -0,0 +1,53 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from abc import ABC, abstractmethod

from modelscope.models import Model
from modelscope.utils.config import Config, ConfigDict
from modelscope.utils.constant import ModelFile
from .builder import build_exporter


class Exporter(ABC):
"""Exporter base class to output model to onnx, torch_script, graphdef, etc.
"""

def __init__(self):
self.model = None

@classmethod
def from_model(cls, model: Model, **kwargs):
"""Build the Exporter instance.

@param model: A model instance. It will be used to output the generated file,
and the configuration.json in its model_dir field will be used to create the exporter instance.
@param kwargs: Extra kwargs used to create the Exporter instance.
@return: The Exporter instance
"""
cfg = Config.from_file(
os.path.join(model.model_dir, ModelFile.CONFIGURATION))
task_name = cfg.task
model_cfg = cfg.model
if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'):
model_cfg.type = model_cfg.model_type
export_cfg = ConfigDict({'type': model_cfg.type})
if hasattr(cfg, 'export'):
export_cfg.update(cfg.export)
exporter = build_exporter(export_cfg, task_name, kwargs)
exporter.model = model
return exporter

@abstractmethod
def export_onnx(self, outputs: str, opset=11, **kwargs):
"""Export the model as onnx format files.

In some cases, several files may be generated,
so please return a dict mapping each generated name to its file path.

@param opset: The version of the ONNX operator set to use.
@param outputs: The output dir.
@param kwargs: In this default implementation,
kwargs will be carried to generate_dummy_inputs as extra arguments (like input shape).
@return: A dict mapping the model name to the model file path.
"""
pass
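
For context, a minimal usage sketch of the new exporter API (the model directory path below is hypothetical, and assumes its configuration.json resolves to a registered exporter):

from modelscope.models import Model
from modelscope.exporters import Exporter

# Load a model whose model_dir contains a configuration.json.
model = Model.from_pretrained('/path/to/model_dir')
# Build the matching exporter from that configuration and export to ONNX.
exporter = Exporter.from_model(model)
result = exporter.export_onnx(outputs='/tmp/export', opset=11)
print(result)  # e.g. {'model': '/tmp/export/model.onnx'}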

+21 -0 modelscope/exporters/builder.py

@@ -0,0 +1,21 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from modelscope.utils.config import ConfigDict
from modelscope.utils.registry import Registry, build_from_cfg

EXPORTERS = Registry('exporters')


def build_exporter(cfg: ConfigDict,
task_name: str = None,
default_args: dict = None):
""" build exporter by the given model config dict

Args:
cfg (:obj:`ConfigDict`): config dict for exporter object.
task_name (str, optional): task name, refer to
:obj:`Tasks` for more details
default_args (dict, optional): Default initialization arguments.
"""
return build_from_cfg(
cfg, EXPORTERS, group_key=task_name, default_args=default_args)
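
A sketch of how this registry is meant to be used (the task and module names below are invented for illustration):

from modelscope.exporters.builder import EXPORTERS, build_exporter
from modelscope.utils.config import ConfigDict


@EXPORTERS.register_module('my-task', module_name='my-exporter')
class MyExporter:

    def __init__(self, **kwargs):
        # Receives build_exporter's default_args as keyword arguments.
        self.kwargs = kwargs


exporter = build_exporter(
    ConfigDict({'type': 'my-exporter'}), task_name='my-task')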

+2 -0 modelscope/exporters/nlp/__init__.py

@@ -0,0 +1,2 @@
from .sbert_for_sequence_classification_exporter import \
SbertForSequenceClassificationExporter

+81 -0 modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py

@@ -0,0 +1,81 @@
import os
from collections import OrderedDict
from typing import Any, Dict, Mapping, Tuple

from torch.utils.data.dataloader import default_collate

from modelscope.exporters.builder import EXPORTERS
from modelscope.exporters.torch_model_exporter import TorchModelExporter
from modelscope.metainfo import Models
from modelscope.preprocessors import Preprocessor, build_preprocessor
from modelscope.utils.config import Config
from modelscope.utils.constant import ModeKeys, Tasks


@EXPORTERS.register_module(
Tasks.sentence_similarity, module_name=Models.structbert)
@EXPORTERS.register_module(
Tasks.sentiment_classification, module_name=Models.structbert)
@EXPORTERS.register_module(Tasks.nli, module_name=Models.structbert)
@EXPORTERS.register_module(
Tasks.zero_shot_classification, module_name=Models.structbert)
class SbertForSequenceClassificationExporter(TorchModelExporter):

def generate_dummy_inputs(self,
shape: Tuple = None,
**kwargs) -> Dict[str, Any]:
"""Generate dummy inputs for model exportation to onnx or other formats by tracing.

@param shape: A tuple of input shape which should have at most two dimensions.
shape = (1, ): batch_size=1, sequence_length will be taken from the preprocessor.
shape = (8, 128): batch_size=8, sequence_length=128, which will override the config of the preprocessor.
@return: Dummy inputs.
"""

cfg = Config.from_file(
os.path.join(self.model.model_dir, 'configuration.json'))
field_name = Tasks.find_field_by_task(cfg.task)
if 'type' not in cfg.preprocessor and 'val' in cfg.preprocessor:
cfg = cfg.preprocessor.val
else:
cfg = cfg.preprocessor

batch_size = 1
sequence_length = {}
if shape is not None:
if len(shape) == 1:
batch_size = shape[0]
elif len(shape) == 2:
batch_size, max_length = shape
sequence_length = {'sequence_length': max_length}

cfg.update({
'model_dir': self.model.model_dir,
'mode': ModeKeys.TRAIN,
**sequence_length
})
preprocessor: Preprocessor = build_preprocessor(cfg, field_name)
if preprocessor.pair:
first_sequence = preprocessor.tokenizer.unk_token
second_sequence = preprocessor.tokenizer.unk_token
else:
first_sequence = preprocessor.tokenizer.unk_token
second_sequence = None

batched = []
for _ in range(batch_size):
batched.append(preprocessor((first_sequence, second_sequence)))
return default_collate(batched)

@property
def inputs(self) -> Mapping[str, Mapping[int, str]]:
dynamic_axis = {0: 'batch', 1: 'sequence'}
return OrderedDict([
('input_ids', dynamic_axis),
('attention_mask', dynamic_axis),
('token_type_ids', dynamic_axis),
])

@property
def outputs(self) -> Mapping[str, Mapping[int, str]]:
return OrderedDict({'logits': {0: 'batch'}})
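
A hedged end-to-end sketch of exporting such a model (the model id is illustrative; any structbert sequence-classification checkpoint with a configuration.json should work the same way):

from modelscope.models import Model
from modelscope.exporters import Exporter

model = Model.from_pretrained(
    'damo/nlp_structbert_sentence-similarity_chinese-base')
exporter = Exporter.from_model(model)
# shape=(8, 128) requests dummy inputs with batch_size=8, sequence_length=128.
files = exporter.export_onnx(
    outputs='/tmp/sbert_onnx', opset=11, shape=(8, 128))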

+247 -0 modelscope/exporters/torch_model_exporter.py

@@ -0,0 +1,247 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from contextlib import contextmanager
from itertools import chain
from typing import Any, Dict, Mapping

import torch
from torch import nn
from torch.onnx import export as onnx_export
from torch.onnx.utils import _decide_input_format

from modelscope.models import TorchModel
from modelscope.pipelines.base import collate_fn
from modelscope.utils.constant import ModelFile
from modelscope.utils.logger import get_logger
from modelscope.utils.regress_test_utils import compare_arguments_nested
from modelscope.utils.tensor_utils import torch_nested_numpify
from .base import Exporter

logger = get_logger(__name__)


class TorchModelExporter(Exporter):
"""The torch base class of exporter.

This class provides the default implementations for exporting onnx and torch script.
Each specific model may implement its own exporter by overriding the export_onnx/export_torch_script,
and to provide implementations for generate_dummy_inputs/inputs/outputs methods.
"""

def export_onnx(self, outputs: str, opset=11, **kwargs):
"""Export the model as onnx format files.

In some cases, several files may be generated,
so please return a dict mapping each generated name to its file path.

@param opset: The version of the ONNX operator set to use.
@param outputs: The output dir.
@param kwargs: In this default implementation,
you can pass the arguments needed by _torch_export_onnx, other unrecognized args
will be carried to generate_dummy_inputs as extra arguments (such as input shape).
@return: A dict containing the model key - model file path pairs.
"""
model = self.model
if not isinstance(model, nn.Module) and hasattr(model, 'model'):
model = model.model
onnx_file = os.path.join(outputs, ModelFile.ONNX_MODEL_FILE)
self._torch_export_onnx(model, onnx_file, opset=opset, **kwargs)
return {'model': onnx_file}

def export_torch_script(self, outputs: str, **kwargs):
"""Export the model as torch script files.

In some cases, several files may be generated,
so please return a dict mapping each generated name to its file path.

@param outputs: The output dir.
@param kwargs: In this default implementation,
you can pass the arguments needed by _torch_export_torch_script, other unrecognized args
will be carried to generate_dummy_inputs as extra arguments (like input shape).
@return: A dict mapping the model name to the model file path.
"""
model = self.model
if not isinstance(model, nn.Module) and hasattr(model, 'model'):
model = model.model
ts_file = os.path.join(outputs, ModelFile.TS_MODEL_FILE)
# generate ts by tracing
self._torch_export_torch_script(model, ts_file, **kwargs)
return {'model': ts_file}

def generate_dummy_inputs(self, **kwargs) -> Dict[str, Any]:
"""Generate dummy inputs for model exportation to onnx or other formats by tracing.
@return: Dummy inputs.
"""
return None

@property
def inputs(self) -> Mapping[str, Mapping[int, str]]:
Return an ordered dict mapping the model's input argument names to their dynamic axes.

For details about dynamic axes, check the dynamic_axes argument of the torch.onnx.export function
"""
return None

@property
def outputs(self) -> Mapping[str, Mapping[int, str]]:
Return an ordered dict mapping the model's output argument names to their dynamic axes.

For details about dynamic axes, check the dynamic_axes argument of the torch.onnx.export function
"""
return None

def _torch_export_onnx(self,
model: nn.Module,
output: str,
opset: int = 11,
device: str = 'cpu',
validation: bool = True,
rtol: float = None,
atol: float = None,
**kwargs):
"""Export the model to an onnx format file.

@param model: A torch.nn.Module instance to export.
@param output: The output file.
@param opset: The version of the ONNX operator set to use.
@param device: The device used to forward.
@param validation: Whether to validate the exported file.
@param rtol: The rtol used to regress the outputs.
@param atol: The atol used to regress the outputs.
"""

dummy_inputs = self.generate_dummy_inputs(**kwargs)
inputs = self.inputs
outputs = self.outputs
if dummy_inputs is None or inputs is None or outputs is None:
raise NotImplementedError(
'Model property dummy_inputs,inputs,outputs must be set.')

with torch.no_grad():
model.eval()
device = torch.device(device)
model.to(device)
dummy_inputs = collate_fn(dummy_inputs, device)

if isinstance(dummy_inputs, Mapping):
dummy_inputs = dict(dummy_inputs)
onnx_outputs = list(self.outputs.keys())

with replace_call():
onnx_export(
model,
(dummy_inputs, ),
f=output,
input_names=list(inputs.keys()),
output_names=onnx_outputs,
dynamic_axes={
name: axes
for name, axes in chain(inputs.items(),
outputs.items())
},
do_constant_folding=True,
opset_version=opset,
)

if validation:
try:
import onnx
import onnxruntime as ort
except ImportError:
logger.warn(
'Cannot validate the exported onnx file, because '
'the installation of onnx or onnxruntime cannot be found')
return
onnx_model = onnx.load(output)
onnx.checker.check_model(onnx_model)
ort_session = ort.InferenceSession(output)
with torch.no_grad():
model.eval()
outputs_origin = model.forward(
*_decide_input_format(model, dummy_inputs))
if isinstance(outputs_origin, Mapping):
outputs_origin = torch_nested_numpify(
list(outputs_origin.values()))
outputs = ort_session.run(
onnx_outputs,
torch_nested_numpify(dummy_inputs),
)

tols = {}
if rtol is not None:
tols['rtol'] = rtol
if atol is not None:
tols['atol'] = atol
if not compare_arguments_nested('Onnx model output match failed',
outputs, outputs_origin, **tols):
raise RuntimeError(
'export onnx failed because of validation error.')

def _torch_export_torch_script(self,
model: nn.Module,
output: str,
device: str = 'cpu',
validation: bool = True,
rtol: float = None,
atol: float = None,
**kwargs):
"""Export the model to a torch script file.

@param model: A torch.nn.Module instance to export.
@param output: The output file.
@param device: The device used to forward.
@param validation: Whether to validate the exported file.
@param rtol: The rtol used to regress the outputs.
@param atol: The atol used to regress the outputs.
"""

model.eval()
dummy_inputs = self.generate_dummy_inputs(**kwargs)
if dummy_inputs is None:
raise NotImplementedError(
'Model property dummy_inputs must be set.')
dummy_inputs = collate_fn(dummy_inputs, device)
if isinstance(dummy_inputs, Mapping):
dummy_inputs = tuple(dummy_inputs.values())
with torch.no_grad():
model.eval()
with replace_call():
traced_model = torch.jit.trace(
model, dummy_inputs, strict=False)
torch.jit.save(traced_model, output)

if validation:
ts_model = torch.jit.load(output)
with torch.no_grad():
model.eval()
ts_model.eval()
outputs = ts_model.forward(*dummy_inputs)
outputs = torch_nested_numpify(outputs)
outputs_origin = model.forward(*dummy_inputs)
outputs_origin = torch_nested_numpify(outputs_origin)
tols = {}
if rtol is not None:
tols['rtol'] = rtol
if atol is not None:
tols['atol'] = atol
if not compare_arguments_nested(
'Torch script model output match failed', outputs,
outputs_origin, **tols):
raise RuntimeError(
'export torch script failed because of validation error.')


@contextmanager
def replace_call():
"""This function is used to recover the original call method.

The Model class of modelscope overrides the call method. When exporting to onnx or torchscript, torch will
prepare the parameters as the prototype of forward method, and trace the call method, this causes
problems. Here we recover the call method to the default implementation of torch.nn.Module, and change it
back after the tracing was done.
"""

TorchModel.call_origin, TorchModel.__call__ = TorchModel.__call__, TorchModel._call_impl
yield
TorchModel.__call__ = TorchModel.call_origin
del TorchModel.call_origin
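
The trick replace_call relies on is an ordinary temporary method swap on the class. A minimal standalone illustration of the same pattern (not modelscope code; a try/finally is added here for safety):

from contextlib import contextmanager

import torch
from torch import nn


class Wrapped(nn.Module):

    def __call__(self, *args, **kwargs):
        # A custom __call__ like Model's, which would confuse tracing.
        return super().__call__(*args, **kwargs)

    def forward(self, x):
        return x * 2


@contextmanager
def plain_call(cls):
    # Temporarily restore torch.nn.Module's default call implementation.
    original = cls.__call__
    cls.__call__ = cls._call_impl
    try:
        yield
    finally:
        cls.__call__ = original


with plain_call(Wrapped):
    traced = torch.jit.trace(Wrapped(), torch.ones(2))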

+2 -0 modelscope/fileio/__init__.py

@@ -1,2 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from .file import File, LocalStorage
from .io import dump, dumps, load

+2 -0 modelscope/fileio/format/__init__.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from .base import FormatHandler
from .json import JsonHandler
from .yaml import YamlHandler

+3 -1 modelscope/hub/api.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import pickle
import shutil
@@ -389,7 +391,7 @@ class HubApi:
cookies = requests.utils.dict_from_cookiejar(cookies)
r = requests.get(url=datahub_url, cookies=cookies)
resp = r.json()
datahub_raise_on_error(datahub_url, resp)
raise_on_error(resp)
return resp['Data']

def on_dataset_download(self, dataset_name: str, namespace: str) -> None:


+2 -0 modelscope/hub/constants.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from pathlib import Path

MODELSCOPE_URL_SCHEME = 'http://'


+3 -1 modelscope/hub/errors.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from http import HTTPStatus

from requests.exceptions import HTTPError
@@ -60,7 +62,7 @@ def raise_on_error(rsp):
Args:
rsp (_type_): The server response
"""
if rsp['Code'] == HTTPStatus.OK and rsp['Success']:
if rsp['Code'] == HTTPStatus.OK:
return True
else:
raise RequestError(rsp['Message'])


+2 -0 modelscope/hub/file_download.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import copy
import os
import sys


+17 -4 modelscope/hub/git.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import subprocess
from typing import List
@@ -39,17 +41,28 @@ class GitCommandWrapper(metaclass=Singleton):
subprocess.CompletedProcess: the command response
"""
logger.debug(' '.join(args))
git_env = os.environ.copy()
git_env['GIT_TERMINAL_PROMPT'] = '0'
response = subprocess.run(
[self.git_path, *args],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE) # compatible for python3.6
stderr=subprocess.PIPE,
env=git_env,
) # compatible for python3.6
try:
response.check_returncode()
return response
except subprocess.CalledProcessError as error:
raise GitError(
'stdout: %s, stderr: %s' %
(response.stdout.decode('utf8'), error.stderr.decode('utf8')))
if response.returncode == 1:
logger.info('Nothing to commit.')
return response
else:
logger.error(
'Error running the git command, you may need to log in first.'
)
raise GitError('stdout: %s, stderr: %s' %
(response.stdout.decode('utf8'),
error.stderr.decode('utf8')))

def config_auth_token(self, repo_dir, auth_token):
url = self.get_repo_remote_url(repo_dir)
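
For reference, GIT_TERMINAL_PROMPT=0 makes git fail fast instead of blocking on an interactive credential prompt. A standalone check of the same behavior (the repository URL is illustrative):

import os
import subprocess

env = os.environ.copy()
env['GIT_TERMINAL_PROMPT'] = '0'
# Without this variable, cloning a private repo can hang waiting for a
# username/password; with it, git exits with a non-zero return code.
result = subprocess.run(
    ['git', 'clone', 'https://example.com/private/repo.git'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    env=env)
print(result.returncode, result.stderr.decode('utf8'))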


+24 -4 modelscope/hub/repository.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
from typing import Optional

@@ -40,6 +42,11 @@ class Repository:
self.model_dir = model_dir
self.model_base_dir = os.path.dirname(model_dir)
self.model_repo_name = os.path.basename(model_dir)

if not revision:
err_msg = 'a non-default value of revision cannot be empty.'
raise InvalidParameter(err_msg)

if auth_token:
self.auth_token = auth_token
else:
@@ -145,10 +152,21 @@ class DatasetRepository:
The git command line path, if None, we use 'git'
"""
self.dataset_id = dataset_id
self.repo_work_dir = repo_work_dir
self.repo_base_dir = os.path.dirname(repo_work_dir)
self.repo_name = os.path.basename(repo_work_dir)
if not repo_work_dir or not isinstance(repo_work_dir, str):
err_msg = 'dataset_work_dir must be provided!'
raise InvalidParameter(err_msg)
self.repo_work_dir = repo_work_dir.rstrip('/')
if not self.repo_work_dir:
err_msg = 'dataset_work_dir can not be root dir!'
raise InvalidParameter(err_msg)
self.repo_base_dir = os.path.dirname(self.repo_work_dir)
self.repo_name = os.path.basename(self.repo_work_dir)

if not revision:
err_msg = 'a non-default value of revision cannot be empty.'
raise InvalidParameter(err_msg)
self.revision = revision

if auth_token:
self.auth_token = auth_token
else:
@@ -199,7 +217,9 @@ class DatasetRepository:
self.git_wrapper.config_auth_token(self.repo_work_dir, self.auth_token)
self.git_wrapper.add_user_info(self.repo_base_dir, self.repo_name)

remote_url = self.git_wrapper.get_repo_remote_url(self.repo_work_dir)
remote_url = self._get_remote_url()
remote_url = self.git_wrapper.remove_token_from_url(remote_url)

self.git_wrapper.pull(self.repo_work_dir)
self.git_wrapper.add(self.repo_work_dir, all_files=True)
self.git_wrapper.commit(self.repo_work_dir, commit_message)
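
The rstrip('/') guard above matters because a trailing slash would otherwise break the dirname/basename derivation, and a bare root path collapses to an empty string. A quick check:

>>> '/data/work_dir/'.rstrip('/')
'/data/work_dir'
>>> '/'.rstrip('/')
''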


+2 -0 modelscope/hub/snapshot_download.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import tempfile
from pathlib import Path


+2 -0 modelscope/hub/utils/caching.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import hashlib
import os
import pickle


+2 -0 modelscope/hub/utils/utils.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import hashlib
import os
from typing import Optional


+32 -0 modelscope/metainfo.py

@@ -35,6 +35,10 @@ class Models(object):
fer = 'fer'
retinaface = 'retinaface'
shop_segmentation = 'shop-segmentation'
mogface = 'mogface'
mtcnn = 'mtcnn'
ulfd = 'ulfd'
video_inpainting = 'video-inpainting'

# EasyCV models
yolox = 'YOLOX'
@@ -51,11 +55,16 @@ class Models(object):
space_intent = 'space-intent'
space_modeling = 'space-modeling'
star = 'star'
star3 = 'star3'
tcrf = 'transformer-crf'
transformer_softmax = 'transformer-softmax'
lcrf = 'lstm-crf'
gcnncrf = 'gcnn-crf'
bart = 'bart'
gpt3 = 'gpt3'
plug = 'plug'
bert_for_ds = 'bert-for-document-segmentation'
ponet = 'ponet'

# audio models
sambert_hifigan = 'sambert-hifigan'
@@ -70,6 +79,7 @@ class Models(object):
gemm = 'gemm-generative-multi-modal'
mplug = 'mplug'
diffusion = 'diffusion-text-to-image-synthesis'
multi_stage_diffusion = 'multi-stage-diffusion-text-to-image-synthesis'
team = 'team-multi-modal-similarity'
video_clip = 'video-clip-multi-modal-embedding'

@@ -77,6 +87,7 @@ class Models(object):
class TaskModels(object):
# nlp task
text_classification = 'text-classification'
token_classification = 'token-classification'
information_extraction = 'information-extraction'


@@ -87,6 +98,8 @@ class Heads(object):
bert_mlm = 'bert-mlm'
# roberta mlm
roberta_mlm = 'roberta-mlm'
# token cls
token_classification = 'token-classification'
information_extraction = 'information-extraction'


@@ -121,8 +134,11 @@ class Pipelines(object):
salient_detection = 'u2net-salient-detection'
image_classification = 'image-classification'
face_detection = 'resnet-face-detection-scrfd10gkps'
ulfd_face_detection = 'manual-face-detection-ulfd'
facial_expression_recognition = 'vgg19-facial-expression-recognition-fer'
retina_face_detection = 'resnet50-face-detection-retinaface'
mog_face_detection = 'resnet101-face-detection-cvpr22papermogface'
mtcnn_face_detection = 'manual-face-detection-mtcnn'
live_category = 'live-category'
general_image_classification = 'vit-base_image-classification_ImageNet-labels'
daily_image_classification = 'vit-base_image-classification_Dailylife-labels'
@@ -155,16 +171,19 @@ class Pipelines(object):
text_driven_segmentation = 'text-driven-segmentation'
movie_scene_segmentation = 'resnet50-bert-movie-scene-segmentation'
shop_segmentation = 'shop-segmentation'
video_inpainting = 'video-inpainting'

# nlp tasks
sentence_similarity = 'sentence-similarity'
word_segmentation = 'word-segmentation'
part_of_speech = 'part-of-speech'
named_entity_recognition = 'named-entity-recognition'
text_generation = 'text-generation'
sentiment_analysis = 'sentiment-analysis'
sentiment_classification = 'sentiment-classification'
text_classification = 'text-classification'
fill_mask = 'fill-mask'
fill_mask_ponet = 'fill-mask-ponet'
csanmt_translation = 'csanmt-translation'
nli = 'nli'
dialog_intent_prediction = 'dialog-intent-prediction'
@@ -172,8 +191,12 @@ class Pipelines(object):
dialog_state_tracking = 'dialog-state-tracking'
zero_shot_classification = 'zero-shot-classification'
text_error_correction = 'text-error-correction'
plug_generation = 'plug-generation'
faq_question_answering = 'faq-question-answering'
conversational_text_to_sql = 'conversational-text-to-sql'
table_question_answering_pipeline = 'table-question-answering-pipeline'
sentence_embedding = 'sentence-embedding'
passage_ranking = 'passage-ranking'
relation_extraction = 'relation-extraction'
document_segmentation = 'document-segmentation'

@@ -223,8 +246,11 @@ class Trainers(object):

# nlp trainers
bert_sentiment_analysis = 'bert-sentiment-analysis'
dialog_modeling_trainer = 'dialog-modeling-trainer'
dialog_intent_trainer = 'dialog-intent-trainer'
nlp_base_trainer = 'nlp-base-trainer'
nlp_veco_trainer = 'nlp-veco-trainer'
nlp_passage_ranking_trainer = 'nlp-passage-ranking-trainer'

# audio trainers
speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
@@ -252,6 +278,7 @@ class Preprocessors(object):

# nlp preprocessor
sen_sim_tokenizer = 'sen-sim-tokenizer'
cross_encoder_tokenizer = 'cross-encoder-tokenizer'
bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer'
text_gen_tokenizer = 'text-gen-tokenizer'
token_cls_tokenizer = 'token-cls-tokenizer'
@@ -264,10 +291,15 @@ class Preprocessors(object):
sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer'
zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'
text_error_correction = 'text-error-correction'
sentence_embedding = 'sentence-embedding'
passage_ranking = 'passage-ranking'
sequence_labeling_tokenizer = 'sequence-labeling-tokenizer'
word_segment_text_to_label_preprocessor = 'word-segment-text-to-label-preprocessor'
fill_mask = 'fill-mask'
fill_mask_ponet = 'fill-mask-ponet'
faq_question_answering_preprocessor = 'faq-question-answering-preprocessor'
conversational_text_to_sql = 'conversational-text-to-sql'
table_question_answering_preprocessor = 'table-question-answering-preprocessor'
re_tokenizer = 're-tokenizer'
document_segmentation = 'document-segmentation'



+2 -0 modelscope/metrics/audio_noise_metric.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from typing import Dict

from modelscope.metainfo import Metrics


+2 -2 modelscope/metrics/sequence_classification_metric.py

@@ -14,9 +14,9 @@ from .builder import METRICS, MetricKeys
@METRICS.register_module(
group_key=default_group, module_name=Metrics.seq_cls_metric)
class SequenceClassificationMetric(Metric):
"""The metric computation class for sequence classification classes.
"""The metric computation class for sequence classification tasks.

This metric class calculates accuracy for the whole input batches.
This metric class calculates accuracy of the whole input batches.
"""

def __init__(self, *args, **kwargs):


+2 -0 modelscope/models/audio/aec/layers/activations.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import torch.nn as nn

from .layer_base import LayerBase


+2 -0 modelscope/models/audio/aec/layers/affine_transform.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import numpy as np
import torch as th
import torch.nn as nn


+2 -0 modelscope/models/audio/aec/layers/deep_fsmn.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import numpy as np
import torch as th
import torch.nn as nn


+2 -0 modelscope/models/audio/aec/layers/layer_base.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import abc
import re



+2 -0 modelscope/models/audio/aec/layers/uni_deep_fsmn.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import numpy as np
import torch as th
import torch.nn as nn


+2 -0 modelscope/models/audio/aec/network/loss.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import torch
import torch.nn.functional as F



+2 -0 modelscope/models/audio/aec/network/modulation_loss.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import math

import torch


+2 -0 modelscope/models/audio/aec/network/se_net.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import torch
import torch.nn as nn
import torch.nn.functional as F


+6 -5 modelscope/models/audio/ans/complex_nn.py

@@ -1,9 +1,10 @@
"""
The implementation of class ComplexConv2d, ComplexConvTranspose2d and ComplexBatchNorm2d
here is modified based on Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft )
and publicly available at https://github.com/sweetcocoa/DeepComplexUNetPyTorch
# Copyright (c) Alibaba, Inc. and its affiliates.
#
# The implementation of class ComplexConv2d, ComplexConvTranspose2d and
# ComplexBatchNorm2d here is modified based on Jongho Choi(sweetcocoa@snu.ac.kr
# / Seoul National Univ., ESTsoft ) and publicly available at
# https://github.com/sweetcocoa/DeepComplexUNetPyTorch

"""
import torch
import torch.nn as nn
import torch.nn.functional as F


+7 -5 modelscope/models/audio/ans/unet.py

@@ -1,8 +1,10 @@
"""
The implementation here is modified based on
Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft )
and publicly available at https://github.com/sweetcocoa/DeepComplexUNetPyTorch
"""
# Copyright (c) Alibaba, Inc. and its affiliates.
#
# The implementation here is modified based on
# Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft )
# and publicly available at
# https://github.com/sweetcocoa/DeepComplexUNetPyTorch

import torch
import torch.nn as nn



+2 -0 modelscope/models/audio/kws/farfield/fsmn.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import numpy as np
import torch
import torch.nn as nn


+2 -0 modelscope/models/audio/kws/farfield/fsmn_sele_v2.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import torch
import torch.nn as nn
import torch.nn.functional as F


+2 -0 modelscope/models/audio/kws/farfield/model.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
from typing import Dict



+2 -0 modelscope/models/audio/kws/farfield/model_def.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import math
import struct
from enum import Enum


+2 -0 modelscope/models/base/__init__.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from .base_head import * # noqa F403
from .base_model import * # noqa F403
from .base_torch_head import * # noqa F403


+8 -13 modelscope/models/base/base_head.py

@@ -1,6 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from abc import ABC, abstractmethod
from typing import Dict, Union
from typing import Any, Dict, Union

from modelscope.models.base.base_model import Model
from modelscope.utils.config import ConfigDict
@@ -22,25 +22,20 @@ class Head(ABC):
self.config = ConfigDict(kwargs)

@abstractmethod
def forward(self, input: Input) -> Dict[str, Tensor]:
def forward(self, *args, **kwargs) -> Dict[str, Any]:
"""
This method will use the output from backbone model to do any
downstream tasks
Args:
input: The tensor output or a model from backbone model
(text generation need a model as input)
Returns: The output from downstream taks
downstream tasks. Receives the output from the backbone model.

Returns (Dict[str, Any]): The output from downstream task.
"""
pass

@abstractmethod
def compute_loss(self, outputs: Dict[str, Tensor],
labels) -> Dict[str, Tensor]:
def compute_loss(self, *args, **kwargs) -> Dict[str, Any]:
"""
compute loss for head during the finetuning
Compute the loss for the head during finetuning.

Args:
outputs (Dict[str, Tensor]): the output from the model forward
Returns: the loss(Dict[str, Tensor]):
Returns (Dict[str, Any]): The loss dict
"""
pass

+10 -18 modelscope/models/base/base_model.py

@@ -2,7 +2,7 @@
import os
import os.path as osp
from abc import ABC, abstractmethod
from typing import Callable, Dict, List, Optional, Union
from typing import Any, Callable, Dict, List, Optional, Union

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.builder import build_model
@@ -10,8 +10,6 @@ from modelscope.utils.checkpoint import save_pretrained
from modelscope.utils.config import Config
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
from modelscope.utils.device import device_placement, verify_device
from modelscope.utils.file_utils import func_receive_dict_inputs
from modelscope.utils.hub import parse_label_mapping
from modelscope.utils.logger import get_logger

logger = get_logger()
@@ -27,35 +25,31 @@ class Model(ABC):
verify_device(device_name)
self._device_name = device_name

def __call__(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
return self.postprocess(self.forward(input))
def __call__(self, *args, **kwargs) -> Dict[str, Any]:
return self.postprocess(self.forward(*args, **kwargs))

@abstractmethod
def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
def forward(self, *args, **kwargs) -> Dict[str, Any]:
"""
Run the forward pass for a model.

Args:
input (Dict[str, Tensor]): the dict of the model inputs for the forward method

Returns:
Dict[str, Tensor]: output from the model forward pass
Dict[str, Any]: output from the model forward pass
"""
pass

def postprocess(self, input: Dict[str, Tensor],
**kwargs) -> Dict[str, Tensor]:
def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]:
""" Model specific postprocess and convert model output to
standard model outputs.

Args:
input: input data
inputs: input data

Return:
dict of results: a dict containing outputs of model, each
output should have the standard output name.
"""
return input
return inputs

@classmethod
def _instantiate(cls, **kwargs):
@@ -97,7 +91,6 @@ class Model(ABC):
osp.join(local_model_dir, ModelFile.CONFIGURATION))
task_name = cfg.task
model_cfg = cfg.model
framework = cfg.framework

if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'):
model_cfg.type = model_cfg.model_type
@@ -107,9 +100,8 @@ class Model(ABC):
model_cfg[k] = v
if device is not None:
model_cfg.device = device
with device_placement(framework, device):
model = build_model(
model_cfg, task_name=task_name, default_args=kwargs)
model = build_model(
model_cfg, task_name=task_name, default_args=kwargs)
else:
model = build_model(
model_cfg, task_name=task_name, default_args=kwargs)


+3 -5 modelscope/models/base/base_torch_head.py

@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Dict
from typing import Any, Dict

import torch

@@ -18,10 +18,8 @@ class TorchHead(Head, torch.nn.Module):
super().__init__(**kwargs)
torch.nn.Module.__init__(self)

def forward(self, inputs: Dict[str,
torch.Tensor]) -> Dict[str, torch.Tensor]:
def forward(self, *args, **kwargs) -> Dict[str, Any]:
raise NotImplementedError

def compute_loss(self, outputs: Dict[str, torch.Tensor],
labels) -> Dict[str, torch.Tensor]:
def compute_loss(self, *args, **kwargs) -> Dict[str, Any]:
raise NotImplementedError

+6 -7 modelscope/models/base/base_torch_model.py

@@ -1,6 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from typing import Any, Dict, Optional, Union
from typing import Any, Dict

import torch
from torch import nn
@@ -21,15 +21,14 @@ class TorchModel(Model, torch.nn.Module):
super().__init__(model_dir, *args, **kwargs)
torch.nn.Module.__init__(self)

def __call__(self, input: Dict[str,
torch.Tensor]) -> Dict[str, torch.Tensor]:
def __call__(self, *args, **kwargs) -> Dict[str, Any]:
# Adapting a model with only one dict arg, and the arg name must be input or inputs
if func_receive_dict_inputs(self.forward):
return self.postprocess(self.forward(input))
return self.postprocess(self.forward(args[0], **kwargs))
else:
return self.postprocess(self.forward(**input))
return self.postprocess(self.forward(*args, **kwargs))

def forward(self, inputs: Dict[str,
torch.Tensor]) -> Dict[str, torch.Tensor]:
def forward(self, *args, **kwargs) -> Dict[str, Any]:
raise NotImplementedError

def post_init(self):
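
The new __call__ dispatch can be read as: if forward accepts a single dict argument (named input or inputs), pass the whole batch dict through as args[0]; otherwise spread the positional and keyword arguments. A rough sketch of the two styles it supports (class names are invented; construction details omitted):

from typing import Any, Dict

import torch
from modelscope.models import TorchModel


class DictStyleModel(TorchModel):

    # func_receive_dict_inputs(self.forward) is True here, so __call__
    # forwards the whole batch dict unchanged as the first argument.
    def forward(self, inputs: Dict[str, torch.Tensor]) -> Dict[str, Any]:
        return {'logits': inputs['input_ids'].float()}


class KwargsStyleModel(TorchModel):

    # Here __call__ spreads *args/**kwargs into named parameters instead.
    def forward(self, input_ids=None, attention_mask=None) -> Dict[str, Any]:
        return {'logits': input_ids.float()}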


+2 -0 modelscope/models/cv/action_detection/action_detection_onnx.py

@@ -1,3 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import os.path as osp
import shutil


+6 -1 modelscope/models/cv/face_detection/__init__.py

@@ -4,11 +4,16 @@ from typing import TYPE_CHECKING
from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .mogface import MogFaceDetector
from .mtcnn import MtcnnFaceDetector
from .retinaface import RetinaFaceDetection

from .ulfd_slim import UlfdFaceDetector
else:
_import_structure = {
'ulfd_slim': ['UlfdFaceDetector'],
'retinaface': ['RetinaFaceDetection'],
'mtcnn': ['MtcnnFaceDetector'],
'mogface': ['MogFaceDetector']
}

import sys


+2 -3 modelscope/models/cv/face_detection/mmdet_patch/__init__.py

@@ -1,5 +1,4 @@
"""
mmdet_patch is based on
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet,
all duplicate functions from official mmdetection are removed.
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet
"""

+4 -0 modelscope/models/cv/face_detection/mmdet_patch/core/bbox/__init__.py

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox
"""
from .transforms import bbox2result, distance2kps, kps2distance

__all__ = ['bbox2result', 'distance2kps', 'kps2distance']

+2 -1 modelscope/models/cv/face_detection/mmdet_patch/core/bbox/transforms.py

@@ -1,5 +1,6 @@
"""
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py
"""
import numpy as np
import torch


+4 -0 modelscope/models/cv/face_detection/mmdet_patch/core/post_processing/__init__.py

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py
"""
from .bbox_nms import multiclass_nms

__all__ = ['multiclass_nms']

+2 -1 modelscope/models/cv/face_detection/mmdet_patch/core/post_processing/bbox_nms.py

@@ -1,5 +1,6 @@
"""
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py
"""
import torch



+4 -0 modelscope/models/cv/face_detection/mmdet_patch/datasets/__init__.py

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets
"""
from .retinaface import RetinaFaceDataset

__all__ = ['RetinaFaceDataset']

+4 -0 modelscope/models/cv/face_detection/mmdet_patch/datasets/pipelines/__init__.py

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines
"""
from .transforms import RandomSquareCrop

__all__ = ['RandomSquareCrop']

+2 -1 modelscope/models/cv/face_detection/mmdet_patch/datasets/pipelines/transforms.py

@@ -1,5 +1,6 @@
"""
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py
"""
import numpy as np
from mmdet.datasets.builder import PIPELINES


+2 -1 modelscope/models/cv/face_detection/mmdet_patch/datasets/retinaface.py

@@ -1,5 +1,6 @@
"""
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py
"""
import numpy as np
from mmdet.datasets.builder import DATASETS


+4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/__init__.py

@@ -1,2 +1,6 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models
"""
from .dense_heads import * # noqa: F401,F403
from .detectors import * # noqa: F401,F403

+4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/backbones/__init__.py

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones
"""
from .resnet import ResNetV1e

__all__ = ['ResNetV1e']

+2 -1 modelscope/models/cv/face_detection/mmdet_patch/models/backbones/resnet.py

@@ -1,5 +1,6 @@
"""
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py
"""
import torch.nn as nn
import torch.utils.checkpoint as cp


+4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/dense_heads/__init__.py

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads
"""
from .scrfd_head import SCRFDHead

__all__ = ['SCRFDHead']

+2 -1 modelscope/models/cv/face_detection/mmdet_patch/models/dense_heads/scrfd_head.py

@@ -1,5 +1,6 @@
"""
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py
"""
import numpy as np
import torch


+4 -0 modelscope/models/cv/face_detection/mmdet_patch/models/detectors/__init__.py

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors
"""
from .scrfd import SCRFD

__all__ = ['SCRFD']

+2 -1 modelscope/models/cv/face_detection/mmdet_patch/models/detectors/scrfd.py

@@ -1,5 +1,6 @@
"""
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py
The implementation here is modified based on insightface, originally MIT licensed and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py
"""
import torch
from mmdet.models.builder import DETECTORS


+1 -0 modelscope/models/cv/face_detection/mogface/__init__.py

@@ -0,0 +1 @@
from .models.detectors import MogFaceDetector

+0 -0 modelscope/models/cv/face_detection/mogface/models/__init__.py


+96 -0 modelscope/models/cv/face_detection/mogface/models/detectors.py

@@ -0,0 +1,96 @@
import os

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn

from modelscope.metainfo import Models
from modelscope.models.base import TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks
from .mogface import MogFace
from .utils import MogPriorBox, mogdecode, py_cpu_nms


@MODELS.register_module(Tasks.face_detection, module_name=Models.mogface)
class MogFaceDetector(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device
self.net = MogFace()
self.load_model()
self.net = self.net.to(device)

self.mean = np.array([[104, 117, 123]])

def load_model(self, load_to_cpu=False):
pretrained_dict = torch.load(
self.model_path, map_location=torch.device('cpu'))
self.net.load_state_dict(pretrained_dict, strict=False)
self.net.eval()

def forward(self, input):
img_raw = input['img']
img = np.array(img_raw.cpu().detach())
img = img[:, :, ::-1]

im_height, im_width = img.shape[:2]
ss = 1.0
# tricky
if max(im_height, im_width) > 1500:
ss = 1000.0 / max(im_height, im_width)
img = cv2.resize(img, (0, 0), fx=ss, fy=ss)
im_height, im_width = img.shape[:2]

scale = torch.Tensor(
[img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
img -= np.array([[103.53, 116.28, 123.675]])
img /= np.array([[57.375, 57.120003, 58.395]])
img /= 255
img = img[:, :, ::-1].copy()
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(self.device)
scale = scale.to(self.device)

conf, loc = self.net(img) # forward pass

confidence_threshold = 0.82
nms_threshold = 0.4
top_k = 5000
keep_top_k = 750

priorbox = MogPriorBox(scale_list=[0.68])
priors = priorbox(im_height, im_width)
priors = torch.tensor(priors).to(self.device)
prior_data = priors.data

boxes = mogdecode(loc.data.squeeze(0), prior_data)
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 0]

# ignore low scores
inds = np.where(scores > confidence_threshold)[0]
boxes = boxes[inds]
scores = scores[inds]

# keep top-K before NMS
order = scores.argsort()[::-1][:top_k]
boxes = boxes[order]
scores = scores[order]

# do NMS
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
np.float32, copy=False)
keep = py_cpu_nms(dets, nms_threshold)
dets = dets[keep, :]

# keep top-K faster NMS
dets = dets[:keep_top_k, :]

return dets / ss
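
py_cpu_nms (imported from the sibling utils module, which this view does not show) names standard greedy IoU suppression. A minimal sketch of that algorithm, for reference:

import numpy as np


def greedy_nms(dets, thresh):
    # dets: (N, 5) array with rows of x1, y1, x2, y2, score.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Overlap of the highest-scoring remaining box with the rest.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop boxes that overlap the kept box above the threshold.
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep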

+135 -0 modelscope/models/cv/face_detection/mogface/models/mogface.py

@@ -0,0 +1,135 @@
# --------------------------------------------------------
# The implementation is also open-sourced by the author, Yang Liu, and is publicly available at
# https://github.com/damo-cv/MogFace
# --------------------------------------------------------
import torch.nn as nn
import torch.nn.functional as F

from .mogprednet import MogPredNet
from .resnet import ResNet


class MogFace(nn.Module):

def __init__(self):
super(MogFace, self).__init__()
self.backbone = ResNet(depth=101)
self.fpn = LFPN()
self.pred_net = MogPredNet()

def forward(self, x):
feature_list = self.backbone(x)
fpn_list = self.fpn(feature_list)
pyramid_feature_list = fpn_list[0]
conf, loc = self.pred_net(pyramid_feature_list)
return conf, loc


class FeatureFusion(nn.Module):

def __init__(self, lat_ch=256, **channels):
super(FeatureFusion, self).__init__()
self.main_conv = nn.Conv2d(channels['main'], lat_ch, kernel_size=1)

def forward(self, up, main):
main = self.main_conv(main)
_, _, H, W = main.size()
res = F.upsample(up, scale_factor=2, mode='bilinear')
if res.size(2) != main.size(2) or res.size(3) != main.size(3):
res = res[:, :, 0:H, 0:W]
res = res + main
return res


class LFPN(nn.Module):

def __init__(self,
c2_out_ch=256,
c3_out_ch=512,
c4_out_ch=1024,
c5_out_ch=2048,
c6_mid_ch=512,
c6_out_ch=512,
c7_mid_ch=128,
c7_out_ch=256,
out_dsfd_ft=True):
super(LFPN, self).__init__()
self.out_dsfd_ft = out_dsfd_ft
if self.out_dsfd_ft:
dsfd_module = []
dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1))
dsfd_module.append(nn.Conv2d(512, 256, kernel_size=3, padding=1))
dsfd_module.append(nn.Conv2d(1024, 256, kernel_size=3, padding=1))
dsfd_module.append(nn.Conv2d(2048, 256, kernel_size=3, padding=1))
dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1))
dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1))
self.dsfd_modules = nn.ModuleList(dsfd_module)

c6_input_ch = c5_out_ch
self.c6 = nn.Sequential(*[
nn.Conv2d(
c6_input_ch,
c6_mid_ch,
kernel_size=1,
),
nn.BatchNorm2d(c6_mid_ch),
nn.ReLU(inplace=True),
nn.Conv2d(
c6_mid_ch, c6_out_ch, kernel_size=3, padding=1, stride=2),
nn.BatchNorm2d(c6_out_ch),
nn.ReLU(inplace=True)
])
self.c7 = nn.Sequential(*[
nn.Conv2d(
c6_out_ch,
c7_mid_ch,
kernel_size=1,
),
nn.BatchNorm2d(c7_mid_ch),
nn.ReLU(inplace=True),
nn.Conv2d(
c7_mid_ch, c7_out_ch, kernel_size=3, padding=1, stride=2),
nn.BatchNorm2d(c7_out_ch),
nn.ReLU(inplace=True)
])

self.p2_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.p3_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.p4_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1)

self.c5_lat = nn.Conv2d(c6_input_ch, 256, kernel_size=3, padding=1)
self.c6_lat = nn.Conv2d(c6_out_ch, 256, kernel_size=3, padding=1)
self.c7_lat = nn.Conv2d(c7_out_ch, 256, kernel_size=3, padding=1)

self.ff_c5_c4 = FeatureFusion(main=c4_out_ch)
self.ff_c4_c3 = FeatureFusion(main=c3_out_ch)
self.ff_c3_c2 = FeatureFusion(main=c2_out_ch)

def forward(self, feature_list):
c2, c3, c4, c5 = feature_list
c6 = self.c6(c5)
c7 = self.c7(c6)

c5 = self.c5_lat(c5)
c6 = self.c6_lat(c6)
c7 = self.c7_lat(c7)

if self.out_dsfd_ft:
dsfd_fts = []
dsfd_fts.append(self.dsfd_modules[0](c2))
dsfd_fts.append(self.dsfd_modules[1](c3))
dsfd_fts.append(self.dsfd_modules[2](c4))
dsfd_fts.append(self.dsfd_modules[3](feature_list[-1]))
dsfd_fts.append(self.dsfd_modules[4](c6))
dsfd_fts.append(self.dsfd_modules[5](c7))

p4 = self.ff_c5_c4(c5, c4)
p3 = self.ff_c4_c3(p4, c3)
p2 = self.ff_c3_c2(p3, c2)

p2 = self.p2_lat(p2)
p3 = self.p3_lat(p3)
p4 = self.p4_lat(p4)

        if self.out_dsfd_ft:
            return ([p2, p3, p4, c5, c6, c7], dsfd_fts)
        # fall back to pyramid features only; MogFace.forward indexes [0] either way
        return ([p2, p3, p4, c5, c6, c7], None)

+ 164
- 0
modelscope/models/cv/face_detection/mogface/models/mogprednet.py View File

@@ -0,0 +1,164 @@
# --------------------------------------------------------
# The implementation is also open-sourced by the author Yang Liu, and is publicly available at
# https://github.com/damo-cv/MogFace
# --------------------------------------------------------
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class conv_bn(nn.Module):
    """Conv2d followed by BatchNorm2d."""

def __init__(self, in_plane, out_plane, kernel_size, stride, padding):
super(conv_bn, self).__init__()
self.conv1 = nn.Conv2d(
in_plane,
out_plane,
kernel_size=kernel_size,
stride=stride,
padding=padding)
self.bn1 = nn.BatchNorm2d(out_plane)

def forward(self, x):
x = self.conv1(x)
return self.bn1(x)


class SSHContext(nn.Module):

def __init__(self, channels, Xchannels=256):
super(SSHContext, self).__init__()

self.conv1 = nn.Conv2d(
channels, Xchannels, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(
channels,
Xchannels // 2,
kernel_size=3,
dilation=2,
stride=1,
padding=2)
self.conv2_1 = nn.Conv2d(
Xchannels // 2, Xchannels // 2, kernel_size=3, stride=1, padding=1)
self.conv2_2 = nn.Conv2d(
Xchannels // 2,
Xchannels // 2,
kernel_size=3,
dilation=2,
stride=1,
padding=2)
self.conv2_2_1 = nn.Conv2d(
Xchannels // 2, Xchannels // 2, kernel_size=3, stride=1, padding=1)

def forward(self, x):
x1 = F.relu(self.conv1(x), inplace=True)
x2 = F.relu(self.conv2(x), inplace=True)
x2_1 = F.relu(self.conv2_1(x2), inplace=True)
x2_2 = F.relu(self.conv2_2(x2), inplace=True)
x2_2 = F.relu(self.conv2_2_1(x2_2), inplace=True)

return torch.cat([x1, x2_1, x2_2], 1)


class DeepHead(nn.Module):

def __init__(self,
in_channel=256,
out_channel=256,
use_gn=False,
num_conv=4):
super(DeepHead, self).__init__()
self.use_gn = use_gn
self.num_conv = num_conv
self.conv1 = nn.Conv2d(in_channel, out_channel, 3, 1, 1)
self.conv2 = nn.Conv2d(out_channel, out_channel, 3, 1, 1)
self.conv3 = nn.Conv2d(out_channel, out_channel, 3, 1, 1)
self.conv4 = nn.Conv2d(out_channel, out_channel, 3, 1, 1)
if self.use_gn:
self.gn1 = nn.GroupNorm(16, out_channel)
self.gn2 = nn.GroupNorm(16, out_channel)
self.gn3 = nn.GroupNorm(16, out_channel)
self.gn4 = nn.GroupNorm(16, out_channel)

    def forward(self, x):
        # Note: conv1 (and, in the GN branch, gn1..gn4) is applied repeatedly;
        # conv2/conv3/conv4 are registered but never called here. This is kept
        # as-is, since changing it would break pretrained checkpoints.
        if self.use_gn:
            x1 = F.relu(self.gn1(self.conv1(x)), inplace=True)
            x2 = F.relu(self.gn2(self.conv1(x1)), inplace=True)
            x3 = F.relu(self.gn3(self.conv1(x2)), inplace=True)
            x4 = F.relu(self.gn4(self.conv1(x3)), inplace=True)
        else:
            x1 = F.relu(self.conv1(x), inplace=True)
            x2 = F.relu(self.conv1(x1), inplace=True)
            if self.num_conv == 2:
                return x2
            x3 = F.relu(self.conv1(x2), inplace=True)
            x4 = F.relu(self.conv1(x3), inplace=True)

        return x4


class MogPredNet(nn.Module):

def __init__(self,
num_anchor_per_pixel=1,
num_classes=1,
input_ch_list=[256, 256, 256, 256, 256, 256],
use_deep_head=True,
deep_head_with_gn=True,
use_ssh=True,
deep_head_ch=512):
super(MogPredNet, self).__init__()
self.num_classes = num_classes
self.use_deep_head = use_deep_head
self.deep_head_with_gn = deep_head_with_gn

self.use_ssh = use_ssh

self.deep_head_ch = deep_head_ch

if self.use_ssh:
self.conv_SSH = SSHContext(input_ch_list[0],
self.deep_head_ch // 2)

if self.use_deep_head:
if self.deep_head_with_gn:
self.deep_loc_head = DeepHead(
self.deep_head_ch, self.deep_head_ch, use_gn=True)
self.deep_cls_head = DeepHead(
self.deep_head_ch, self.deep_head_ch, use_gn=True)

self.pred_cls = nn.Conv2d(self.deep_head_ch,
1 * num_anchor_per_pixel, 3, 1, 1)
self.pred_loc = nn.Conv2d(self.deep_head_ch,
4 * num_anchor_per_pixel, 3, 1, 1)

self.sigmoid = nn.Sigmoid()

def forward(self, pyramid_feature_list, dsfd_ft_list=None):
loc = []
conf = []

if self.use_deep_head:
for x in pyramid_feature_list:
if self.use_ssh:
x = self.conv_SSH(x)
x_cls = self.deep_cls_head(x)
x_loc = self.deep_loc_head(x)

conf.append(
self.pred_cls(x_cls).permute(0, 2, 3, 1).contiguous())
loc.append(
self.pred_loc(x_loc).permute(0, 2, 3, 1).contiguous())

loc = torch.cat([o.view(o.size(0), -1, 4) for o in loc], 1)
conf = torch.cat(
[o.view(o.size(0), -1, self.num_classes) for o in conf], 1)
output = (
self.sigmoid(conf.view(conf.size(0), -1, self.num_classes)),
loc.view(loc.size(0), -1, 4),
)

return output
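
A shape check for the head, assuming the six pyramid levels a 640x640 input produces at the default strides (a sketch; in the real model these features come from LFPN). The 34125 rows match the MogPriorBox anchor count below, one anchor per pyramid cell:

import torch

net = MogPredNet()
feats = [torch.zeros(1, 256, s, s) for s in (160, 80, 40, 20, 10, 5)]
conf, loc = net(feats)
print(conf.shape, loc.shape)  # torch.Size([1, 34125, 1]) torch.Size([1, 34125, 4])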

+ 193
- 0
modelscope/models/cv/face_detection/mogface/models/resnet.py View File

@@ -0,0 +1,193 @@
# The implementation is modified from the original ResNet implementation, which is
# also open-sourced by the author Yang Liu,
# and is publicly available at https://github.com/damo-cv/MogFace

import torch.nn as nn


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=dilation,
groups=groups,
bias=False,
dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(
in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class Bottleneck(nn.Module):
expansion = 4

def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
groups=1,
base_width=64,
dilation=1,
norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride

def forward(self, x):
identity = x

out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)

out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)

out = self.conv3(out)
out = self.bn3(out)

if self.downsample is not None:
identity = self.downsample(x)

out += identity
out = self.relu(out)

return out


class ResNet(nn.Module):

def __init__(self,
depth=50,
groups=1,
width_per_group=64,
replace_stride_with_dilation=None,
norm_layer=None,
inplanes=64,
shrink_ch_ratio=1):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer

if depth == 50:
block = Bottleneck
layers = [3, 4, 6, 3]
elif depth == 101:
block = Bottleneck
layers = [3, 4, 23, 3]
elif depth == 152:
block = Bottleneck
layers = [3, 4, 36, 3]
        elif depth == 18:
            # BasicBlock is not defined in this file, so depth=18 is not usable
            # as shipped; only the Bottleneck depths (50/101/152) are.
            block = BasicBlock
            layers = [2, 2, 2, 2]
        else:
            raise ValueError('only depths in [18, 50, 101, 152] are supported')

shrink_input_ch = int(inplanes * shrink_ch_ratio)
self.inplanes = int(inplanes * shrink_ch_ratio)
if shrink_ch_ratio == 0.125:
layers = [2, 3, 3, 3]

self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError('replace_stride_with_dilation should be None '
'or a 3-element tuple, got {}'.format(
replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(
3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, shrink_input_ch, layers[0])
self.layer2 = self._make_layer(
block,
shrink_input_ch * 2,
layers[1],
stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(
block,
shrink_input_ch * 4,
layers[2],
stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(
block,
shrink_input_ch * 8,
layers[3],
stride=2,
dilate=replace_stride_with_dilation[2])

def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)

layers = []
layers.append(
block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(
block(
self.inplanes,
planes,
groups=self.groups,
base_width=self.base_width,
dilation=self.dilation,
norm_layer=norm_layer))

return nn.Sequential(*layers)

def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
four_conv_layer = []
x = self.layer1(x)
four_conv_layer.append(x)
x = self.layer2(x)
four_conv_layer.append(x)
x = self.layer3(x)
four_conv_layer.append(x)
x = self.layer4(x)
four_conv_layer.append(x)

return four_conv_layer

+ 212
- 0
modelscope/models/cv/face_detection/mogface/models/utils.py View File

@@ -0,0 +1,212 @@
# Modified from https://github.com/biubug6/Pytorch_Retinaface

import math
from itertools import product as product
from math import ceil

import numpy as np
import torch


def transform_anchor(anchors):
"""
from [x0, x1, y0, y1] to [c_x, cy, w, h]
x1 = x0 + w - 1
c_x = (x0 + x1) / 2 = (2x0 + w - 1) / 2 = x0 + (w - 1) / 2
"""
return np.concatenate(((anchors[:, :2] + anchors[:, 2:]) / 2,
anchors[:, 2:] - anchors[:, :2] + 1),
axis=1)


def normalize_anchor(anchors):
"""
    from [cx, cy, w, h] to [x0, y0, x1, y1]
"""
item_1 = anchors[:, :2] - (anchors[:, 2:] - 1) / 2
item_2 = anchors[:, :2] + (anchors[:, 2:] - 1) / 2
return np.concatenate((item_1, item_2), axis=1)
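
A quick numeric check of the corner/center round trip (NumPy only):

import numpy as np

corner = np.array([[0., 0., 15., 15.]])  # x0, y0, x1, y1
center = transform_anchor(corner)         # [[7.5, 7.5, 16., 16.]] -> cx, cy, w, h
print(normalize_anchor(center))           # back to [[0., 0., 15., 15.]]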


class MogPriorBox(object):
"""
both for fpn and single layer, single layer need to test
return (np.array) [num_anchros, 4] [x0, y0, x1, y1]
"""

def __init__(self,
scale_list=[1.],
aspect_ratio_list=[1.0],
stride_list=[4, 8, 16, 32, 64, 128],
anchor_size_list=[16, 32, 64, 128, 256, 512]):
self.scale_list = scale_list
self.aspect_ratio_list = aspect_ratio_list
self.stride_list = stride_list
self.anchor_size_list = anchor_size_list

def __call__(self, img_height, img_width):
final_anchor_list = []

for idx, stride in enumerate(self.stride_list):
anchor_list = []
cur_img_height = img_height
cur_img_width = img_width
tmp_stride = stride

while tmp_stride != 1:
tmp_stride = tmp_stride // 2
cur_img_height = (cur_img_height + 1) // 2
cur_img_width = (cur_img_width + 1) // 2

for i in range(cur_img_height):
for j in range(cur_img_width):
for scale in self.scale_list:
cx = (j + 0.5) * stride
cy = (i + 0.5) * stride
side_x = self.anchor_size_list[idx] * scale
side_y = self.anchor_size_list[idx] * scale
for ratio in self.aspect_ratio_list:
anchor_list.append([
cx, cy, side_x / math.sqrt(ratio),
side_y * math.sqrt(ratio)
])

final_anchor_list.append(anchor_list)
final_anchor_arr = np.concatenate(final_anchor_list, axis=0)
normalized_anchor_arr = normalize_anchor(final_anchor_arr).astype(
'float32')
transformed_anchor = transform_anchor(normalized_anchor_arr)

return transformed_anchor
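
For a 640x640 input the per-stride grids are 160^2, 80^2, 40^2, 20^2, 10^2 and 5^2 cells, so the default single scale and aspect ratio yield 34125 anchors; a sketch:

pb = MogPriorBox(scale_list=[0.68])
anchors = pb(640, 640)
print(anchors.shape)  # (34125, 4), center form (cx, cy, w, h)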


class PriorBox(object):

def __init__(self, cfg, image_size=None, phase='train'):
super(PriorBox, self).__init__()
self.min_sizes = cfg['min_sizes']
self.steps = cfg['steps']
self.clip = cfg['clip']
self.image_size = image_size
self.feature_maps = [[
ceil(self.image_size[0] / step),
ceil(self.image_size[1] / step)
] for step in self.steps]
self.name = 's'

def forward(self):
anchors = []
for k, f in enumerate(self.feature_maps):
min_sizes = self.min_sizes[k]
for i, j in product(range(f[0]), range(f[1])):
for min_size in min_sizes:
s_kx = min_size / self.image_size[1]
s_ky = min_size / self.image_size[0]
dense_cx = [
x * self.steps[k] / self.image_size[1]
for x in [j + 0.5]
]
dense_cy = [
y * self.steps[k] / self.image_size[0]
for y in [i + 0.5]
]
for cy, cx in product(dense_cy, dense_cx):
anchors += [cx, cy, s_kx, s_ky]

# back to torch land
output = torch.Tensor(anchors).view(-1, 4)
if self.clip:
output.clamp_(max=1, min=0)
return output


def py_cpu_nms(dets, thresh):
"""Pure Python NMS baseline."""
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4]

areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]

keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])

w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)

inds = np.where(ovr <= thresh)[0]
order = order[inds + 1]

return keep
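
A minimal use of the NMS baseline; the heavily overlapping second box is suppressed:

import numpy as np

dets = np.array([[10., 10., 60., 60., 0.9],
                 [12., 12., 62., 62., 0.8],       # IoU ~0.86 with the first box
                 [100., 100., 150., 150., 0.7]])
print(py_cpu_nms(dets, thresh=0.4))                # indices [0, 2]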


def mogdecode(loc, anchors):
"""
loc: torch.Tensor
anchors: 2-d, torch.Tensor (cx, cy, w, h)
boxes: 2-d, torch.Tensor (x0, y0, x1, y1)
"""

boxes = torch.cat((anchors[:, :2] + loc[:, :2] * anchors[:, 2:],
anchors[:, 2:] * torch.exp(loc[:, 2:])), 1)

boxes[:, 0] -= (boxes[:, 2] - 1) / 2
boxes[:, 1] -= (boxes[:, 3] - 1) / 2
boxes[:, 2] += boxes[:, 0] - 1
boxes[:, 3] += boxes[:, 1] - 1

return boxes
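
With zero offsets, mogdecode recovers the anchor corners exactly, which is a handy sanity check of the center/corner conventions above:

import torch

anchors = torch.tensor([[7.5, 7.5, 16., 16.]])  # cx, cy, w, h
print(mogdecode(torch.zeros(1, 4), anchors))     # tensor([[ 0.,  0., 15., 15.]])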


# Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
"""Decode locations from predictions using priors to undo
the encoding we did for offset regression at train time.
Args:
loc (tensor): location predictions for loc layers,
Shape: [num_priors,4]
priors (tensor): Prior boxes in center-offset form.
Shape: [num_priors,4].
variances: (list[float]) Variances of priorboxes
Return:
decoded bounding box predictions
"""

boxes = torch.cat(
(priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
boxes[:, :2] -= boxes[:, 2:] / 2
boxes[:, 2:] += boxes[:, :2]
return boxes


def decode_landm(pre, priors, variances):
"""Decode landm from predictions using priors to undo
the encoding we did for offset regression at train time.
Args:
pre (tensor): landm predictions for loc layers,
Shape: [num_priors,10]
priors (tensor): Prior boxes in center-offset form.
Shape: [num_priors,4].
variances: (list[float]) Variances of priorboxes
Return:
decoded landm predictions
"""
a = priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:]
b = priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:]
c = priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:]
d = priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:]
e = priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]
landms = torch.cat((a, b, c, d, e), dim=1)
return landms

+ 1
- 0
modelscope/models/cv/face_detection/mtcnn/__init__.py View File

@@ -0,0 +1 @@
from .models.detector import MtcnnFaceDetector

+ 0
- 0
modelscope/models/cv/face_detection/mtcnn/models/__init__.py View File


+ 240
- 0
modelscope/models/cv/face_detection/mtcnn/models/box_utils.py View File

@@ -0,0 +1,240 @@
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch
import numpy as np
from PIL import Image


def nms(boxes, overlap_threshold=0.5, mode='union'):
"""Non-maximum suppression.

Arguments:
boxes: a float numpy array of shape [n, 5],
where each row is (xmin, ymin, xmax, ymax, score).
overlap_threshold: a float number.
mode: 'union' or 'min'.

Returns:
list with indices of the selected boxes
"""

# if there are no boxes, return the empty list
if len(boxes) == 0:
return []

# list of picked indices
pick = []

# grab the coordinates of the bounding boxes
x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
ids = np.argsort(score) # in increasing order

while len(ids) > 0:

# grab index of the largest value
last = len(ids) - 1
i = ids[last]
pick.append(i)

# compute intersections
# of the box with the largest score
# with the rest of boxes

# left top corner of intersection boxes
ix1 = np.maximum(x1[i], x1[ids[:last]])
iy1 = np.maximum(y1[i], y1[ids[:last]])

# right bottom corner of intersection boxes
ix2 = np.minimum(x2[i], x2[ids[:last]])
iy2 = np.minimum(y2[i], y2[ids[:last]])

# width and height of intersection boxes
w = np.maximum(0.0, ix2 - ix1 + 1.0)
h = np.maximum(0.0, iy2 - iy1 + 1.0)

# intersections' areas
inter = w * h
if mode == 'min':
overlap = inter / np.minimum(area[i], area[ids[:last]])
elif mode == 'union':
# intersection over union (IoU)
overlap = inter / (area[i] + area[ids[:last]] - inter)

# delete all boxes where overlap is too big
ids = np.delete(
ids,
np.concatenate([[last],
np.where(overlap > overlap_threshold)[0]]))

return pick


def convert_to_square(bboxes):
"""Convert bounding boxes to a square form.

Arguments:
bboxes: a float numpy array of shape [n, 5].

Returns:
a float numpy array of shape [n, 5],
squared bounding boxes.
"""

square_bboxes = np.zeros_like(bboxes)
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
h = y2 - y1 + 1.0
w = x2 - x1 + 1.0
max_side = np.maximum(h, w)
square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5
square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
return square_bboxes


def calibrate_box(bboxes, offsets):
"""Transform bounding boxes to be more like true bounding boxes.
'offsets' is one of the outputs of the nets.

Arguments:
bboxes: a float numpy array of shape [n, 5].
offsets: a float numpy array of shape [n, 4].

Returns:
a float numpy array of shape [n, 5].
"""
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
w = x2 - x1 + 1.0
h = y2 - y1 + 1.0
w = np.expand_dims(w, 1)
h = np.expand_dims(h, 1)

# this is what happening here:
# tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
# x1_true = x1 + tx1*w
# y1_true = y1 + ty1*h
# x2_true = x2 + tx2*w
# y2_true = y2 + ty2*h
# below is just more compact form of this

# are offsets always such that
# x1 < x2 and y1 < y2 ?

translation = np.hstack([w, h, w, h]) * offsets
bboxes[:, 0:4] = bboxes[:, 0:4] + translation
return bboxes


def get_image_boxes(bounding_boxes, img, size=24):
"""Cut out boxes from the image.

Arguments:
bounding_boxes: a float numpy array of shape [n, 5].
img: an instance of PIL.Image.
size: an integer, size of cutouts.

Returns:
a float numpy array of shape [n, 3, size, size].
"""

num_boxes = len(bounding_boxes)
width, height = img.size

[dy, edy, dx, edx, y, ey, x, ex, w,
h] = correct_bboxes(bounding_boxes, width, height)
img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

for i in range(num_boxes):
img_box = np.zeros((h[i], w[i], 3), 'uint8')

img_array = np.asarray(img, 'uint8')
img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\
img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

# resize
img_box = Image.fromarray(img_box)
img_box = img_box.resize((size, size), Image.BILINEAR)
img_box = np.asarray(img_box, 'float32')

img_boxes[i, :, :, :] = _preprocess(img_box)

return img_boxes


def correct_bboxes(bboxes, width, height):
"""Crop boxes that are too big and get coordinates
with respect to cutouts.

Arguments:
bboxes: a float numpy array of shape [n, 5],
where each row is (xmin, ymin, xmax, ymax, score).
width: a float number.
height: a float number.

Returns:
dy, dx, edy, edx: a int numpy arrays of shape [n],
coordinates of the boxes with respect to the cutouts.
y, x, ey, ex: a int numpy arrays of shape [n],
corrected ymin, xmin, ymax, xmax.
h, w: a int numpy arrays of shape [n],
just heights and widths of boxes.

in the following order:
[dy, edy, dx, edx, y, ey, x, ex, w, h].
"""

x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
num_boxes = bboxes.shape[0]

# 'e' stands for end
# (x, y) -> (ex, ey)
x, y, ex, ey = x1, y1, x2, y2

# we need to cut out a box from the image.
# (x, y, ex, ey) are corrected coordinates of the box
# in the image.
# (dx, dy, edx, edy) are coordinates of the box in the cutout
# from the image.
dx, dy = np.zeros((num_boxes, )), np.zeros((num_boxes, ))
edx, edy = w.copy() - 1.0, h.copy() - 1.0

# if box's bottom right corner is too far right
ind = np.where(ex > width - 1.0)[0]
edx[ind] = w[ind] + width - 2.0 - ex[ind]
ex[ind] = width - 1.0

# if box's bottom right corner is too low
ind = np.where(ey > height - 1.0)[0]
edy[ind] = h[ind] + height - 2.0 - ey[ind]
ey[ind] = height - 1.0

# if box's top left corner is too far left
ind = np.where(x < 0.0)[0]
dx[ind] = 0.0 - x[ind]
x[ind] = 0.0

# if box's top left corner is too high
ind = np.where(y < 0.0)[0]
dy[ind] = 0.0 - y[ind]
y[ind] = 0.0

return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
return_list = [i.astype('int32') for i in return_list]

return return_list


def _preprocess(img):
"""Preprocessing step before feeding the network.

Arguments:
img: a float numpy array of shape [h, w, c].

Returns:
a float numpy array of shape [1, c, h, w].
"""
img = img.transpose((2, 0, 1))
img = np.expand_dims(img, 0)
img = (img - 127.5) * 0.0078125
return img
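
_preprocess maps uint8 pixel values into roughly [-1, 1] ((x - 127.5) / 128) and adds the batch axis; a quick check:

import numpy as np

img = np.zeros((24, 24, 3), 'float32')
out = _preprocess(img)
print(out.shape, out[0, 0, 0, 0])  # (1, 3, 24, 24) -0.99609375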

+ 149
- 0
modelscope/models/cv/face_detection/mtcnn/models/detector.py View File

@@ -0,0 +1,149 @@
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch
import os

import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image

from modelscope.metainfo import Models
from modelscope.models.base import TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks
from .box_utils import calibrate_box, convert_to_square, get_image_boxes, nms
from .first_stage import run_first_stage
from .get_nets import ONet, PNet, RNet


@MODELS.register_module(Tasks.face_detection, module_name=Models.mtcnn)
class MtcnnFaceDetector(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device

self.pnet = PNet(model_path=os.path.join(self.model_path, 'pnet.npy'))
self.rnet = RNet(model_path=os.path.join(self.model_path, 'rnet.npy'))
self.onet = ONet(model_path=os.path.join(self.model_path, 'onet.npy'))

self.pnet = self.pnet.to(device)
self.rnet = self.rnet.to(device)
self.onet = self.onet.to(device)

def forward(self, input):
image = Image.fromarray(np.uint8(input['img'].cpu().numpy()))
pnet = self.pnet
rnet = self.rnet
onet = self.onet
onet.eval()

min_face_size = 20.0
thresholds = [0.7, 0.8, 0.9]
nms_thresholds = [0.7, 0.7, 0.7]

# BUILD AN IMAGE PYRAMID
width, height = image.size
min_length = min(height, width)

min_detection_size = 12
factor = 0.707 # sqrt(0.5)

# scales for scaling the image
scales = []

m = min_detection_size / min_face_size
min_length *= m

factor_count = 0
while min_length > min_detection_size:
scales.append(m * factor**factor_count)
min_length *= factor
factor_count += 1

# STAGE 1

# it will be returned
bounding_boxes = []

# run P-Net on different scales
for s in scales:
boxes = run_first_stage(
image,
pnet,
scale=s,
threshold=thresholds[0],
device=self.device)
bounding_boxes.append(boxes)

        # collect boxes (and offsets, and scores) from different scales
        bounding_boxes = [i for i in bounding_boxes if i is not None]
        if len(bounding_boxes) == 0:
            return [], []
        bounding_boxes = np.vstack(bounding_boxes)

keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
bounding_boxes = bounding_boxes[keep]

# use offsets predicted by pnet to transform bounding boxes
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5],
bounding_boxes[:, 5:])
# shape [n_boxes, 5]

bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

# STAGE 2

img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.from_numpy(img_boxes)  # volatile Variables are obsolete; grads are globally disabled
output = rnet(img_boxes.to(self.device))
offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4]
probs = output[1].cpu().data.numpy() # shape [n_boxes, 2]

keep = np.where(probs[:, 1] > thresholds[1])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
offsets = offsets[keep]

keep = nms(bounding_boxes, nms_thresholds[1])
bounding_boxes = bounding_boxes[keep]
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
bounding_boxes = convert_to_square(bounding_boxes)
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

# STAGE 3

img_boxes = get_image_boxes(bounding_boxes, image, size=48)
if len(img_boxes) == 0:
return [], []
        img_boxes = torch.from_numpy(img_boxes)
output = onet(img_boxes.to(self.device))
landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10]
offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4]
probs = output[2].cpu().data.numpy() # shape [n_boxes, 2]

keep = np.where(probs[:, 1] > thresholds[2])[0]
bounding_boxes = bounding_boxes[keep]
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
offsets = offsets[keep]
landmarks = landmarks[keep]

# compute landmark points
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
landmarks[:, 0:5] = np.expand_dims(
xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
landmarks[:, 5:10] = np.expand_dims(
ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

bounding_boxes = calibrate_box(bounding_boxes, offsets)
keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
bounding_boxes = bounding_boxes[keep]
landmarks = landmarks[keep]
landmarks = landmarks.reshape(-1, 2, 5).transpose(
(0, 2, 1)).reshape(-1, 10)

return bounding_boxes, landmarks
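
A sketch of driving the three-stage cascade (the weight directory and image path are assumptions of this example, not part of the diff):

import cv2
import torch

detector = MtcnnFaceDetector('/path/to/mtcnn_weights', device='cpu')  # hypothetical path
img = cv2.imread('face.jpg')[:, :, ::-1].copy()                       # BGR -> RGB
boxes, landmarks = detector({'img': torch.from_numpy(img)})
# boxes: [n, 5] (x0, y0, x1, y1, score); landmarks: [n, 10] interleaved (x1, y1, ..., x5, y5)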

+ 100
- 0
modelscope/models/cv/face_detection/mtcnn/models/first_stage.py View File

@@ -0,0 +1,100 @@
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch
import math

import numpy as np
import torch
from PIL import Image

from .box_utils import _preprocess, nms


def run_first_stage(image, net, scale, threshold, device='cuda'):
"""Run P-Net, generate bounding boxes, and do NMS.

Arguments:
image: an instance of PIL.Image.
net: an instance of pytorch's nn.Module, P-Net.
scale: a float number,
scale width and height of the image by this number.
threshold: a float number,
threshold on the probability of a face when generating
bounding boxes from predictions of the net.

Returns:
a float numpy array of shape [n_boxes, 9],
bounding boxes with scores and offsets (4 + 1 + 4).
"""

# scale the image and convert it to a float array
width, height = image.size
sw, sh = math.ceil(width * scale), math.ceil(height * scale)
img = image.resize((sw, sh), Image.BILINEAR)
img = np.asarray(img, 'float32')

    with torch.no_grad():  # replaces the obsolete volatile=True Variable
        img = torch.from_numpy(_preprocess(img)).to(device)
        output = net(img)
probs = output[1].cpu().data.numpy()[0, 1, :, :]
offsets = output[0].cpu().data.numpy()
# probs: probability of a face at each sliding window
# offsets: transformations to true bounding boxes

boxes = _generate_bboxes(probs, offsets, scale, threshold)
if len(boxes) == 0:
return None

keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
return boxes[keep]


def _generate_bboxes(probs, offsets, scale, threshold):
"""Generate bounding boxes at places
where there is probably a face.

Arguments:
probs: a float numpy array of shape [n, m].
offsets: a float numpy array of shape [1, 4, n, m].
scale: a float number,
width and height of the image were scaled by this number.
threshold: a float number.

Returns:
a float numpy array of shape [n_boxes, 9]
"""

# applying P-Net is equivalent, in some sense, to
# moving 12x12 window with stride 2
stride = 2
cell_size = 12

# indices of boxes where there is probably a face
inds = np.where(probs > threshold)

if inds[0].size == 0:
return np.array([])

# transformations of bounding boxes
tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
# they are defined as:
# w = x2 - x1 + 1
# h = y2 - y1 + 1
# x1_true = x1 + tx1*w
# x2_true = x2 + tx2*w
# y1_true = y1 + ty1*h
# y2_true = y2 + ty2*h

offsets = np.array([tx1, ty1, tx2, ty2])
score = probs[inds[0], inds[1]]

# P-Net is applied to scaled images
# so we need to rescale bounding boxes back
bounding_boxes = np.vstack([
np.round((stride * inds[1] + 1.0) / scale),
np.round((stride * inds[0] + 1.0) / scale),
np.round((stride * inds[1] + 1.0 + cell_size) / scale),
np.round((stride * inds[0] + 1.0 + cell_size) / scale), score, offsets
])
# why one is added?

return bounding_boxes.T

+ 160
- 0
modelscope/models/cv/face_detection/mtcnn/models/get_nets.py View File

@@ -0,0 +1,160 @@
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class Flatten(nn.Module):

def __init__(self):
super(Flatten, self).__init__()

def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, c, h, w].
Returns:
a float tensor with shape [batch_size, c*h*w].
"""

        # without this transpose, the pretrained weights do not produce correct outputs
x = x.transpose(3, 2).contiguous()

return x.view(x.size(0), -1)


class PNet(nn.Module):

def __init__(self, model_path=None):

super(PNet, self).__init__()

# suppose we have input with size HxW, then
# after first layer: H - 2,
# after pool: ceil((H - 2)/2),
# after second conv: ceil((H - 2)/2) - 2,
# after last conv: ceil((H - 2)/2) - 4,
# and the same for W

self.features = nn.Sequential(
OrderedDict([('conv1', nn.Conv2d(3, 10, 3, 1)),
('prelu1', nn.PReLU(10)),
('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)),
('conv2', nn.Conv2d(10, 16, 3, 1)),
('prelu2', nn.PReLU(16)),
('conv3', nn.Conv2d(16, 32, 3, 1)),
('prelu3', nn.PReLU(32))]))

self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
self.conv4_2 = nn.Conv2d(32, 4, 1, 1)

weights = np.load(model_path, allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])

def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
b: a float tensor with shape [batch_size, 4, h', w'].
a: a float tensor with shape [batch_size, 2, h', w'].
"""
x = self.features(x)
a = self.conv4_1(x)
b = self.conv4_2(x)
        a = F.softmax(a, dim=1)
return b, a
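
The receptive-field comment above checks out for the minimal 12x12 detection window (the weight file name is an assumption of this sketch; __init__ loads it unconditionally):

import torch

net = PNet(model_path='pnet.npy')      # hypothetical local weight file
b, a = net(torch.zeros(1, 3, 12, 12))
print(b.shape, a.shape)                 # [1, 4, 1, 1] and [1, 2, 1, 1]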


class RNet(nn.Module):

def __init__(self, model_path=None):

super(RNet, self).__init__()

self.features = nn.Sequential(
OrderedDict([('conv1', nn.Conv2d(3, 28, 3, 1)),
('prelu1', nn.PReLU(28)),
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv2', nn.Conv2d(28, 48, 3, 1)),
('prelu2', nn.PReLU(48)),
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv3', nn.Conv2d(48, 64, 2, 1)),
('prelu3', nn.PReLU(64)), ('flatten', Flatten()),
('conv4', nn.Linear(576, 128)),
('prelu4', nn.PReLU(128))]))

self.conv5_1 = nn.Linear(128, 2)
self.conv5_2 = nn.Linear(128, 4)

weights = np.load(model_path, allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])

def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
b: a float tensor with shape [batch_size, 4].
a: a float tensor with shape [batch_size, 2].
"""
x = self.features(x)
a = self.conv5_1(x)
b = self.conv5_2(x)
        a = F.softmax(a, dim=1)
return b, a


class ONet(nn.Module):

def __init__(self, model_path=None):

super(ONet, self).__init__()

self.features = nn.Sequential(
OrderedDict([
('conv1', nn.Conv2d(3, 32, 3, 1)),
('prelu1', nn.PReLU(32)),
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv2', nn.Conv2d(32, 64, 3, 1)),
('prelu2', nn.PReLU(64)),
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
('conv3', nn.Conv2d(64, 64, 3, 1)),
('prelu3', nn.PReLU(64)),
('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)),
('conv4', nn.Conv2d(64, 128, 2, 1)),
('prelu4', nn.PReLU(128)),
('flatten', Flatten()),
('conv5', nn.Linear(1152, 256)),
('drop5', nn.Dropout(0.25)),
('prelu5', nn.PReLU(256)),
]))

self.conv6_1 = nn.Linear(256, 2)
self.conv6_2 = nn.Linear(256, 4)
self.conv6_3 = nn.Linear(256, 10)

weights = np.load(model_path, allow_pickle=True)[()]
for n, p in self.named_parameters():
p.data = torch.FloatTensor(weights[n])

def forward(self, x):
"""
Arguments:
x: a float tensor with shape [batch_size, 3, h, w].
Returns:
c: a float tensor with shape [batch_size, 10].
b: a float tensor with shape [batch_size, 4].
a: a float tensor with shape [batch_size, 2].
"""
x = self.features(x)
a = self.conv6_1(x)
b = self.conv6_2(x)
c = self.conv6_3(x)
        a = F.softmax(a, dim=1)
return c, b, a

+ 1
- 0
modelscope/models/cv/face_detection/ulfd_slim/__init__.py View File

@@ -0,0 +1 @@
from .detection import UlfdFaceDetector

+ 44
- 0
modelscope/models/cv/face_detection/ulfd_slim/detection.py View File

@@ -0,0 +1,44 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
import os

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F

from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS
from modelscope.utils.constant import ModelFile, Tasks
from .vision.ssd.fd_config import define_img_size
from .vision.ssd.mb_tiny_fd import (create_mb_tiny_fd,
create_mb_tiny_fd_predictor)

define_img_size(640)


@MODELS.register_module(Tasks.face_detection, module_name=Models.ulfd)
class UlfdFaceDetector(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device
self.net = create_mb_tiny_fd(2, is_test=True, device=device)
self.predictor = create_mb_tiny_fd_predictor(
self.net, candidate_size=1500, device=device)
self.net.load(model_path)
self.net = self.net.to(device)

def forward(self, input):
img_raw = input['img']
img = np.array(img_raw.cpu().detach())
img = img[:, :, ::-1]
prob_th = 0.85
keep_top_k = 750
boxes, labels, probs = self.predictor.predict(img, keep_top_k, prob_th)
return boxes, probs

+ 0
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/__init__.py View File


+ 124
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/box_utils.py View File

@@ -0,0 +1,124 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
import math

import torch


def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""

Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
top_k: keep top_k results. If k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
        the kept rows of box_scores, a tensor of shape [m, 5] (not indexes)
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
_, indexes = scores.sort(descending=True)
indexes = indexes[:candidate_size]
while len(indexes) > 0:
current = indexes[0]
picked.append(current.item())
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[1:]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
current_box.unsqueeze(0),
)
indexes = indexes[iou <= iou_threshold]

return box_scores[picked, :]


def nms(box_scores,
nms_method=None,
score_threshold=None,
iou_threshold=None,
sigma=0.5,
top_k=-1,
candidate_size=200):
return hard_nms(
box_scores, iou_threshold, top_k, candidate_size=candidate_size)


def generate_priors(feature_map_list,
shrinkage_list,
image_size,
min_boxes,
clamp=True) -> torch.Tensor:
priors = []
for index in range(0, len(feature_map_list[0])):
scale_w = image_size[0] / shrinkage_list[0][index]
scale_h = image_size[1] / shrinkage_list[1][index]
for j in range(0, feature_map_list[1][index]):
for i in range(0, feature_map_list[0][index]):
x_center = (i + 0.5) / scale_w
y_center = (j + 0.5) / scale_h

for min_box in min_boxes[index]:
w = min_box / image_size[0]
h = min_box / image_size[1]
priors.append([x_center, y_center, w, h])
priors = torch.tensor(priors)
if clamp:
torch.clamp(priors, 0.0, 1.0, out=priors)
return priors
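
With the 320x240 configuration from fd_config below this reproduces the familiar 4420 priors (40*30*3 + 20*15*2 + 10*8*2 + 5*4*3); a sketch:

priors = generate_priors(
    [[40, 20, 10, 5], [30, 15, 8, 4]],       # feature map w/h per level
    [[8, 16, 32, 64], [8, 16, 30, 60]],      # shrinkage = image_size / feature_map
    [320, 240],
    [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]])
print(priors.shape)                           # torch.Size([4420, 4])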


def convert_locations_to_boxes(locations, priors, center_variance,
size_variance):
# priors can have one dimension less.
if priors.dim() + 1 == locations.dim():
priors = priors.unsqueeze(0)
a = locations[..., :2] * center_variance * priors[...,
2:] + priors[..., :2]
b = torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]

return torch.cat([a, b], dim=locations.dim() - 1)


def center_form_to_corner_form(locations):
a = locations[..., :2] - locations[..., 2:] / 2
b = locations[..., :2] + locations[..., 2:] / 2
return torch.cat([a, b], locations.dim() - 1)


def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.

Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])

overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)


def area_of(left_top, right_bottom) -> torch.Tensor:
"""Compute the areas of rectangles given two corners.

Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.

Returns:
area (N): return the area.
"""
hw = torch.clamp(right_bottom - left_top, min=0.0)
return hw[..., 0] * hw[..., 1]
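
A quick IoU check for two overlapping boxes:

import torch

b0 = torch.tensor([[0., 0., 10., 10.]])
b1 = torch.tensor([[5., 5., 15., 15.]])
print(iou_of(b0, b1))  # tensor([0.1429]): 25 / (100 + 100 - 25)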

+ 49
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/mb_tiny.py View File

@@ -0,0 +1,49 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
import torch.nn as nn
import torch.nn.functional as F


class Mb_Tiny(nn.Module):

def __init__(self, num_classes=2):
super(Mb_Tiny, self).__init__()
self.base_channel = 8 * 2

def conv_bn(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup), nn.ReLU(inplace=True))

def conv_dw(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
nn.BatchNorm2d(inp),
nn.ReLU(inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU(inplace=True),
)

self.model = nn.Sequential(
conv_bn(3, self.base_channel, 2), # 160*120
conv_dw(self.base_channel, self.base_channel * 2, 1),
conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60
conv_dw(self.base_channel * 2, self.base_channel * 2, 1),
conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30
conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15
conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8
conv_dw(self.base_channel * 16, self.base_channel * 16, 1))
self.fc = nn.Linear(1024, num_classes)

def forward(self, x):
x = self.model(x)
x = F.avg_pool2d(x, 7)
x = x.view(-1, 1024)
x = self.fc(x)
return x

+ 0
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/__init__.py View File


+ 18
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/data_preprocessing.py View File

@@ -0,0 +1,18 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
from ..transforms import Compose, Resize, SubtractMeans, ToTensor


class PredictionTransform:

def __init__(self, size, mean=0.0, std=1.0):
self.transform = Compose([
Resize(size),
SubtractMeans(mean), lambda img, boxes=None, labels=None:
(img / std, boxes, labels),
ToTensor()
])

def __call__(self, image):
image, _, _ = self.transform(image)
return image

+ 49
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/fd_config.py View File

@@ -0,0 +1,49 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
import numpy as np

from ..box_utils import generate_priors

image_mean_test = image_mean = np.array([127, 127, 127])
image_std = 128.0
iou_threshold = 0.3
center_variance = 0.1
size_variance = 0.2

min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
shrinkage_list = []
image_size = [320, 240] # default input size 320*240
feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8,
4]] # default feature map size
priors = []


def define_img_size(size):
global image_size, feature_map_w_h_list, priors
img_size_dict = {
128: [128, 96],
160: [160, 120],
320: [320, 240],
480: [480, 360],
640: [640, 480],
1280: [1280, 960]
}
image_size = img_size_dict[size]

feature_map_w_h_list_dict = {
128: [[16, 8, 4, 2], [12, 6, 3, 2]],
160: [[20, 10, 5, 3], [15, 8, 4, 2]],
320: [[40, 20, 10, 5], [30, 15, 8, 4]],
480: [[60, 30, 15, 8], [45, 23, 12, 6]],
640: [[80, 40, 20, 10], [60, 30, 15, 8]],
1280: [[160, 80, 40, 20], [120, 60, 30, 15]]
}
feature_map_w_h_list = feature_map_w_h_list_dict[size]

    shrinkage_list.clear()  # avoid stale entries if define_img_size runs more than once
    for i in range(0, len(image_size)):
item_list = []
for k in range(0, len(feature_map_w_h_list[i])):
item_list.append(image_size[i] / feature_map_w_h_list[i][k])
shrinkage_list.append(item_list)
priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size,
min_boxes)

+ 124
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/mb_tiny_fd.py View File

@@ -0,0 +1,124 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
from torch.nn import Conv2d, ModuleList, ReLU, Sequential

from ..mb_tiny import Mb_Tiny
from . import fd_config as config
from .predictor import Predictor
from .ssd import SSD


def SeperableConv2d(in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0):
"""Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d.
"""
return Sequential(
Conv2d(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
groups=in_channels,
stride=stride,
padding=padding),
ReLU(),
Conv2d(
in_channels=in_channels, out_channels=out_channels, kernel_size=1),
)
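
The depthwise-plus-pointwise factorization cuts parameters by roughly kernel_size^2 relative to a dense convolution; for example:

sep = SeperableConv2d(64, 128, kernel_size=3, padding=1)
dense = Conv2d(64, 128, kernel_size=3, padding=1)
print(sum(p.numel() for p in sep.parameters()))    # 8960  (64*9 + 64 + 64*128 + 128)
print(sum(p.numel() for p in dense.parameters()))  # 73856 (64*128*9 + 128)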


def create_mb_tiny_fd(num_classes, is_test=False, device='cuda'):
base_net = Mb_Tiny(2)
    base_net_model = base_net.model  # backbone only; the classification fc head is dropped

source_layer_indexes = [8, 11, 13]
extras = ModuleList([
Sequential(
Conv2d(
in_channels=base_net.base_channel * 16,
out_channels=base_net.base_channel * 4,
kernel_size=1), ReLU(),
SeperableConv2d(
in_channels=base_net.base_channel * 4,
out_channels=base_net.base_channel * 16,
kernel_size=3,
stride=2,
padding=1), ReLU())
])

regression_headers = ModuleList([
SeperableConv2d(
in_channels=base_net.base_channel * 4,
out_channels=3 * 4,
kernel_size=3,
padding=1),
SeperableConv2d(
in_channels=base_net.base_channel * 8,
out_channels=2 * 4,
kernel_size=3,
padding=1),
SeperableConv2d(
in_channels=base_net.base_channel * 16,
out_channels=2 * 4,
kernel_size=3,
padding=1),
Conv2d(
in_channels=base_net.base_channel * 16,
out_channels=3 * 4,
kernel_size=3,
padding=1)
])

classification_headers = ModuleList([
SeperableConv2d(
in_channels=base_net.base_channel * 4,
out_channels=3 * num_classes,
kernel_size=3,
padding=1),
SeperableConv2d(
in_channels=base_net.base_channel * 8,
out_channels=2 * num_classes,
kernel_size=3,
padding=1),
SeperableConv2d(
in_channels=base_net.base_channel * 16,
out_channels=2 * num_classes,
kernel_size=3,
padding=1),
Conv2d(
in_channels=base_net.base_channel * 16,
out_channels=3 * num_classes,
kernel_size=3,
padding=1)
])

return SSD(
num_classes,
base_net_model,
source_layer_indexes,
extras,
classification_headers,
regression_headers,
is_test=is_test,
config=config,
device=device)


def create_mb_tiny_fd_predictor(net,
candidate_size=200,
nms_method=None,
sigma=0.5,
device=None):
predictor = Predictor(
net,
config.image_size,
config.image_mean_test,
config.image_std,
nms_method=nms_method,
iou_threshold=config.iou_threshold,
candidate_size=candidate_size,
sigma=sigma,
device=device)
return predictor

+ 80
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/predictor.py View File

@@ -0,0 +1,80 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
import torch

from .. import box_utils
from .data_preprocessing import PredictionTransform


class Predictor:

def __init__(self,
net,
size,
mean=0.0,
std=1.0,
nms_method=None,
iou_threshold=0.3,
filter_threshold=0.85,
candidate_size=200,
sigma=0.5,
device=None):
self.net = net
self.transform = PredictionTransform(size, mean, std)
self.iou_threshold = iou_threshold
self.filter_threshold = filter_threshold
self.candidate_size = candidate_size
self.nms_method = nms_method

self.sigma = sigma
if device:
self.device = device
else:
self.device = torch.device(
'cuda:0' if torch.cuda.is_available() else 'cpu')

self.net.to(self.device)
self.net.eval()

def predict(self, image, top_k=-1, prob_threshold=None):
height, width, _ = image.shape
image = self.transform(image)
images = image.unsqueeze(0)
images = images.to(self.device)
        with torch.no_grad():
            scores, boxes = self.net(images)
boxes = boxes[0]
scores = scores[0]
if not prob_threshold:
prob_threshold = self.filter_threshold
# this version of nms is slower on GPU, so we move data to CPU.
picked_box_probs = []
picked_labels = []
for class_index in range(1, scores.size(1)):
probs = scores[:, class_index]
mask = probs > prob_threshold
probs = probs[mask]
if probs.size(0) == 0:
continue
subset_boxes = boxes[mask, :]
box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
box_probs = box_utils.nms(
box_probs,
self.nms_method,
score_threshold=prob_threshold,
iou_threshold=self.iou_threshold,
sigma=self.sigma,
top_k=top_k,
candidate_size=self.candidate_size)
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.size(0))
if not picked_box_probs:
return torch.tensor([]), torch.tensor([]), torch.tensor([])
picked_box_probs = torch.cat(picked_box_probs)
picked_box_probs[:, 0] *= width
picked_box_probs[:, 1] *= height
picked_box_probs[:, 2] *= width
picked_box_probs[:, 3] *= height
return picked_box_probs[:, :4], torch.tensor(
picked_labels), picked_box_probs[:, 4]
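
A sketch of using the predictor directly, mirroring UlfdFaceDetector above (the weight file and image path are assumptions of this example):

import cv2

# assumes fd_config.define_img_size(...) has already run (detection.py above
# calls it at import time), so config.priors is populated
net = create_mb_tiny_fd(2, is_test=True, device='cpu')
net.load('version-slim-320.pth')           # hypothetical weight file
predictor = create_mb_tiny_fd_predictor(net, candidate_size=1500, device='cpu')
img = cv2.imread('face.jpg')[:, :, ::-1]   # BGR -> RGB
boxes, labels, probs = predictor.predict(img, top_k=750, prob_threshold=0.85)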

+ 129
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/ssd/ssd.py View File

@@ -0,0 +1,129 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
from collections import namedtuple
from typing import List, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from .. import box_utils

GraphPath = namedtuple('GraphPath', ['s0', 'name', 's1'])


class SSD(nn.Module):

def __init__(self,
num_classes: int,
base_net: nn.ModuleList,
source_layer_indexes: List[int],
extras: nn.ModuleList,
classification_headers: nn.ModuleList,
regression_headers: nn.ModuleList,
is_test=False,
config=None,
device=None):
"""Compose a SSD model using the given components.
"""
super(SSD, self).__init__()

self.num_classes = num_classes
self.base_net = base_net
self.source_layer_indexes = source_layer_indexes
self.extras = extras
self.classification_headers = classification_headers
self.regression_headers = regression_headers
self.is_test = is_test
self.config = config

# register layers in source_layer_indexes by adding them to a module list
self.source_layer_add_ons = nn.ModuleList([
t[1] for t in source_layer_indexes
if isinstance(t, tuple) and not isinstance(t, GraphPath)
])
if device:
self.device = device
else:
self.device = torch.device(
'cuda:0' if torch.cuda.is_available() else 'cpu')
if is_test:
self.config = config
self.priors = config.priors.to(self.device)

def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
confidences = []
locations = []
start_layer_index = 0
header_index = 0
end_layer_index = 0
for end_layer_index in self.source_layer_indexes:
if isinstance(end_layer_index, GraphPath):
path = end_layer_index
end_layer_index = end_layer_index.s0
added_layer = None
elif isinstance(end_layer_index, tuple):
added_layer = end_layer_index[1]
end_layer_index = end_layer_index[0]
path = None
else:
added_layer = None
path = None
for layer in self.base_net[start_layer_index:end_layer_index]:
x = layer(x)
if added_layer:
y = added_layer(x)
else:
y = x
if path:
sub = getattr(self.base_net[end_layer_index], path.name)
for layer in sub[:path.s1]:
x = layer(x)
y = x
for layer in sub[path.s1:]:
x = layer(x)
end_layer_index += 1
start_layer_index = end_layer_index
confidence, location = self.compute_header(header_index, y)
header_index += 1
confidences.append(confidence)
locations.append(location)

for layer in self.base_net[end_layer_index:]:
x = layer(x)

for layer in self.extras:
x = layer(x)
confidence, location = self.compute_header(header_index, x)
header_index += 1
confidences.append(confidence)
locations.append(location)

confidences = torch.cat(confidences, 1)
locations = torch.cat(locations, 1)

if self.is_test:
confidences = F.softmax(confidences, dim=2)
boxes = box_utils.convert_locations_to_boxes(
locations, self.priors, self.config.center_variance,
self.config.size_variance)
boxes = box_utils.center_form_to_corner_form(boxes)
return confidences, boxes
else:
return confidences, locations

def compute_header(self, i, x):
confidence = self.classification_headers[i](x)
confidence = confidence.permute(0, 2, 3, 1).contiguous()
confidence = confidence.view(confidence.size(0), -1, self.num_classes)

location = self.regression_headers[i](x)
location = location.permute(0, 2, 3, 1).contiguous()
location = location.view(location.size(0), -1, 4)

return confidence, location

def load(self, model):
self.load_state_dict(
torch.load(model, map_location=lambda storage, loc: storage))

+ 56
- 0
modelscope/models/cv/face_detection/ulfd_slim/vision/transforms.py View File

@@ -0,0 +1,56 @@
# The implementation is based on ULFD, available at
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB
import types

import cv2
import numpy as np
import torch
from numpy import random


class Compose(object):
"""Composes several augmentations together.
Args:
transforms (List[Transform]): list of transforms to compose.
Example:
>>> augmentations.Compose([
>>> transforms.CenterCrop(10),
>>> transforms.ToTensor(),
>>> ])
"""

def __init__(self, transforms):
self.transforms = transforms

def __call__(self, img, boxes=None, labels=None):
for t in self.transforms:
img, boxes, labels = t(img, boxes, labels)
return img, boxes, labels


class SubtractMeans(object):

def __init__(self, mean):
self.mean = np.array(mean, dtype=np.float32)

def __call__(self, image, boxes=None, labels=None):
image = image.astype(np.float32)
image -= self.mean
return image.astype(np.float32), boxes, labels


class Resize(object):

def __init__(self, size=(300, 300)):
self.size = size

def __call__(self, image, boxes=None, labels=None):
image = cv2.resize(image, (self.size[0], self.size[1]))
return image, boxes, labels


class ToTensor(object):

def __call__(self, cvimage, boxes=None, labels=None):
return torch.from_numpy(cvimage.astype(np.float32)).permute(
2, 0, 1), boxes, labels

+ 4
- 0
modelscope/models/cv/face_recognition/align_face.py View File

@@ -1,3 +1,7 @@
"""
The implementation here is modified based on insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/blob/master/python-package/insightface/utils/face_align.py
"""
import cv2
import numpy as np
from skimage import transform as trans


+ 2
- 0
modelscope/models/cv/face_recognition/torchkit/backbone/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone
from .model_irse import (IR_18, IR_34, IR_50, IR_101, IR_152, IR_200, IR_SE_50,
IR_SE_101, IR_SE_152, IR_SE_200)
from .model_resnet import ResNet_50, ResNet_101, ResNet_152


+ 2
- 0
modelscope/models/cv/face_recognition/torchkit/backbone/common.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/common.py
import torch
import torch.nn as nn
from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Linear, Module, ReLU,


+ 2
- 2
modelscope/models/cv/face_recognition/torchkit/backbone/model_irse.py View File

@@ -1,5 +1,5 @@
# based on:
# https://github.com/ZhaoJ9014/face.evoLVe.PyTorch/blob/master/backbone/model_irse.py
# The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_irse.py
from collections import namedtuple

from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear,


+ 2
- 2
modelscope/models/cv/face_recognition/torchkit/backbone/model_resnet.py View File

@@ -1,5 +1,5 @@
# based on:
# https://github.com/ZhaoJ9014/face.evoLVe.PyTorch/blob/master/backbone/model_resnet.py
# The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_resnet.py
import torch.nn as nn
from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear,
MaxPool2d, Module, ReLU, Sequential)


+ 10
- 12
modelscope/models/cv/image_instance_segmentation/postprocess_utils.py View File

@@ -105,12 +105,12 @@ def get_img_ins_seg_result(img_seg_result=None,
}
for seg_result in img_seg_result:

box = {
'x': np.int(seg_result[0]),
'y': np.int(seg_result[1]),
'w': np.int(seg_result[2] - seg_result[0]),
'h': np.int(seg_result[3] - seg_result[1])
}
box = [
np.int(seg_result[0]),
np.int(seg_result[1]),
np.int(seg_result[2]),
np.int(seg_result[3])
]
score = np.float(seg_result[4])
category = seg_result[5]

@@ -161,12 +161,10 @@ def show_result(
np.random.random() * 255.0
])

x1 = int(box['x'])
y1 = int(box['y'])
w = int(box['w'])
h = int(box['h'])
x2 = x1 + w
y2 = y1 + h
x1 = int(box[0])
y1 = int(box[1])
x2 = int(box[2])
y2 = int(box[3])

if show_box:
cv2.rectangle(


+ 1
- 1
modelscope/models/cv/image_reid_person/pass_model.py View File

@@ -1,4 +1,4 @@
# The implementation is also open-sourced by the authors as PASS-reID, and is available publicly on
# The implementation is adopted from PASS-reID, made pubicly available under the Apache-2.0 License at
# https://github.com/CASIA-IVA-Lab/PASS-reID

import os


+ 1
- 1
modelscope/models/cv/image_reid_person/transreid_model.py View File

@@ -1,4 +1,4 @@
# The implementation is also open-sourced by the authors as PASS-reID, and is available publicly on
# The implementation is adopted from PASS-reID, made pubicly available under the Apache-2.0 License at
# https://github.com/CASIA-IVA-Lab/PASS-reID

import collections.abc as container_abcs


+ 1
- 1
modelscope/models/cv/shop_segmentation/models.py View File

@@ -552,7 +552,7 @@ class CLIPVisionTransformer(nn.Module):
nn.GroupNorm(1, embed_dim),
nn.ConvTranspose2d(
embed_dim, embed_dim, kernel_size=2, stride=2),
nn.SyncBatchNorm(embed_dim),
nn.BatchNorm2d(embed_dim),
nn.GELU(),
nn.ConvTranspose2d(
embed_dim, embed_dim, kernel_size=2, stride=2),


Some files were not shown because too many files changed in this diff
