# Conflicts:
#	modelscope/preprocessors/multi_modal.py
#	modelscope/trainers/trainer.py
#	tests/pipelines/test_ofa_tasks.py
@@ -36,6 +36,7 @@ do | |||
-e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ | |||
-e TEST_LEVEL=$TEST_LEVEL \ | |||
-e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ | |||
-e MODEL_TAG_URL=$MODEL_TAG_URL \ | |||
--workdir=$CODE_DIR_IN_CONTAINER \ | |||
--net host \ | |||
${IMAGE_NAME}:${IMAGE_VERSION} \ | |||
@@ -0,0 +1,3 @@ | |||
version https://git-lfs.github.com/spec/v1 | |||
oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9 | |||
size 87228 |
@@ -0,0 +1,3 @@ | |||
version https://git-lfs.github.com/spec/v1 | |||
oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9 | |||
size 87228 |
@@ -0,0 +1,3 @@ | |||
version https://git-lfs.github.com/spec/v1 | |||
oid sha256:1f24abbba43782d733dedbb0b4f416635af50263862e5632963ac9263e430555 | |||
size 88542 |
@@ -0,0 +1,3 @@ | |||
version https://git-lfs.github.com/spec/v1 | |||
oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9 | |||
size 87228 |
@@ -0,0 +1,3 @@ | |||
version https://git-lfs.github.com/spec/v1 | |||
oid sha256:b158f6029d9763d7f84042f7c5835f398c688fdbb6b3f4fe6431101d4118c66c | |||
size 2766 |
@@ -0,0 +1,3 @@ | |||
version https://git-lfs.github.com/spec/v1 | |||
oid sha256:0dcf46b93077e2229ab69cd6ddb80e2689546c575ee538bb2033fee1124ef3e3 | |||
size 2761 |
@@ -0,0 +1,3 @@ | |||
version https://git-lfs.github.com/spec/v1 | |||
oid sha256:9c9870df5a86acaaec67063183dace795479cd0f05296f13058995f475149c56 | |||
size 2957783 |
@@ -75,7 +75,9 @@ RUN pip install --no-cache-dir --upgrade pip && \ | |||
ENV SHELL=/bin/bash | |||
# install special package | |||
RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 numpy==1.18.5 datasets==2.1.0 | |||
RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 datasets==2.1.0 ipykernel && \ | |||
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ | |||
pip config set install.trusted-host pypi.tuna.tsinghua.edu.cn | |||
RUN if [ "$USE_GPU" = "True" ] ; then \ | |||
pip install --no-cache-dir dgl-cu113 dglgo -f https://data.dgl.ai/wheels/repo.html; \ | |||
@@ -0,0 +1,4 @@ | |||
from .base import Exporter | |||
from .builder import build_exporter | |||
from .nlp import SbertForSequenceClassificationExporter | |||
from .torch_model_exporter import TorchModelExporter |
@@ -0,0 +1,53 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
from abc import ABC, abstractmethod | |||
from modelscope.models import Model | |||
from modelscope.utils.config import Config, ConfigDict | |||
from modelscope.utils.constant import ModelFile | |||
from .builder import build_exporter | |||
class Exporter(ABC): | |||
"""Exporter base class to output model to onnx, torch_script, graphdef, etc. | |||
""" | |||
def __init__(self): | |||
self.model = None | |||
@classmethod | |||
def from_model(cls, model: Model, **kwargs): | |||
"""Build the Exporter instance. | |||
@param model: A model instance. It will be used to output the generated files,
and the configuration.json in its model_dir field will be used to create the exporter instance. | |||
@param kwargs: Extra kwargs used to create the Exporter instance. | |||
@return: The Exporter instance | |||
""" | |||
cfg = Config.from_file( | |||
os.path.join(model.model_dir, ModelFile.CONFIGURATION)) | |||
task_name = cfg.task | |||
model_cfg = cfg.model | |||
if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): | |||
model_cfg.type = model_cfg.model_type | |||
export_cfg = ConfigDict({'type': model_cfg.type}) | |||
if hasattr(cfg, 'export'): | |||
export_cfg.update(cfg.export) | |||
exporter = build_exporter(export_cfg, task_name, kwargs) | |||
exporter.model = model | |||
return exporter | |||
@abstractmethod | |||
def export_onnx(self, outputs: str, opset=11, **kwargs): | |||
"""Export the model as onnx format files. | |||
In some cases several files may be generated,
so implementations should return a dict mapping each generated file name to its file path.
@param opset: The version of the ONNX operator set to use. | |||
@param outputs: The output dir. | |||
@param kwargs: In this default implementation, | |||
kwargs will be carried to generate_dummy_inputs as extra arguments (like input shape). | |||
@return: A dict mapping the model name to the model file path.
""" | |||
pass |
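As a rough usage sketch of the workflow this base class defines (the model id and output directory below are placeholder assumptions; the concrete exporter is resolved from the model's configuration.json):

```python
import os

from modelscope.models import Model
from modelscope.exporters import Exporter

# placeholder model id; any model whose configuration registers an exporter works
model = Model.from_pretrained('damo/nlp_structbert_sentence-similarity_chinese-base')
exporter = Exporter.from_model(model)

output_dir = '/tmp/onnx_export'
os.makedirs(output_dir, exist_ok=True)
# export_onnx returns a dict mapping generated file names to file paths
files = exporter.export_onnx(outputs=output_dir, opset=11)
print(files)
```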
@@ -0,0 +1,21 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from modelscope.utils.config import ConfigDict | |||
from modelscope.utils.registry import Registry, build_from_cfg | |||
EXPORTERS = Registry('exporters') | |||
def build_exporter(cfg: ConfigDict, | |||
task_name: str = None, | |||
default_args: dict = None): | |||
""" build exporter by the given model config dict | |||
Args: | |||
cfg (:obj:`ConfigDict`): config dict for exporter object. | |||
task_name (str, optional): task name, refer to | |||
:obj:`Tasks` for more details | |||
default_args (dict, optional): Default initialization arguments. | |||
""" | |||
return build_from_cfg( | |||
cfg, EXPORTERS, group_key=task_name, default_args=default_args) |
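A hypothetical registration sketch showing how the EXPORTERS registry and build_exporter fit together; the task and module names are illustrative, not part of this change:

```python
from modelscope.exporters.builder import EXPORTERS, build_exporter
from modelscope.exporters.torch_model_exporter import TorchModelExporter
from modelscope.utils.config import ConfigDict

# illustrative group key and module name only
@EXPORTERS.register_module('my-task', module_name='my-model')
class MyModelExporter(TorchModelExporter):
    pass

# mirrors what Exporter.from_model does with the model's configuration
cfg = ConfigDict({'type': 'my-model'})
exporter = build_exporter(cfg, task_name='my-task')
```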
@@ -0,0 +1,2 @@ | |||
from .sbert_for_sequence_classification_exporter import \ | |||
SbertForSequenceClassificationExporter |
@@ -0,0 +1,81 @@ | |||
import os | |||
from collections import OrderedDict | |||
from typing import Any, Dict, Mapping, Tuple | |||
from torch.utils.data.dataloader import default_collate | |||
from modelscope.exporters.builder import EXPORTERS | |||
from modelscope.exporters.torch_model_exporter import TorchModelExporter | |||
from modelscope.metainfo import Models | |||
from modelscope.preprocessors import Preprocessor, build_preprocessor | |||
from modelscope.utils.config import Config | |||
from modelscope.utils.constant import ModeKeys, Tasks | |||
@EXPORTERS.register_module( | |||
Tasks.sentence_similarity, module_name=Models.structbert) | |||
@EXPORTERS.register_module( | |||
Tasks.sentiment_classification, module_name=Models.structbert) | |||
@EXPORTERS.register_module(Tasks.nli, module_name=Models.structbert) | |||
@EXPORTERS.register_module( | |||
Tasks.zero_shot_classification, module_name=Models.structbert) | |||
class SbertForSequenceClassificationExporter(TorchModelExporter): | |||
def generate_dummy_inputs(self, | |||
shape: Tuple = None, | |||
**kwargs) -> Dict[str, Any]: | |||
"""Generate dummy inputs for model exportation to onnx or other formats by tracing. | |||
@param shape: A tuple of input shape which should have at most two dimensions. | |||
shape = (1, ) batch_size=1, sequence_length will be taken from the preprocessor. | |||
shape = (8, 128) batch_size=1, sequence_length=128, which will cover the config of the preprocessor. | |||
@return: Dummy inputs. | |||
""" | |||
cfg = Config.from_file( | |||
os.path.join(self.model.model_dir, 'configuration.json')) | |||
field_name = Tasks.find_field_by_task(cfg.task) | |||
if 'type' not in cfg.preprocessor and 'val' in cfg.preprocessor: | |||
cfg = cfg.preprocessor.val | |||
else: | |||
cfg = cfg.preprocessor | |||
batch_size = 1 | |||
sequence_length = {} | |||
if shape is not None: | |||
if len(shape) == 1: | |||
batch_size = shape[0] | |||
elif len(shape) == 2: | |||
batch_size, max_length = shape | |||
sequence_length = {'sequence_length': max_length} | |||
cfg.update({ | |||
'model_dir': self.model.model_dir, | |||
'mode': ModeKeys.TRAIN, | |||
**sequence_length | |||
}) | |||
preprocessor: Preprocessor = build_preprocessor(cfg, field_name) | |||
if preprocessor.pair: | |||
first_sequence = preprocessor.tokenizer.unk_token | |||
second_sequence = preprocessor.tokenizer.unk_token | |||
else: | |||
first_sequence = preprocessor.tokenizer.unk_token | |||
second_sequence = None | |||
batched = [] | |||
for _ in range(batch_size): | |||
batched.append(preprocessor((first_sequence, second_sequence))) | |||
return default_collate(batched) | |||
@property | |||
def inputs(self) -> Mapping[str, Mapping[int, str]]: | |||
dynamic_axis = {0: 'batch', 1: 'sequence'} | |||
return OrderedDict([ | |||
('input_ids', dynamic_axis), | |||
('attention_mask', dynamic_axis), | |||
('token_type_ids', dynamic_axis), | |||
]) | |||
@property | |||
def outputs(self) -> Mapping[str, Mapping[int, str]]: | |||
return OrderedDict({'logits': {0: 'batch'}}) |
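A hedged sketch of how the shape argument flows through generate_dummy_inputs (the model id is a placeholder; the returned tensors are what would be fed to tracing):

```python
from modelscope.models import Model
from modelscope.exporters import Exporter

# placeholder model id for a structbert sequence classification model
model = Model.from_pretrained('damo/nlp_structbert_sentence-similarity_chinese-base')
exporter = Exporter.from_model(model)

# batch_size=8, sequence_length=128 overrides the preprocessor config
dummy = exporter.generate_dummy_inputs(shape=(8, 128))
print({name: tensor.shape for name, tensor in dummy.items()})
# expected keys match the `inputs` property: input_ids, attention_mask, token_type_ids
```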
@@ -0,0 +1,247 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
from contextlib import contextmanager | |||
from itertools import chain | |||
from typing import Any, Dict, Mapping | |||
import torch | |||
from torch import nn | |||
from torch.onnx import export as onnx_export | |||
from torch.onnx.utils import _decide_input_format | |||
from modelscope.models import TorchModel | |||
from modelscope.pipelines.base import collate_fn | |||
from modelscope.utils.constant import ModelFile | |||
from modelscope.utils.logger import get_logger | |||
from modelscope.utils.regress_test_utils import compare_arguments_nested | |||
from modelscope.utils.tensor_utils import torch_nested_numpify | |||
from .base import Exporter | |||
logger = get_logger(__name__) | |||
class TorchModelExporter(Exporter): | |||
"""The torch base class of exporter. | |||
This class provides the default implementations for exporting onnx and torch script. | |||
Each specific model may implement its own exporter by overriding the export_onnx/export_torch_script, | |||
and to provide implementations for generate_dummy_inputs/inputs/outputs methods. | |||
""" | |||
def export_onnx(self, outputs: str, opset=11, **kwargs): | |||
"""Export the model as onnx format files. | |||
In some cases several files may be generated,
so implementations should return a dict mapping each generated file name to its file path.
@param opset: The version of the ONNX operator set to use. | |||
@param outputs: The output dir. | |||
@param kwargs: In this default implementation, | |||
you can pass the arguments needed by _torch_export_onnx, other unrecognized args | |||
will be carried to generate_dummy_inputs as extra arguments (such as input shape). | |||
@return: A dict containing the model key - model file path pairs. | |||
""" | |||
model = self.model | |||
if not isinstance(model, nn.Module) and hasattr(model, 'model'): | |||
model = model.model | |||
onnx_file = os.path.join(outputs, ModelFile.ONNX_MODEL_FILE) | |||
self._torch_export_onnx(model, onnx_file, opset=opset, **kwargs) | |||
return {'model': onnx_file} | |||
def export_torch_script(self, outputs: str, **kwargs): | |||
"""Export the model as torch script files. | |||
In some cases several files may be generated,
so implementations should return a dict mapping each generated file name to its file path.
@param outputs: The output dir. | |||
@param kwargs: In this default implementation, | |||
you can pass the arguments needed by _torch_export_torch_script, other unrecognized args | |||
will be carried to generate_dummy_inputs as extra arguments (like input shape). | |||
@return: A dict mapping the model name to the model file path.
""" | |||
model = self.model | |||
if not isinstance(model, nn.Module) and hasattr(model, 'model'): | |||
model = model.model | |||
ts_file = os.path.join(outputs, ModelFile.TS_MODEL_FILE) | |||
# generate ts by tracing | |||
self._torch_export_torch_script(model, ts_file, **kwargs) | |||
return {'model': ts_file} | |||
def generate_dummy_inputs(self, **kwargs) -> Dict[str, Any]: | |||
"""Generate dummy inputs for model exportation to onnx or other formats by tracing. | |||
@return: Dummy inputs. | |||
""" | |||
return None | |||
@property | |||
def inputs(self) -> Mapping[str, Mapping[int, str]]: | |||
"""Return an ordered dict contains the model's input arguments name with their dynamic axis. | |||
About the information of dynamic axis please check the dynamic_axes argument of torch.onnx.export function | |||
""" | |||
return None | |||
@property | |||
def outputs(self) -> Mapping[str, Mapping[int, str]]: | |||
"""Return an ordered dict contains the model's output arguments name with their dynamic axis. | |||
About the information of dynamic axis please check the dynamic_axes argument of torch.onnx.export function | |||
""" | |||
return None | |||
def _torch_export_onnx(self, | |||
model: nn.Module, | |||
output: str, | |||
opset: int = 11, | |||
device: str = 'cpu', | |||
validation: bool = True, | |||
rtol: float = None, | |||
atol: float = None, | |||
**kwargs): | |||
"""Export the model to an onnx format file. | |||
@param model: A torch.nn.Module instance to export. | |||
@param output: The output file. | |||
@param opset: The version of the ONNX operator set to use. | |||
@param device: The device used to forward. | |||
@param validation: Whether to validate the exported file.
@param rtol: The relative tolerance used when comparing the outputs.
@param atol: The absolute tolerance used when comparing the outputs.
""" | |||
dummy_inputs = self.generate_dummy_inputs(**kwargs) | |||
inputs = self.inputs | |||
outputs = self.outputs | |||
if dummy_inputs is None or inputs is None or outputs is None: | |||
raise NotImplementedError( | |||
'Model properties dummy_inputs, inputs and outputs must be set.')
with torch.no_grad(): | |||
model.eval() | |||
device = torch.device(device) | |||
model.to(device) | |||
dummy_inputs = collate_fn(dummy_inputs, device) | |||
if isinstance(dummy_inputs, Mapping): | |||
dummy_inputs = dict(dummy_inputs) | |||
onnx_outputs = list(self.outputs.keys()) | |||
with replace_call(): | |||
onnx_export( | |||
model, | |||
(dummy_inputs, ), | |||
f=output, | |||
input_names=list(inputs.keys()), | |||
output_names=onnx_outputs, | |||
dynamic_axes={ | |||
name: axes | |||
for name, axes in chain(inputs.items(), | |||
outputs.items()) | |||
}, | |||
do_constant_folding=True, | |||
opset_version=opset, | |||
) | |||
if validation: | |||
try: | |||
import onnx | |||
import onnxruntime as ort | |||
except ImportError: | |||
logger.warning(
'Cannot validate the exported onnx file, because '
'onnx or onnxruntime is not installed')
return | |||
onnx_model = onnx.load(output) | |||
onnx.checker.check_model(onnx_model) | |||
ort_session = ort.InferenceSession(output) | |||
with torch.no_grad(): | |||
model.eval() | |||
outputs_origin = model.forward( | |||
*_decide_input_format(model, dummy_inputs)) | |||
if isinstance(outputs_origin, Mapping): | |||
outputs_origin = torch_nested_numpify( | |||
list(outputs_origin.values())) | |||
outputs = ort_session.run( | |||
onnx_outputs, | |||
torch_nested_numpify(dummy_inputs), | |||
) | |||
tols = {} | |||
if rtol is not None: | |||
tols['rtol'] = rtol | |||
if atol is not None: | |||
tols['atol'] = atol | |||
if not compare_arguments_nested('Onnx model output match failed', | |||
outputs, outputs_origin, **tols): | |||
raise RuntimeError( | |||
'export onnx failed because of validation error.') | |||
def _torch_export_torch_script(self, | |||
model: nn.Module, | |||
output: str, | |||
device: str = 'cpu', | |||
validation: bool = True, | |||
rtol: float = None, | |||
atol: float = None, | |||
**kwargs): | |||
"""Export the model to a torch script file. | |||
@param model: A torch.nn.Module instance to export. | |||
@param output: The output file. | |||
@param device: The device used to forward. | |||
@param validation: Whether to validate the exported file.
@param rtol: The relative tolerance used when comparing the outputs.
@param atol: The absolute tolerance used when comparing the outputs.
""" | |||
model.eval() | |||
dummy_inputs = self.generate_dummy_inputs(**kwargs) | |||
if dummy_inputs is None: | |||
raise NotImplementedError( | |||
'Model property dummy_inputs must be set.') | |||
dummy_inputs = collate_fn(dummy_inputs, device) | |||
if isinstance(dummy_inputs, Mapping): | |||
dummy_inputs = tuple(dummy_inputs.values()) | |||
with torch.no_grad(): | |||
model.eval() | |||
with replace_call(): | |||
traced_model = torch.jit.trace( | |||
model, dummy_inputs, strict=False) | |||
torch.jit.save(traced_model, output) | |||
if validation: | |||
ts_model = torch.jit.load(output) | |||
with torch.no_grad(): | |||
model.eval() | |||
ts_model.eval() | |||
outputs = ts_model.forward(*dummy_inputs) | |||
outputs = torch_nested_numpify(outputs) | |||
outputs_origin = model.forward(*dummy_inputs) | |||
outputs_origin = torch_nested_numpify(outputs_origin) | |||
tols = {} | |||
if rtol is not None: | |||
tols['rtol'] = rtol | |||
if atol is not None: | |||
tols['atol'] = atol | |||
if not compare_arguments_nested( | |||
'Torch script model output match failed', outputs, | |||
outputs_origin, **tols): | |||
raise RuntimeError( | |||
'export torch script failed because of validation error.') | |||
@contextmanager | |||
def replace_call(): | |||
"""This function is used to recover the original call method. | |||
The Model class of modelscope overrides the call method. When exporting to onnx or torchscript, torch will | |||
prepare the parameters as the prototype of forward method, and trace the call method, this causes | |||
problems. Here we recover the call method to the default implementation of torch.nn.Module, and change it | |||
back after the tracing was done. | |||
""" | |||
TorchModel.call_origin, TorchModel.__call__ = TorchModel.__call__, TorchModel._call_impl | |||
yield | |||
TorchModel.__call__ = TorchModel.call_origin | |||
del TorchModel.call_origin |
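A minimal usage sketch for the TorchScript path (model id and output directory are assumptions; with validation=True the traced module is reloaded and its outputs compared against the eager model within rtol/atol):

```python
import os

from modelscope.models import Model
from modelscope.exporters import Exporter

model = Model.from_pretrained('damo/nlp_structbert_sentence-similarity_chinese-base')
exporter = Exporter.from_model(model)

output_dir = '/tmp/ts_export'
os.makedirs(output_dir, exist_ok=True)
# shape is forwarded to generate_dummy_inputs; rtol/atol bound the validation diff
files = exporter.export_torch_script(
    outputs=output_dir, validation=True, rtol=1e-04, atol=1e-06, shape=(1, 16))
print(files)  # {'model': <path to the torch script file>}
```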
@@ -1,2 +1,4 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from .file import File, LocalStorage | |||
from .io import dump, dumps, load |
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from .base import FormatHandler | |||
from .json import JsonHandler | |||
from .yaml import YamlHandler |
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
import pickle | |||
import shutil | |||
@@ -389,7 +391,7 @@ class HubApi: | |||
cookies = requests.utils.dict_from_cookiejar(cookies) | |||
r = requests.get(url=datahub_url, cookies=cookies) | |||
resp = r.json() | |||
datahub_raise_on_error(datahub_url, resp) | |||
raise_on_error(resp) | |||
return resp['Data'] | |||
def on_dataset_download(self, dataset_name: str, namespace: str) -> None: | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from pathlib import Path | |||
MODELSCOPE_URL_SCHEME = 'http://' | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from http import HTTPStatus | |||
from requests.exceptions import HTTPError | |||
@@ -60,7 +62,7 @@ def raise_on_error(rsp): | |||
Args: | |||
rsp (_type_): The server response | |||
""" | |||
if rsp['Code'] == HTTPStatus.OK and rsp['Success']: | |||
if rsp['Code'] == HTTPStatus.OK: | |||
return True | |||
else: | |||
raise RequestError(rsp['Message']) | |||
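For reference, a rough sketch of how this check behaves; the import path and the payload shapes are assumptions inferred from the fields used above:

```python
from http import HTTPStatus

# assumed import path for the helpers defined in this module
from modelscope.hub.errors import RequestError, raise_on_error

ok_rsp = {'Code': HTTPStatus.OK, 'Message': 'success', 'Data': {}}
bad_rsp = {'Code': 400, 'Message': 'invalid token'}

assert raise_on_error(ok_rsp) is True
try:
    raise_on_error(bad_rsp)
except RequestError as err:
    print(err)  # invalid token
```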
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import copy | |||
import os | |||
import sys | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
import subprocess | |||
from typing import List | |||
@@ -39,17 +41,28 @@ class GitCommandWrapper(metaclass=Singleton): | |||
subprocess.CompletedProcess: the command response | |||
""" | |||
logger.debug(' '.join(args)) | |||
git_env = os.environ.copy() | |||
git_env['GIT_TERMINAL_PROMPT'] = '0' | |||
response = subprocess.run( | |||
[self.git_path, *args], | |||
stdout=subprocess.PIPE, | |||
stderr=subprocess.PIPE) # compatible for python3.6 | |||
stderr=subprocess.PIPE, | |||
env=git_env, | |||
) # compatible for python3.6 | |||
try: | |||
response.check_returncode() | |||
return response | |||
except subprocess.CalledProcessError as error: | |||
raise GitError( | |||
'stdout: %s, stderr: %s' % | |||
(response.stdout.decode('utf8'), error.stderr.decode('utf8'))) | |||
if response.returncode == 1: | |||
logger.info('Nothing to commit.') | |||
return response | |||
else: | |||
logger.error( | |||
'Running the git command failed, you may need to log in first.'
) | |||
raise GitError('stdout: %s, stderr: %s' % | |||
(response.stdout.decode('utf8'), | |||
error.stderr.decode('utf8'))) | |||
def config_auth_token(self, repo_dir, auth_token): | |||
url = self.get_repo_remote_url(repo_dir) | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
from typing import Optional | |||
@@ -40,6 +42,11 @@ class Repository: | |||
self.model_dir = model_dir | |||
self.model_base_dir = os.path.dirname(model_dir) | |||
self.model_repo_name = os.path.basename(model_dir) | |||
if not revision: | |||
err_msg = 'a non-default value of revision cannot be empty.' | |||
raise InvalidParameter(err_msg) | |||
if auth_token: | |||
self.auth_token = auth_token | |||
else: | |||
@@ -145,10 +152,21 @@ class DatasetRepository: | |||
The git command line path, if None, we use 'git' | |||
""" | |||
self.dataset_id = dataset_id | |||
self.repo_work_dir = repo_work_dir | |||
self.repo_base_dir = os.path.dirname(repo_work_dir) | |||
self.repo_name = os.path.basename(repo_work_dir) | |||
if not repo_work_dir or not isinstance(repo_work_dir, str): | |||
err_msg = 'dataset_work_dir must be provided!' | |||
raise InvalidParameter(err_msg) | |||
self.repo_work_dir = repo_work_dir.rstrip('/') | |||
if not self.repo_work_dir: | |||
err_msg = 'dataset_work_dir can not be root dir!' | |||
raise InvalidParameter(err_msg) | |||
self.repo_base_dir = os.path.dirname(self.repo_work_dir) | |||
self.repo_name = os.path.basename(self.repo_work_dir) | |||
if not revision: | |||
err_msg = 'a non-default value of revision cannot be empty.' | |||
raise InvalidParameter(err_msg) | |||
self.revision = revision | |||
if auth_token: | |||
self.auth_token = auth_token | |||
else: | |||
@@ -199,7 +217,9 @@ class DatasetRepository: | |||
self.git_wrapper.config_auth_token(self.repo_work_dir, self.auth_token) | |||
self.git_wrapper.add_user_info(self.repo_base_dir, self.repo_name) | |||
remote_url = self.git_wrapper.get_repo_remote_url(self.repo_work_dir) | |||
remote_url = self._get_remote_url() | |||
remote_url = self.git_wrapper.remove_token_from_url(remote_url) | |||
self.git_wrapper.pull(self.repo_work_dir) | |||
self.git_wrapper.add(self.repo_work_dir, all_files=True) | |||
self.git_wrapper.commit(self.repo_work_dir, commit_message) | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
import tempfile | |||
from pathlib import Path | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import hashlib | |||
import os | |||
import pickle | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import hashlib | |||
import os | |||
from typing import Optional | |||
@@ -35,6 +35,10 @@ class Models(object): | |||
fer = 'fer' | |||
retinaface = 'retinaface' | |||
shop_segmentation = 'shop-segmentation' | |||
mogface = 'mogface' | |||
mtcnn = 'mtcnn' | |||
ulfd = 'ulfd' | |||
video_inpainting = 'video-inpainting' | |||
# EasyCV models | |||
yolox = 'YOLOX' | |||
@@ -51,11 +55,16 @@ class Models(object): | |||
space_intent = 'space-intent' | |||
space_modeling = 'space-modeling' | |||
star = 'star' | |||
star3 = 'star3' | |||
tcrf = 'transformer-crf' | |||
transformer_softmax = 'transformer-softmax' | |||
lcrf = 'lstm-crf' | |||
gcnncrf = 'gcnn-crf' | |||
bart = 'bart' | |||
gpt3 = 'gpt3' | |||
plug = 'plug' | |||
bert_for_ds = 'bert-for-document-segmentation' | |||
ponet = 'ponet' | |||
# audio models | |||
sambert_hifigan = 'sambert-hifigan' | |||
@@ -70,6 +79,7 @@ class Models(object): | |||
gemm = 'gemm-generative-multi-modal' | |||
mplug = 'mplug' | |||
diffusion = 'diffusion-text-to-image-synthesis' | |||
multi_stage_diffusion = 'multi-stage-diffusion-text-to-image-synthesis' | |||
team = 'team-multi-modal-similarity' | |||
video_clip = 'video-clip-multi-modal-embedding' | |||
@@ -77,6 +87,7 @@ class Models(object): | |||
class TaskModels(object): | |||
# nlp task | |||
text_classification = 'text-classification' | |||
token_classification = 'token-classification' | |||
information_extraction = 'information-extraction' | |||
@@ -87,6 +98,8 @@ class Heads(object): | |||
bert_mlm = 'bert-mlm' | |||
# roberta mlm | |||
roberta_mlm = 'roberta-mlm' | |||
# token cls | |||
token_classification = 'token-classification' | |||
information_extraction = 'information-extraction' | |||
@@ -121,8 +134,11 @@ class Pipelines(object): | |||
salient_detection = 'u2net-salient-detection' | |||
image_classification = 'image-classification' | |||
face_detection = 'resnet-face-detection-scrfd10gkps' | |||
ulfd_face_detection = 'manual-face-detection-ulfd' | |||
facial_expression_recognition = 'vgg19-facial-expression-recognition-fer' | |||
retina_face_detection = 'resnet50-face-detection-retinaface' | |||
mog_face_detection = 'resnet101-face-detection-cvpr22papermogface' | |||
mtcnn_face_detection = 'manual-face-detection-mtcnn' | |||
live_category = 'live-category' | |||
general_image_classification = 'vit-base_image-classification_ImageNet-labels' | |||
daily_image_classification = 'vit-base_image-classification_Dailylife-labels' | |||
@@ -155,16 +171,19 @@ class Pipelines(object): | |||
text_driven_segmentation = 'text-driven-segmentation' | |||
movie_scene_segmentation = 'resnet50-bert-movie-scene-segmentation' | |||
shop_segmentation = 'shop-segmentation' | |||
video_inpainting = 'video-inpainting' | |||
# nlp tasks | |||
sentence_similarity = 'sentence-similarity' | |||
word_segmentation = 'word-segmentation' | |||
part_of_speech = 'part-of-speech' | |||
named_entity_recognition = 'named-entity-recognition' | |||
text_generation = 'text-generation' | |||
sentiment_analysis = 'sentiment-analysis' | |||
sentiment_classification = 'sentiment-classification' | |||
text_classification = 'text-classification' | |||
fill_mask = 'fill-mask' | |||
fill_mask_ponet = 'fill-mask-ponet' | |||
csanmt_translation = 'csanmt-translation' | |||
nli = 'nli' | |||
dialog_intent_prediction = 'dialog-intent-prediction' | |||
@@ -172,8 +191,12 @@ class Pipelines(object): | |||
dialog_state_tracking = 'dialog-state-tracking' | |||
zero_shot_classification = 'zero-shot-classification' | |||
text_error_correction = 'text-error-correction' | |||
plug_generation = 'plug-generation' | |||
faq_question_answering = 'faq-question-answering' | |||
conversational_text_to_sql = 'conversational-text-to-sql' | |||
table_question_answering_pipeline = 'table-question-answering-pipeline' | |||
sentence_embedding = 'sentence-embedding' | |||
passage_ranking = 'passage-ranking' | |||
relation_extraction = 'relation-extraction' | |||
document_segmentation = 'document-segmentation' | |||
@@ -223,8 +246,11 @@ class Trainers(object): | |||
# nlp trainers | |||
bert_sentiment_analysis = 'bert-sentiment-analysis' | |||
dialog_modeling_trainer = 'dialog-modeling-trainer' | |||
dialog_intent_trainer = 'dialog-intent-trainer' | |||
nlp_base_trainer = 'nlp-base-trainer' | |||
nlp_veco_trainer = 'nlp-veco-trainer' | |||
nlp_passage_ranking_trainer = 'nlp-passage-ranking-trainer' | |||
# audio trainers | |||
speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' | |||
@@ -252,6 +278,7 @@ class Preprocessors(object): | |||
# nlp preprocessor | |||
sen_sim_tokenizer = 'sen-sim-tokenizer' | |||
cross_encoder_tokenizer = 'cross-encoder-tokenizer' | |||
bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer' | |||
text_gen_tokenizer = 'text-gen-tokenizer' | |||
token_cls_tokenizer = 'token-cls-tokenizer' | |||
@@ -264,10 +291,15 @@ class Preprocessors(object): | |||
sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer' | |||
zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer' | |||
text_error_correction = 'text-error-correction' | |||
sentence_embedding = 'sentence-embedding' | |||
passage_ranking = 'passage-ranking' | |||
sequence_labeling_tokenizer = 'sequence-labeling-tokenizer' | |||
word_segment_text_to_label_preprocessor = 'word-segment-text-to-label-preprocessor' | |||
fill_mask = 'fill-mask' | |||
fill_mask_ponet = 'fill-mask-ponet' | |||
faq_question_answering_preprocessor = 'faq-question-answering-preprocessor' | |||
conversational_text_to_sql = 'conversational-text-to-sql' | |||
table_question_answering_preprocessor = 'table-question-answering-preprocessor' | |||
re_tokenizer = 're-tokenizer' | |||
document_segmentation = 'document-segmentation' | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import Dict | |||
from modelscope.metainfo import Metrics | |||
@@ -14,9 +14,9 @@ from .builder import METRICS, MetricKeys | |||
@METRICS.register_module( | |||
group_key=default_group, module_name=Metrics.seq_cls_metric) | |||
class SequenceClassificationMetric(Metric): | |||
"""The metric computation class for sequence classification classes. | |||
"""The metric computation class for sequence classification tasks. | |||
This metric class calculates accuracy for the whole input batches. | |||
This metric class calculates accuracy of the whole input batches. | |||
""" | |||
def __init__(self, *args, **kwargs): | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import torch.nn as nn | |||
from .layer_base import LayerBase | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import numpy as np | |||
import torch as th | |||
import torch.nn as nn | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import numpy as np | |||
import torch as th | |||
import torch.nn as nn | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import abc | |||
import re | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import numpy as np | |||
import torch as th | |||
import torch.nn as nn | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import torch | |||
import torch.nn.functional as F | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import math | |||
import torch | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
@@ -1,9 +1,10 @@ | |||
""" | |||
The implementation of class ComplexConv2d, ComplexConvTranspose2d and ComplexBatchNorm2d | |||
here is modified based on Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft ) | |||
and publicly available at https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
# | |||
# The implementation of class ComplexConv2d, ComplexConvTranspose2d and | |||
# ComplexBatchNorm2d here is modified based on Jongho Choi(sweetcocoa@snu.ac.kr | |||
# / Seoul National Univ., ESTsoft ) and publicly available at | |||
# https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
""" | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
@@ -1,8 +1,10 @@ | |||
""" | |||
The implementation here is modified based on | |||
Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft ) | |||
and publicly available at https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
""" | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
# | |||
# The implementation here is modified based on | |||
# Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft ) | |||
# and publicly available at | |||
# https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
import torch | |||
import torch.nn as nn | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import numpy as np | |||
import torch | |||
import torch.nn as nn | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
from typing import Dict | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import math | |||
import struct | |||
from enum import Enum | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from .base_head import * # noqa F403 | |||
from .base_model import * # noqa F403 | |||
from .base_torch_head import * # noqa F403 | |||
@@ -1,6 +1,6 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from abc import ABC, abstractmethod | |||
from typing import Dict, Union | |||
from typing import Any, Dict, Union | |||
from modelscope.models.base.base_model import Model | |||
from modelscope.utils.config import ConfigDict | |||
@@ -22,25 +22,20 @@ class Head(ABC): | |||
self.config = ConfigDict(kwargs) | |||
@abstractmethod | |||
def forward(self, input: Input) -> Dict[str, Tensor]: | |||
def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
""" | |||
This method will use the output from backbone model to do any | |||
downstream tasks | |||
Args: | |||
input: The tensor output or a model from backbone model | |||
(text generation need a model as input) | |||
Returns: The output from downstream taks | |||
downstream tasks. It receives the output from the backbone model.
Returns (Dict[str, Any]): The output from downstream task. | |||
""" | |||
pass | |||
@abstractmethod | |||
def compute_loss(self, outputs: Dict[str, Tensor], | |||
labels) -> Dict[str, Tensor]: | |||
def compute_loss(self, *args, **kwargs) -> Dict[str, Any]: | |||
""" | |||
compute loss for head during the finetuning | |||
compute loss for head during the finetuning. | |||
Args: | |||
outputs (Dict[str, Tensor]): the output from the model forward | |||
Returns: the loss(Dict[str, Tensor]): | |||
Returns (Dict[str, Any]): The loss dict | |||
""" | |||
pass |
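For illustration, a minimal custom head under the new *args/**kwargs contract might look like the sketch below (the names, sizes and the import path for TorchHead are assumptions; registration with the HEADS registry is omitted):

```python
from typing import Any, Dict

import torch

from modelscope.models.base import TorchHead  # assumed public import path


class MySequenceClassificationHead(TorchHead):

    def __init__(self, hidden_size=768, num_labels=2, **kwargs):
        super().__init__(hidden_size=hidden_size, num_labels=num_labels, **kwargs)
        self.classifier = torch.nn.Linear(hidden_size, num_labels)

    def forward(self, pooled_output, **kwargs) -> Dict[str, Any]:
        # consume the backbone output and produce task-specific logits
        return {'logits': self.classifier(pooled_output)}

    def compute_loss(self, outputs: Dict[str, Any], labels) -> Dict[str, Any]:
        loss = torch.nn.functional.cross_entropy(outputs['logits'], labels)
        return {'loss': loss}
```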
@@ -2,7 +2,7 @@ | |||
import os | |||
import os.path as osp | |||
from abc import ABC, abstractmethod | |||
from typing import Callable, Dict, List, Optional, Union | |||
from typing import Any, Callable, Dict, List, Optional, Union | |||
from modelscope.hub.snapshot_download import snapshot_download | |||
from modelscope.models.builder import build_model | |||
@@ -10,8 +10,6 @@ from modelscope.utils.checkpoint import save_pretrained | |||
from modelscope.utils.config import Config | |||
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile | |||
from modelscope.utils.device import device_placement, verify_device | |||
from modelscope.utils.file_utils import func_receive_dict_inputs | |||
from modelscope.utils.hub import parse_label_mapping | |||
from modelscope.utils.logger import get_logger | |||
logger = get_logger() | |||
@@ -27,35 +25,31 @@ class Model(ABC): | |||
verify_device(device_name) | |||
self._device_name = device_name | |||
def __call__(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
return self.postprocess(self.forward(input)) | |||
def __call__(self, *args, **kwargs) -> Dict[str, Any]: | |||
return self.postprocess(self.forward(*args, **kwargs)) | |||
@abstractmethod | |||
def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
""" | |||
Run the forward pass for a model. | |||
Args: | |||
input (Dict[str, Tensor]): the dict of the model inputs for the forward method | |||
Returns: | |||
Dict[str, Tensor]: output from the model forward pass | |||
Dict[str, Any]: output from the model forward pass | |||
""" | |||
pass | |||
def postprocess(self, input: Dict[str, Tensor], | |||
**kwargs) -> Dict[str, Tensor]: | |||
def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | |||
""" Model specific postprocess and convert model output to | |||
standard model outputs. | |||
Args: | |||
input: input data | |||
inputs: input data | |||
Return: | |||
dict of results: a dict containing outputs of model, each | |||
output should have the standard output name. | |||
""" | |||
return input | |||
return inputs | |||
@classmethod | |||
def _instantiate(cls, **kwargs): | |||
@@ -97,7 +91,6 @@ class Model(ABC): | |||
osp.join(local_model_dir, ModelFile.CONFIGURATION)) | |||
task_name = cfg.task | |||
model_cfg = cfg.model | |||
framework = cfg.framework | |||
if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): | |||
model_cfg.type = model_cfg.model_type | |||
@@ -107,9 +100,8 @@ class Model(ABC): | |||
model_cfg[k] = v | |||
if device is not None: | |||
model_cfg.device = device | |||
with device_placement(framework, device): | |||
model = build_model( | |||
model_cfg, task_name=task_name, default_args=kwargs) | |||
model = build_model( | |||
model_cfg, task_name=task_name, default_args=kwargs) | |||
else: | |||
model = build_model( | |||
model_cfg, task_name=task_name, default_args=kwargs) | |||
@@ -1,5 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import Dict | |||
from typing import Any, Dict | |||
import torch | |||
@@ -18,10 +18,8 @@ class TorchHead(Head, torch.nn.Module): | |||
super().__init__(**kwargs) | |||
torch.nn.Module.__init__(self) | |||
def forward(self, inputs: Dict[str, | |||
torch.Tensor]) -> Dict[str, torch.Tensor]: | |||
def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
raise NotImplementedError | |||
def compute_loss(self, outputs: Dict[str, torch.Tensor], | |||
labels) -> Dict[str, torch.Tensor]: | |||
def compute_loss(self, *args, **kwargs) -> Dict[str, Any]: | |||
raise NotImplementedError |
@@ -1,6 +1,6 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import Any, Dict, Optional, Union | |||
from typing import Any, Dict | |||
import torch | |||
from torch import nn | |||
@@ -21,15 +21,14 @@ class TorchModel(Model, torch.nn.Module): | |||
super().__init__(model_dir, *args, **kwargs) | |||
torch.nn.Module.__init__(self) | |||
def __call__(self, input: Dict[str, | |||
torch.Tensor]) -> Dict[str, torch.Tensor]: | |||
def __call__(self, *args, **kwargs) -> Dict[str, Any]: | |||
# Adapt models whose forward takes a single dict argument that must be named input or inputs
if func_receive_dict_inputs(self.forward): | |||
return self.postprocess(self.forward(input)) | |||
return self.postprocess(self.forward(args[0], **kwargs)) | |||
else: | |||
return self.postprocess(self.forward(**input)) | |||
return self.postprocess(self.forward(*args, **kwargs)) | |||
def forward(self, inputs: Dict[str, | |||
torch.Tensor]) -> Dict[str, torch.Tensor]: | |||
def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
raise NotImplementedError | |||
def post_init(self): | |||
@@ -1,3 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
import os.path as osp | |||
import shutil | |||
@@ -4,11 +4,16 @@ from typing import TYPE_CHECKING | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .mogface import MogFaceDetector | |||
from .mtcnn import MtcnnFaceDetector | |||
from .retinaface import RetinaFaceDetection | |||
from .ulfd_slim import UlfdFaceDetector | |||
else: | |||
_import_structure = { | |||
'ulfd_slim': ['UlfdFaceDetector'], | |||
'retinaface': ['RetinaFaceDetection'], | |||
'mtcnn': ['MtcnnFaceDetector'], | |||
'mogface': ['MogFaceDetector'] | |||
} | |||
import sys | |||
@@ -1,5 +1,4 @@ | |||
""" | |||
mmdet_patch is based on | |||
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet, | |||
all duplicate functions from official mmdetection are removed. | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet | |||
""" |
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox | |||
""" | |||
from .transforms import bbox2result, distance2kps, kps2distance | |||
__all__ = ['bbox2result', 'distance2kps', 'kps2distance'] |
@@ -1,5 +1,6 @@ | |||
""" | |||
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py | |||
""" | |||
import numpy as np | |||
import torch | |||
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py | |||
""" | |||
from .bbox_nms import multiclass_nms | |||
__all__ = ['multiclass_nms'] |
@@ -1,5 +1,6 @@ | |||
""" | |||
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py | |||
""" | |||
import torch | |||
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets | |||
""" | |||
from .retinaface import RetinaFaceDataset | |||
__all__ = ['RetinaFaceDataset'] |
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines | |||
""" | |||
from .transforms import RandomSquareCrop | |||
__all__ = ['RandomSquareCrop'] |
@@ -1,5 +1,6 @@ | |||
""" | |||
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py | |||
""" | |||
import numpy as np | |||
from mmdet.datasets.builder import PIPELINES | |||
@@ -1,5 +1,6 @@ | |||
""" | |||
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py | |||
""" | |||
import numpy as np | |||
from mmdet.datasets.builder import DATASETS | |||
@@ -1,2 +1,6 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models | |||
""" | |||
from .dense_heads import * # noqa: F401,F403 | |||
from .detectors import * # noqa: F401,F403 |
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones | |||
""" | |||
from .resnet import ResNetV1e | |||
__all__ = ['ResNetV1e'] |
@@ -1,5 +1,6 @@ | |||
""" | |||
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py | |||
""" | |||
import torch.nn as nn | |||
import torch.utils.checkpoint as cp | |||
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads | |||
""" | |||
from .scrfd_head import SCRFDHead | |||
__all__ = ['SCRFDHead'] |
@@ -1,5 +1,6 @@ | |||
""" | |||
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py | |||
""" | |||
import numpy as np | |||
import torch | |||
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors | |||
""" | |||
from .scrfd import SCRFD | |||
__all__ = ['SCRFD'] |
@@ -1,5 +1,6 @@ | |||
""" | |||
based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at
https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py | |||
""" | |||
import torch | |||
from mmdet.models.builder import DETECTORS | |||
@@ -0,0 +1 @@ | |||
from .models.detectors import MogFaceDetector |
@@ -0,0 +1,96 @@ | |||
import os | |||
import cv2 | |||
import numpy as np | |||
import torch | |||
import torch.backends.cudnn as cudnn | |||
from modelscope.metainfo import Models | |||
from modelscope.models.base import TorchModel | |||
from modelscope.models.builder import MODELS | |||
from modelscope.utils.constant import Tasks | |||
from .mogface import MogFace | |||
from .utils import MogPriorBox, mogdecode, py_cpu_nms | |||
@MODELS.register_module(Tasks.face_detection, module_name=Models.mogface) | |||
class MogFaceDetector(TorchModel): | |||
def __init__(self, model_path, device='cuda'): | |||
super().__init__(model_path) | |||
torch.set_grad_enabled(False) | |||
cudnn.benchmark = True | |||
self.model_path = model_path | |||
self.device = device | |||
self.net = MogFace() | |||
self.load_model() | |||
self.net = self.net.to(device) | |||
self.mean = np.array([[104, 117, 123]]) | |||
def load_model(self, load_to_cpu=False): | |||
pretrained_dict = torch.load( | |||
self.model_path, map_location=torch.device('cpu')) | |||
self.net.load_state_dict(pretrained_dict, strict=False) | |||
self.net.eval() | |||
def forward(self, input): | |||
img_raw = input['img'] | |||
img = np.array(img_raw.cpu().detach()) | |||
img = img[:, :, ::-1] | |||
im_height, im_width = img.shape[:2] | |||
ss = 1.0 | |||
# trick: downscale very large images so that the longer side is about 1000 px
if max(im_height, im_width) > 1500: | |||
ss = 1000.0 / max(im_height, im_width) | |||
img = cv2.resize(img, (0, 0), fx=ss, fy=ss) | |||
im_height, im_width = img.shape[:2] | |||
scale = torch.Tensor( | |||
[img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) | |||
img -= np.array([[103.53, 116.28, 123.675]]) | |||
img /= np.array([[57.375, 57.120003, 58.395]]) | |||
img /= 255 | |||
img = img[:, :, ::-1].copy() | |||
img = img.transpose(2, 0, 1) | |||
img = torch.from_numpy(img).unsqueeze(0) | |||
img = img.to(self.device) | |||
scale = scale.to(self.device) | |||
conf, loc = self.net(img) # forward pass | |||
confidence_threshold = 0.82 | |||
nms_threshold = 0.4 | |||
top_k = 5000 | |||
keep_top_k = 750 | |||
priorbox = MogPriorBox(scale_list=[0.68]) | |||
priors = priorbox(im_height, im_width) | |||
priors = torch.tensor(priors).to(self.device) | |||
prior_data = priors.data | |||
boxes = mogdecode(loc.data.squeeze(0), prior_data) | |||
boxes = boxes.cpu().numpy() | |||
scores = conf.squeeze(0).data.cpu().numpy()[:, 0] | |||
# ignore low scores | |||
inds = np.where(scores > confidence_threshold)[0] | |||
boxes = boxes[inds] | |||
scores = scores[inds] | |||
# keep top-K before NMS | |||
order = scores.argsort()[::-1][:top_k] | |||
boxes = boxes[order] | |||
scores = scores[order] | |||
# do NMS | |||
dets = np.hstack((boxes, scores[:, np.newaxis])).astype( | |||
np.float32, copy=False) | |||
keep = py_cpu_nms(dets, nms_threshold) | |||
dets = dets[keep, :] | |||
# keep top-K faster NMS | |||
dets = dets[:keep_top_k, :] | |||
return dets / ss |
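The detector above is normally driven by the face-detection pipeline rather than called directly; a hedged usage sketch (the model id and the image path are assumptions):

```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# assumed model id for the MogFace checkpoint on the hub
mog_face_detection = pipeline(
    Tasks.face_detection,
    model='damo/cv_resnet101_face-detection_cvpr22papermogface')
result = mog_face_detection('data/test/images/mog_face_detection.jpg')
print(result)  # detection boxes with confidence scores
```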
@@ -0,0 +1,135 @@ | |||
# -------------------------------------------------------- | |||
# The implementation is also open-sourced by the author, Yang Liu, and is publicly available at
# https://github.com/damo-cv/MogFace | |||
# -------------------------------------------------------- | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
from .mogprednet import MogPredNet | |||
from .resnet import ResNet | |||
class MogFace(nn.Module): | |||
def __init__(self): | |||
super(MogFace, self).__init__() | |||
self.backbone = ResNet(depth=101) | |||
self.fpn = LFPN() | |||
self.pred_net = MogPredNet() | |||
def forward(self, x): | |||
feature_list = self.backbone(x) | |||
fpn_list = self.fpn(feature_list) | |||
pyramid_feature_list = fpn_list[0] | |||
conf, loc = self.pred_net(pyramid_feature_list) | |||
return conf, loc | |||
class FeatureFusion(nn.Module): | |||
def __init__(self, lat_ch=256, **channels): | |||
super(FeatureFusion, self).__init__() | |||
self.main_conv = nn.Conv2d(channels['main'], lat_ch, kernel_size=1) | |||
def forward(self, up, main): | |||
main = self.main_conv(main) | |||
_, _, H, W = main.size() | |||
res = F.upsample(up, scale_factor=2, mode='bilinear') | |||
if res.size(2) != main.size(2) or res.size(3) != main.size(3): | |||
res = res[:, :, 0:H, 0:W] | |||
res = res + main | |||
return res | |||
class LFPN(nn.Module): | |||
def __init__(self, | |||
c2_out_ch=256, | |||
c3_out_ch=512, | |||
c4_out_ch=1024, | |||
c5_out_ch=2048, | |||
c6_mid_ch=512, | |||
c6_out_ch=512, | |||
c7_mid_ch=128, | |||
c7_out_ch=256, | |||
out_dsfd_ft=True): | |||
super(LFPN, self).__init__() | |||
self.out_dsfd_ft = out_dsfd_ft | |||
if self.out_dsfd_ft: | |||
dsfd_module = [] | |||
dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) | |||
dsfd_module.append(nn.Conv2d(512, 256, kernel_size=3, padding=1)) | |||
dsfd_module.append(nn.Conv2d(1024, 256, kernel_size=3, padding=1)) | |||
dsfd_module.append(nn.Conv2d(2048, 256, kernel_size=3, padding=1)) | |||
dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) | |||
dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) | |||
self.dsfd_modules = nn.ModuleList(dsfd_module) | |||
c6_input_ch = c5_out_ch | |||
self.c6 = nn.Sequential(*[ | |||
nn.Conv2d( | |||
c6_input_ch, | |||
c6_mid_ch, | |||
kernel_size=1, | |||
), | |||
nn.BatchNorm2d(c6_mid_ch), | |||
nn.ReLU(inplace=True), | |||
nn.Conv2d( | |||
c6_mid_ch, c6_out_ch, kernel_size=3, padding=1, stride=2), | |||
nn.BatchNorm2d(c6_out_ch), | |||
nn.ReLU(inplace=True) | |||
]) | |||
self.c7 = nn.Sequential(*[ | |||
nn.Conv2d( | |||
c6_out_ch, | |||
c7_mid_ch, | |||
kernel_size=1, | |||
), | |||
nn.BatchNorm2d(c7_mid_ch), | |||
nn.ReLU(inplace=True), | |||
nn.Conv2d( | |||
c7_mid_ch, c7_out_ch, kernel_size=3, padding=1, stride=2), | |||
nn.BatchNorm2d(c7_out_ch), | |||
nn.ReLU(inplace=True) | |||
]) | |||
self.p2_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) | |||
self.p3_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) | |||
self.p4_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) | |||
self.c5_lat = nn.Conv2d(c6_input_ch, 256, kernel_size=3, padding=1) | |||
self.c6_lat = nn.Conv2d(c6_out_ch, 256, kernel_size=3, padding=1) | |||
self.c7_lat = nn.Conv2d(c7_out_ch, 256, kernel_size=3, padding=1) | |||
self.ff_c5_c4 = FeatureFusion(main=c4_out_ch) | |||
self.ff_c4_c3 = FeatureFusion(main=c3_out_ch) | |||
self.ff_c3_c2 = FeatureFusion(main=c2_out_ch) | |||
def forward(self, feature_list): | |||
c2, c3, c4, c5 = feature_list | |||
c6 = self.c6(c5) | |||
c7 = self.c7(c6) | |||
c5 = self.c5_lat(c5) | |||
c6 = self.c6_lat(c6) | |||
c7 = self.c7_lat(c7) | |||
if self.out_dsfd_ft: | |||
dsfd_fts = [] | |||
dsfd_fts.append(self.dsfd_modules[0](c2)) | |||
dsfd_fts.append(self.dsfd_modules[1](c3)) | |||
dsfd_fts.append(self.dsfd_modules[2](c4)) | |||
dsfd_fts.append(self.dsfd_modules[3](feature_list[-1])) | |||
dsfd_fts.append(self.dsfd_modules[4](c6)) | |||
dsfd_fts.append(self.dsfd_modules[5](c7)) | |||
p4 = self.ff_c5_c4(c5, c4) | |||
p3 = self.ff_c4_c3(p4, c3) | |||
p2 = self.ff_c3_c2(p3, c2) | |||
p2 = self.p2_lat(p2) | |||
p3 = self.p3_lat(p3) | |||
p4 = self.p4_lat(p4) | |||
if self.out_dsfd_ft: | |||
return ([p2, p3, p4, c5, c6, c7], dsfd_fts) |
@@ -0,0 +1,164 @@ | |||
# -------------------------------------------------------- | |||
# The implementation is also open-sourced by the author, Yang Liu, and is publicly available at
# https://github.com/damo-cv/MogFace | |||
# -------------------------------------------------------- | |||
import math | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
class conv_bn(nn.Module): | |||
"""docstring for conv""" | |||
def __init__(self, in_plane, out_plane, kernel_size, stride, padding): | |||
super(conv_bn, self).__init__() | |||
self.conv1 = nn.Conv2d( | |||
in_plane, | |||
out_plane, | |||
kernel_size=kernel_size, | |||
stride=stride, | |||
padding=padding) | |||
self.bn1 = nn.BatchNorm2d(out_plane) | |||
def forward(self, x): | |||
x = self.conv1(x) | |||
return self.bn1(x) | |||
class SSHContext(nn.Module): | |||
def __init__(self, channels, Xchannels=256): | |||
super(SSHContext, self).__init__() | |||
self.conv1 = nn.Conv2d( | |||
channels, Xchannels, kernel_size=3, stride=1, padding=1) | |||
self.conv2 = nn.Conv2d( | |||
channels, | |||
Xchannels // 2, | |||
kernel_size=3, | |||
dilation=2, | |||
stride=1, | |||
padding=2) | |||
self.conv2_1 = nn.Conv2d( | |||
Xchannels // 2, Xchannels // 2, kernel_size=3, stride=1, padding=1) | |||
self.conv2_2 = nn.Conv2d( | |||
Xchannels // 2, | |||
Xchannels // 2, | |||
kernel_size=3, | |||
dilation=2, | |||
stride=1, | |||
padding=2) | |||
self.conv2_2_1 = nn.Conv2d( | |||
Xchannels // 2, Xchannels // 2, kernel_size=3, stride=1, padding=1) | |||
def forward(self, x): | |||
x1 = F.relu(self.conv1(x), inplace=True) | |||
x2 = F.relu(self.conv2(x), inplace=True) | |||
x2_1 = F.relu(self.conv2_1(x2), inplace=True) | |||
x2_2 = F.relu(self.conv2_2(x2), inplace=True) | |||
x2_2 = F.relu(self.conv2_2_1(x2_2), inplace=True) | |||
return torch.cat([x1, x2_1, x2_2], 1) | |||
class DeepHead(nn.Module): | |||
def __init__(self, | |||
in_channel=256, | |||
out_channel=256, | |||
use_gn=False, | |||
num_conv=4): | |||
super(DeepHead, self).__init__() | |||
self.use_gn = use_gn | |||
self.num_conv = num_conv | |||
self.conv1 = nn.Conv2d(in_channel, out_channel, 3, 1, 1) | |||
self.conv2 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) | |||
self.conv3 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) | |||
self.conv4 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) | |||
if self.use_gn: | |||
self.gn1 = nn.GroupNorm(16, out_channel) | |||
self.gn2 = nn.GroupNorm(16, out_channel) | |||
self.gn3 = nn.GroupNorm(16, out_channel) | |||
self.gn4 = nn.GroupNorm(16, out_channel) | |||
def forward(self, x): | |||
if self.use_gn: | |||
x1 = F.relu(self.gn1(self.conv1(x)), inplace=True) | |||
x2 = F.relu(self.gn2(self.conv1(x1)), inplace=True) | |||
x3 = F.relu(self.gn3(self.conv1(x2)), inplace=True) | |||
x4 = F.relu(self.gn4(self.conv1(x3)), inplace=True) | |||
else: | |||
x1 = F.relu(self.conv1(x), inplace=True) | |||
x2 = F.relu(self.conv1(x1), inplace=True) | |||
if self.num_conv == 2: | |||
return x2 | |||
x3 = F.relu(self.conv1(x2), inplace=True) | |||
x4 = F.relu(self.conv1(x3), inplace=True) | |||
return x4 | |||
class MogPredNet(nn.Module): | |||
def __init__(self, | |||
num_anchor_per_pixel=1, | |||
num_classes=1, | |||
input_ch_list=[256, 256, 256, 256, 256, 256], | |||
use_deep_head=True, | |||
deep_head_with_gn=True, | |||
use_ssh=True, | |||
deep_head_ch=512): | |||
super(MogPredNet, self).__init__() | |||
self.num_classes = num_classes | |||
self.use_deep_head = use_deep_head | |||
self.deep_head_with_gn = deep_head_with_gn | |||
self.use_ssh = use_ssh | |||
self.deep_head_ch = deep_head_ch | |||
if self.use_ssh: | |||
self.conv_SSH = SSHContext(input_ch_list[0], | |||
self.deep_head_ch // 2) | |||
if self.use_deep_head: | |||
if self.deep_head_with_gn: | |||
self.deep_loc_head = DeepHead( | |||
self.deep_head_ch, self.deep_head_ch, use_gn=True) | |||
self.deep_cls_head = DeepHead( | |||
self.deep_head_ch, self.deep_head_ch, use_gn=True) | |||
self.pred_cls = nn.Conv2d(self.deep_head_ch, | |||
1 * num_anchor_per_pixel, 3, 1, 1) | |||
self.pred_loc = nn.Conv2d(self.deep_head_ch, | |||
4 * num_anchor_per_pixel, 3, 1, 1) | |||
self.sigmoid = nn.Sigmoid() | |||
def forward(self, pyramid_feature_list, dsfd_ft_list=None): | |||
loc = [] | |||
conf = [] | |||
if self.use_deep_head: | |||
for x in pyramid_feature_list: | |||
if self.use_ssh: | |||
x = self.conv_SSH(x) | |||
x_cls = self.deep_cls_head(x) | |||
x_loc = self.deep_loc_head(x) | |||
conf.append( | |||
self.pred_cls(x_cls).permute(0, 2, 3, 1).contiguous()) | |||
loc.append( | |||
self.pred_loc(x_loc).permute(0, 2, 3, 1).contiguous()) | |||
loc = torch.cat([o.view(o.size(0), -1, 4) for o in loc], 1) | |||
conf = torch.cat( | |||
[o.view(o.size(0), -1, self.num_classes) for o in conf], 1) | |||
output = ( | |||
self.sigmoid(conf.view(conf.size(0), -1, self.num_classes)), | |||
loc.view(loc.size(0), -1, 4), | |||
) | |||
return output |
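A hedged shape sketch for the prediction head above, using illustrative values only (batch size 1, a 256x256 input, the default six strides and 256-channel pyramid features): every 3x3 conv in the head preserves the spatial size, so each level contributes H/stride * W/stride rows to the flattened outputs.

```python
import torch

# Hypothetical per-level spatial sizes for a 256x256 input with strides [4, 8, 16, 32, 64, 128].
sizes = [256 // s for s in [4, 8, 16, 32, 64, 128]]    # [64, 32, 16, 8, 4, 2]

head = MogPredNet()                                    # defaults: SSH context + GN deep heads
feats = [torch.randn(1, 256, s, s) for s in sizes]     # 256-channel pyramid features
conf, loc = head(feats)
print(conf.shape)   # torch.Size([1, 5460, 1]); 5460 = sum of s*s over the six levels
print(loc.shape)    # torch.Size([1, 5460, 4])
```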
@@ -0,0 +1,193 @@ | |||
# The implementation is modified from the original ResNet implementation, which is | |||
# also open-sourced by the author, Yang Liu, | |||
# and is publicly available at https://github.com/damo-cv/MogFace | |||
import torch.nn as nn | |||
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): | |||
"""3x3 convolution with padding""" | |||
return nn.Conv2d( | |||
in_planes, | |||
out_planes, | |||
kernel_size=3, | |||
stride=stride, | |||
padding=dilation, | |||
groups=groups, | |||
bias=False, | |||
dilation=dilation) | |||
def conv1x1(in_planes, out_planes, stride=1): | |||
"""1x1 convolution""" | |||
return nn.Conv2d( | |||
in_planes, out_planes, kernel_size=1, stride=stride, bias=False) | |||
class Bottleneck(nn.Module): | |||
expansion = 4 | |||
def __init__(self, | |||
inplanes, | |||
planes, | |||
stride=1, | |||
downsample=None, | |||
groups=1, | |||
base_width=64, | |||
dilation=1, | |||
norm_layer=None): | |||
super(Bottleneck, self).__init__() | |||
if norm_layer is None: | |||
norm_layer = nn.BatchNorm2d | |||
width = int(planes * (base_width / 64.)) * groups | |||
# Both self.conv2 and self.downsample layers downsample the input when stride != 1 | |||
self.conv1 = conv1x1(inplanes, width) | |||
self.bn1 = norm_layer(width) | |||
self.conv2 = conv3x3(width, width, stride, groups, dilation) | |||
self.bn2 = norm_layer(width) | |||
self.conv3 = conv1x1(width, planes * self.expansion) | |||
self.bn3 = norm_layer(planes * self.expansion) | |||
self.relu = nn.ReLU(inplace=True) | |||
self.downsample = downsample | |||
self.stride = stride | |||
def forward(self, x): | |||
identity = x | |||
out = self.conv1(x) | |||
out = self.bn1(out) | |||
out = self.relu(out) | |||
out = self.conv2(out) | |||
out = self.bn2(out) | |||
out = self.relu(out) | |||
out = self.conv3(out) | |||
out = self.bn3(out) | |||
if self.downsample is not None: | |||
identity = self.downsample(x) | |||
out += identity | |||
out = self.relu(out) | |||
return out | |||
class ResNet(nn.Module): | |||
def __init__(self, | |||
depth=50, | |||
groups=1, | |||
width_per_group=64, | |||
replace_stride_with_dilation=None, | |||
norm_layer=None, | |||
inplanes=64, | |||
shrink_ch_ratio=1): | |||
super(ResNet, self).__init__() | |||
if norm_layer is None: | |||
norm_layer = nn.BatchNorm2d | |||
self._norm_layer = norm_layer | |||
if depth == 50: | |||
block = Bottleneck | |||
layers = [3, 4, 6, 3] | |||
elif depth == 101: | |||
block = Bottleneck | |||
layers = [3, 4, 23, 3] | |||
elif depth == 152: | |||
block = Bottleneck | |||
layers = [3, 4, 36, 3] | |||
elif depth == 18: | |||
block = BasicBlock | |||
layers = [2, 2, 2, 2] | |||
else: | |||
raise ValueError('only support depth in [18, 50, 101, 152]') | |||
shrink_input_ch = int(inplanes * shrink_ch_ratio) | |||
self.inplanes = int(inplanes * shrink_ch_ratio) | |||
if shrink_ch_ratio == 0.125: | |||
layers = [2, 3, 3, 3] | |||
self.dilation = 1 | |||
if replace_stride_with_dilation is None: | |||
# each element in the tuple indicates if we should replace | |||
# the 2x2 stride with a dilated convolution instead | |||
replace_stride_with_dilation = [False, False, False] | |||
if len(replace_stride_with_dilation) != 3: | |||
raise ValueError('replace_stride_with_dilation should be None ' | |||
'or a 3-element tuple, got {}'.format( | |||
replace_stride_with_dilation)) | |||
self.groups = groups | |||
self.base_width = width_per_group | |||
self.conv1 = nn.Conv2d( | |||
3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) | |||
self.bn1 = norm_layer(self.inplanes) | |||
self.relu = nn.ReLU(inplace=True) | |||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||
self.layer1 = self._make_layer(block, shrink_input_ch, layers[0]) | |||
self.layer2 = self._make_layer( | |||
block, | |||
shrink_input_ch * 2, | |||
layers[1], | |||
stride=2, | |||
dilate=replace_stride_with_dilation[0]) | |||
self.layer3 = self._make_layer( | |||
block, | |||
shrink_input_ch * 4, | |||
layers[2], | |||
stride=2, | |||
dilate=replace_stride_with_dilation[1]) | |||
self.layer4 = self._make_layer( | |||
block, | |||
shrink_input_ch * 8, | |||
layers[3], | |||
stride=2, | |||
dilate=replace_stride_with_dilation[2]) | |||
def _make_layer(self, block, planes, blocks, stride=1, dilate=False): | |||
norm_layer = self._norm_layer | |||
downsample = None | |||
previous_dilation = self.dilation | |||
if dilate: | |||
self.dilation *= stride | |||
stride = 1 | |||
if stride != 1 or self.inplanes != planes * block.expansion: | |||
downsample = nn.Sequential( | |||
conv1x1(self.inplanes, planes * block.expansion, stride), | |||
norm_layer(planes * block.expansion), | |||
) | |||
layers = [] | |||
layers.append( | |||
block(self.inplanes, planes, stride, downsample, self.groups, | |||
self.base_width, previous_dilation, norm_layer)) | |||
self.inplanes = planes * block.expansion | |||
for _ in range(1, blocks): | |||
layers.append( | |||
block( | |||
self.inplanes, | |||
planes, | |||
groups=self.groups, | |||
base_width=self.base_width, | |||
dilation=self.dilation, | |||
norm_layer=norm_layer)) | |||
return nn.Sequential(*layers) | |||
def forward(self, x): | |||
x = self.conv1(x) | |||
x = self.bn1(x) | |||
x = self.relu(x) | |||
x = self.maxpool(x) | |||
four_conv_layer = [] | |||
x = self.layer1(x) | |||
four_conv_layer.append(x) | |||
x = self.layer2(x) | |||
four_conv_layer.append(x) | |||
x = self.layer3(x) | |||
four_conv_layer.append(x) | |||
x = self.layer4(x) | |||
four_conv_layer.append(x) | |||
return four_conv_layer |
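A small usage sketch for the backbone above (assuming ResNet is in scope): unlike the stock torchvision model it returns the four stage outputs instead of logits, which is what the detection neck consumes.

```python
import torch

backbone = ResNet(depth=50)
feats = backbone(torch.randn(1, 3, 224, 224))
print([tuple(f.shape) for f in feats])
# [(1, 256, 56, 56), (1, 512, 28, 28), (1, 1024, 14, 14), (1, 2048, 7, 7)]
```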
@@ -0,0 +1,212 @@ | |||
# Modified from https://github.com/biubug6/Pytorch_Retinaface | |||
import math | |||
from itertools import product as product | |||
from math import ceil | |||
import numpy as np | |||
import torch | |||
def transform_anchor(anchors): | |||
""" | |||
    from corner form [x0, y0, x1, y1] to center form [c_x, c_y, w, h] | |||
x1 = x0 + w - 1 | |||
c_x = (x0 + x1) / 2 = (2x0 + w - 1) / 2 = x0 + (w - 1) / 2 | |||
""" | |||
return np.concatenate(((anchors[:, :2] + anchors[:, 2:]) / 2, | |||
anchors[:, 2:] - anchors[:, :2] + 1), | |||
axis=1) | |||
def normalize_anchor(anchors): | |||
""" | |||
    from center form [c_x, c_y, w, h] to corner form [x0, y0, x1, y1] | |||
""" | |||
item_1 = anchors[:, :2] - (anchors[:, 2:] - 1) / 2 | |||
item_2 = anchors[:, :2] + (anchors[:, 2:] - 1) / 2 | |||
return np.concatenate((item_1, item_2), axis=1) | |||
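A quick numeric check of the two conversions above (a standalone sketch; the arithmetic simply mirrors transform_anchor and normalize_anchor): a 16-pixel box spanning x0=0 to x1=15 has center 7.5, and converting back recovers the original corners.

```python
import numpy as np

# One corner-form anchor [x0, y0, x1, y1]: a 16x16 box at the origin.
corners = np.array([[0., 0., 15., 15.]])

# Corner form -> center form, as in transform_anchor.
center = np.concatenate(((corners[:, :2] + corners[:, 2:]) / 2,   # (0 + 15) / 2 = 7.5
                         corners[:, 2:] - corners[:, :2] + 1),    # 15 - 0 + 1 = 16
                        axis=1)
print(center)   # [[ 7.5  7.5 16.  16. ]]

# Center form -> corner form, as in normalize_anchor.
back = np.concatenate((center[:, :2] - (center[:, 2:] - 1) / 2,
                       center[:, :2] + (center[:, 2:] - 1) / 2),
                      axis=1)
print(back)     # [[ 0.  0. 15. 15.]]
```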
class MogPriorBox(object): | |||
""" | |||
    works both for an FPN (one entry per stride) and for a single layer; the | |||
    single-layer path still needs testing | |||
    returns (np.ndarray) of shape [num_anchors, 4] in center form [c_x, c_y, w, h] | |||
""" | |||
def __init__(self, | |||
scale_list=[1.], | |||
aspect_ratio_list=[1.0], | |||
stride_list=[4, 8, 16, 32, 64, 128], | |||
anchor_size_list=[16, 32, 64, 128, 256, 512]): | |||
self.scale_list = scale_list | |||
self.aspect_ratio_list = aspect_ratio_list | |||
self.stride_list = stride_list | |||
self.anchor_size_list = anchor_size_list | |||
def __call__(self, img_height, img_width): | |||
final_anchor_list = [] | |||
for idx, stride in enumerate(self.stride_list): | |||
anchor_list = [] | |||
cur_img_height = img_height | |||
cur_img_width = img_width | |||
tmp_stride = stride | |||
while tmp_stride != 1: | |||
tmp_stride = tmp_stride // 2 | |||
cur_img_height = (cur_img_height + 1) // 2 | |||
cur_img_width = (cur_img_width + 1) // 2 | |||
for i in range(cur_img_height): | |||
for j in range(cur_img_width): | |||
for scale in self.scale_list: | |||
cx = (j + 0.5) * stride | |||
cy = (i + 0.5) * stride | |||
side_x = self.anchor_size_list[idx] * scale | |||
side_y = self.anchor_size_list[idx] * scale | |||
for ratio in self.aspect_ratio_list: | |||
anchor_list.append([ | |||
cx, cy, side_x / math.sqrt(ratio), | |||
side_y * math.sqrt(ratio) | |||
]) | |||
final_anchor_list.append(anchor_list) | |||
final_anchor_arr = np.concatenate(final_anchor_list, axis=0) | |||
normalized_anchor_arr = normalize_anchor(final_anchor_arr).astype( | |||
'float32') | |||
transformed_anchor = transform_anchor(normalized_anchor_arr) | |||
return transformed_anchor | |||
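To make the loop above concrete, a small sketch (illustrative values, not from the repo) of how many anchors the default configuration yields for a 640x640 input; each stride contributes one anchor per output cell because the default scale and aspect-ratio lists both have a single entry.

```python
# Per-level anchor counts for a 640x640 input with the default strides.
img_h = img_w = 640
strides = [4, 8, 16, 32, 64, 128]

per_level = []
for stride in strides:
    h, w, s = img_h, img_w, stride
    while s != 1:                 # same rounded halving as MogPriorBox.__call__
        s //= 2
        h = (h + 1) // 2
        w = (w + 1) // 2
    per_level.append(h * w)

print(per_level)                  # [25600, 6400, 1600, 400, 100, 25]
print(sum(per_level))             # 34125 -> MogPriorBox()(640, 640).shape == (34125, 4)
```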
class PriorBox(object): | |||
def __init__(self, cfg, image_size=None, phase='train'): | |||
super(PriorBox, self).__init__() | |||
self.min_sizes = cfg['min_sizes'] | |||
self.steps = cfg['steps'] | |||
self.clip = cfg['clip'] | |||
self.image_size = image_size | |||
self.feature_maps = [[ | |||
ceil(self.image_size[0] / step), | |||
ceil(self.image_size[1] / step) | |||
] for step in self.steps] | |||
self.name = 's' | |||
def forward(self): | |||
anchors = [] | |||
for k, f in enumerate(self.feature_maps): | |||
min_sizes = self.min_sizes[k] | |||
for i, j in product(range(f[0]), range(f[1])): | |||
for min_size in min_sizes: | |||
s_kx = min_size / self.image_size[1] | |||
s_ky = min_size / self.image_size[0] | |||
dense_cx = [ | |||
x * self.steps[k] / self.image_size[1] | |||
for x in [j + 0.5] | |||
] | |||
dense_cy = [ | |||
y * self.steps[k] / self.image_size[0] | |||
for y in [i + 0.5] | |||
] | |||
for cy, cx in product(dense_cy, dense_cx): | |||
anchors += [cx, cy, s_kx, s_ky] | |||
# back to torch land | |||
output = torch.Tensor(anchors).view(-1, 4) | |||
if self.clip: | |||
output.clamp_(max=1, min=0) | |||
return output | |||
def py_cpu_nms(dets, thresh): | |||
"""Pure Python NMS baseline.""" | |||
x1 = dets[:, 0] | |||
y1 = dets[:, 1] | |||
x2 = dets[:, 2] | |||
y2 = dets[:, 3] | |||
scores = dets[:, 4] | |||
areas = (x2 - x1 + 1) * (y2 - y1 + 1) | |||
order = scores.argsort()[::-1] | |||
keep = [] | |||
while order.size > 0: | |||
i = order[0] | |||
keep.append(i) | |||
xx1 = np.maximum(x1[i], x1[order[1:]]) | |||
yy1 = np.maximum(y1[i], y1[order[1:]]) | |||
xx2 = np.minimum(x2[i], x2[order[1:]]) | |||
yy2 = np.minimum(y2[i], y2[order[1:]]) | |||
w = np.maximum(0.0, xx2 - xx1 + 1) | |||
h = np.maximum(0.0, yy2 - yy1 + 1) | |||
inter = w * h | |||
ovr = inter / (areas[i] + areas[order[1:]] - inter) | |||
inds = np.where(ovr <= thresh)[0] | |||
order = order[inds + 1] | |||
return keep | |||
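A minimal usage sketch of the NMS baseline above, assuming py_cpu_nms is in scope: two heavily overlapping boxes collapse to the higher-scored one.

```python
import numpy as np

dets = np.array([
    [10., 10., 50., 50., 0.9],      # kept (highest score)
    [12., 12., 52., 52., 0.8],      # IoU with the first box is ~0.83 -> suppressed
    [100., 100., 140., 140., 0.7],  # disjoint -> kept
])
print(py_cpu_nms(dets, thresh=0.5))   # keeps indices 0 and 2
```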
def mogdecode(loc, anchors): | |||
""" | |||
loc: torch.Tensor | |||
anchors: 2-d, torch.Tensor (cx, cy, w, h) | |||
boxes: 2-d, torch.Tensor (x0, y0, x1, y1) | |||
""" | |||
boxes = torch.cat((anchors[:, :2] + loc[:, :2] * anchors[:, 2:], | |||
anchors[:, 2:] * torch.exp(loc[:, 2:])), 1) | |||
boxes[:, 0] -= (boxes[:, 2] - 1) / 2 | |||
boxes[:, 1] -= (boxes[:, 3] - 1) / 2 | |||
boxes[:, 2] += boxes[:, 0] - 1 | |||
boxes[:, 3] += boxes[:, 1] - 1 | |||
return boxes | |||
# Adapted from https://github.com/Hakuyume/chainer-ssd | |||
def decode(loc, priors, variances): | |||
"""Decode locations from predictions using priors to undo | |||
the encoding we did for offset regression at train time. | |||
Args: | |||
loc (tensor): location predictions for loc layers, | |||
Shape: [num_priors,4] | |||
priors (tensor): Prior boxes in center-offset form. | |||
Shape: [num_priors,4]. | |||
variances: (list[float]) Variances of priorboxes | |||
Return: | |||
decoded bounding box predictions | |||
""" | |||
boxes = torch.cat( | |||
(priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], | |||
priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) | |||
boxes[:, :2] -= boxes[:, 2:] / 2 | |||
boxes[:, 2:] += boxes[:, :2] | |||
return boxes | |||
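A small worked check of decode, assuming the function above is in scope and using SSD-style variances of [0.1, 0.2]: a zero offset reproduces the prior in corner form, and a unit x-offset shifts the center by center_variance * prior_width.

```python
import torch

priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])   # (cx, cy, w, h), normalized coordinates
variances = [0.1, 0.2]

print(decode(torch.zeros(1, 4), priors, variances))
# ~ [[0.40, 0.40, 0.60, 0.60]]

loc = torch.tensor([[1.0, 0.0, 0.0, 0.0]])      # shifts cx by 1.0 * 0.1 * 0.2 = 0.02
print(decode(loc, priors, variances))
# ~ [[0.42, 0.40, 0.62, 0.60]]
```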
def decode_landm(pre, priors, variances): | |||
"""Decode landm from predictions using priors to undo | |||
the encoding we did for offset regression at train time. | |||
Args: | |||
pre (tensor): landm predictions for loc layers, | |||
Shape: [num_priors,10] | |||
priors (tensor): Prior boxes in center-offset form. | |||
Shape: [num_priors,4]. | |||
variances: (list[float]) Variances of priorboxes | |||
Return: | |||
decoded landm predictions | |||
""" | |||
a = priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:] | |||
b = priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:] | |||
c = priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:] | |||
d = priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:] | |||
e = priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:] | |||
landms = torch.cat((a, b, c, d, e), dim=1) | |||
return landms |
@@ -0,0 +1 @@ | |||
from .models.detector import MtcnnFaceDetector |
@@ -0,0 +1,240 @@ | |||
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
import numpy as np | |||
from PIL import Image | |||
def nms(boxes, overlap_threshold=0.5, mode='union'): | |||
"""Non-maximum suppression. | |||
Arguments: | |||
boxes: a float numpy array of shape [n, 5], | |||
where each row is (xmin, ymin, xmax, ymax, score). | |||
overlap_threshold: a float number. | |||
mode: 'union' or 'min'. | |||
Returns: | |||
list with indices of the selected boxes | |||
""" | |||
# if there are no boxes, return the empty list | |||
if len(boxes) == 0: | |||
return [] | |||
# list of picked indices | |||
pick = [] | |||
# grab the coordinates of the bounding boxes | |||
x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] | |||
area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0) | |||
ids = np.argsort(score) # in increasing order | |||
while len(ids) > 0: | |||
# grab index of the largest value | |||
last = len(ids) - 1 | |||
i = ids[last] | |||
pick.append(i) | |||
# compute intersections | |||
# of the box with the largest score | |||
# with the rest of boxes | |||
# left top corner of intersection boxes | |||
ix1 = np.maximum(x1[i], x1[ids[:last]]) | |||
iy1 = np.maximum(y1[i], y1[ids[:last]]) | |||
# right bottom corner of intersection boxes | |||
ix2 = np.minimum(x2[i], x2[ids[:last]]) | |||
iy2 = np.minimum(y2[i], y2[ids[:last]]) | |||
# width and height of intersection boxes | |||
w = np.maximum(0.0, ix2 - ix1 + 1.0) | |||
h = np.maximum(0.0, iy2 - iy1 + 1.0) | |||
# intersections' areas | |||
inter = w * h | |||
if mode == 'min': | |||
overlap = inter / np.minimum(area[i], area[ids[:last]]) | |||
elif mode == 'union': | |||
# intersection over union (IoU) | |||
overlap = inter / (area[i] + area[ids[:last]] - inter) | |||
# delete all boxes where overlap is too big | |||
ids = np.delete( | |||
ids, | |||
np.concatenate([[last], | |||
np.where(overlap > overlap_threshold)[0]])) | |||
return pick | |||
def convert_to_square(bboxes): | |||
"""Convert bounding boxes to a square form. | |||
Arguments: | |||
bboxes: a float numpy array of shape [n, 5]. | |||
Returns: | |||
a float numpy array of shape [n, 5], | |||
squared bounding boxes. | |||
""" | |||
square_bboxes = np.zeros_like(bboxes) | |||
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] | |||
h = y2 - y1 + 1.0 | |||
w = x2 - x1 + 1.0 | |||
max_side = np.maximum(h, w) | |||
square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5 | |||
square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5 | |||
square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 | |||
square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 | |||
return square_bboxes | |||
def calibrate_box(bboxes, offsets): | |||
"""Transform bounding boxes to be more like true bounding boxes. | |||
'offsets' is one of the outputs of the nets. | |||
Arguments: | |||
bboxes: a float numpy array of shape [n, 5]. | |||
offsets: a float numpy array of shape [n, 4]. | |||
Returns: | |||
a float numpy array of shape [n, 5]. | |||
""" | |||
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] | |||
w = x2 - x1 + 1.0 | |||
h = y2 - y1 + 1.0 | |||
w = np.expand_dims(w, 1) | |||
h = np.expand_dims(h, 1) | |||
    # this is what is happening here: | |||
# tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] | |||
# x1_true = x1 + tx1*w | |||
# y1_true = y1 + ty1*h | |||
# x2_true = x2 + tx2*w | |||
# y2_true = y2 + ty2*h | |||
# below is just more compact form of this | |||
# are offsets always such that | |||
# x1 < x2 and y1 < y2 ? | |||
translation = np.hstack([w, h, w, h]) * offsets | |||
bboxes[:, 0:4] = bboxes[:, 0:4] + translation | |||
return bboxes | |||
def get_image_boxes(bounding_boxes, img, size=24): | |||
"""Cut out boxes from the image. | |||
Arguments: | |||
bounding_boxes: a float numpy array of shape [n, 5]. | |||
img: an instance of PIL.Image. | |||
size: an integer, size of cutouts. | |||
Returns: | |||
a float numpy array of shape [n, 3, size, size]. | |||
""" | |||
num_boxes = len(bounding_boxes) | |||
width, height = img.size | |||
[dy, edy, dx, edx, y, ey, x, ex, w, | |||
h] = correct_bboxes(bounding_boxes, width, height) | |||
img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') | |||
for i in range(num_boxes): | |||
img_box = np.zeros((h[i], w[i], 3), 'uint8') | |||
img_array = np.asarray(img, 'uint8') | |||
img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ | |||
img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] | |||
# resize | |||
img_box = Image.fromarray(img_box) | |||
img_box = img_box.resize((size, size), Image.BILINEAR) | |||
img_box = np.asarray(img_box, 'float32') | |||
img_boxes[i, :, :, :] = _preprocess(img_box) | |||
return img_boxes | |||
def correct_bboxes(bboxes, width, height): | |||
"""Crop boxes that are too big and get coordinates | |||
with respect to cutouts. | |||
Arguments: | |||
bboxes: a float numpy array of shape [n, 5], | |||
where each row is (xmin, ymin, xmax, ymax, score). | |||
width: a float number. | |||
height: a float number. | |||
Returns: | |||
dy, dx, edy, edx: a int numpy arrays of shape [n], | |||
coordinates of the boxes with respect to the cutouts. | |||
        y, x, ey, ex: int numpy arrays of shape [n], | |||
corrected ymin, xmin, ymax, xmax. | |||
        h, w: int numpy arrays of shape [n], | |||
just heights and widths of boxes. | |||
in the following order: | |||
[dy, edy, dx, edx, y, ey, x, ex, w, h]. | |||
""" | |||
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] | |||
w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 | |||
num_boxes = bboxes.shape[0] | |||
# 'e' stands for end | |||
# (x, y) -> (ex, ey) | |||
x, y, ex, ey = x1, y1, x2, y2 | |||
# we need to cut out a box from the image. | |||
# (x, y, ex, ey) are corrected coordinates of the box | |||
# in the image. | |||
# (dx, dy, edx, edy) are coordinates of the box in the cutout | |||
# from the image. | |||
dx, dy = np.zeros((num_boxes, )), np.zeros((num_boxes, )) | |||
edx, edy = w.copy() - 1.0, h.copy() - 1.0 | |||
# if box's bottom right corner is too far right | |||
ind = np.where(ex > width - 1.0)[0] | |||
edx[ind] = w[ind] + width - 2.0 - ex[ind] | |||
ex[ind] = width - 1.0 | |||
# if box's bottom right corner is too low | |||
ind = np.where(ey > height - 1.0)[0] | |||
edy[ind] = h[ind] + height - 2.0 - ey[ind] | |||
ey[ind] = height - 1.0 | |||
# if box's top left corner is too far left | |||
ind = np.where(x < 0.0)[0] | |||
dx[ind] = 0.0 - x[ind] | |||
x[ind] = 0.0 | |||
# if box's top left corner is too high | |||
ind = np.where(y < 0.0)[0] | |||
dy[ind] = 0.0 - y[ind] | |||
y[ind] = 0.0 | |||
return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] | |||
return_list = [i.astype('int32') for i in return_list] | |||
return return_list | |||
def _preprocess(img): | |||
"""Preprocessing step before feeding the network. | |||
Arguments: | |||
img: a float numpy array of shape [h, w, c]. | |||
Returns: | |||
a float numpy array of shape [1, c, h, w]. | |||
""" | |||
img = img.transpose((2, 0, 1)) | |||
img = np.expand_dims(img, 0) | |||
img = (img - 127.5) * 0.0078125 | |||
return img |
@@ -0,0 +1,149 @@ | |||
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
import os | |||
import numpy as np | |||
import torch | |||
import torch.backends.cudnn as cudnn | |||
from PIL import Image | |||
from torch.autograd import Variable | |||
from modelscope.metainfo import Models | |||
from modelscope.models.base import TorchModel | |||
from modelscope.models.builder import MODELS | |||
from modelscope.utils.constant import Tasks | |||
from .box_utils import calibrate_box, convert_to_square, get_image_boxes, nms | |||
from .first_stage import run_first_stage | |||
from .get_nets import ONet, PNet, RNet | |||
@MODELS.register_module(Tasks.face_detection, module_name=Models.mtcnn) | |||
class MtcnnFaceDetector(TorchModel): | |||
def __init__(self, model_path, device='cuda'): | |||
super().__init__(model_path) | |||
torch.set_grad_enabled(False) | |||
cudnn.benchmark = True | |||
self.model_path = model_path | |||
self.device = device | |||
self.pnet = PNet(model_path=os.path.join(self.model_path, 'pnet.npy')) | |||
self.rnet = RNet(model_path=os.path.join(self.model_path, 'rnet.npy')) | |||
self.onet = ONet(model_path=os.path.join(self.model_path, 'onet.npy')) | |||
self.pnet = self.pnet.to(device) | |||
self.rnet = self.rnet.to(device) | |||
self.onet = self.onet.to(device) | |||
def forward(self, input): | |||
image = Image.fromarray(np.uint8(input['img'].cpu().numpy())) | |||
pnet = self.pnet | |||
rnet = self.rnet | |||
onet = self.onet | |||
onet.eval() | |||
min_face_size = 20.0 | |||
thresholds = [0.7, 0.8, 0.9] | |||
nms_thresholds = [0.7, 0.7, 0.7] | |||
# BUILD AN IMAGE PYRAMID | |||
width, height = image.size | |||
min_length = min(height, width) | |||
min_detection_size = 12 | |||
factor = 0.707 # sqrt(0.5) | |||
# scales for scaling the image | |||
scales = [] | |||
m = min_detection_size / min_face_size | |||
min_length *= m | |||
factor_count = 0 | |||
while min_length > min_detection_size: | |||
scales.append(m * factor**factor_count) | |||
min_length *= factor | |||
factor_count += 1 | |||
# STAGE 1 | |||
# it will be returned | |||
bounding_boxes = [] | |||
# run P-Net on different scales | |||
for s in scales: | |||
boxes = run_first_stage( | |||
image, | |||
pnet, | |||
scale=s, | |||
threshold=thresholds[0], | |||
device=self.device) | |||
bounding_boxes.append(boxes) | |||
# collect boxes (and offsets, and scores) from different scales | |||
bounding_boxes = [i for i in bounding_boxes if i is not None] | |||
bounding_boxes = np.vstack(bounding_boxes) | |||
keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) | |||
bounding_boxes = bounding_boxes[keep] | |||
# use offsets predicted by pnet to transform bounding boxes | |||
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], | |||
bounding_boxes[:, 5:]) | |||
# shape [n_boxes, 5] | |||
bounding_boxes = convert_to_square(bounding_boxes) | |||
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) | |||
# STAGE 2 | |||
img_boxes = get_image_boxes(bounding_boxes, image, size=24) | |||
img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True) | |||
output = rnet(img_boxes.to(self.device)) | |||
offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4] | |||
probs = output[1].cpu().data.numpy() # shape [n_boxes, 2] | |||
keep = np.where(probs[:, 1] > thresholds[1])[0] | |||
bounding_boxes = bounding_boxes[keep] | |||
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) | |||
offsets = offsets[keep] | |||
keep = nms(bounding_boxes, nms_thresholds[1]) | |||
bounding_boxes = bounding_boxes[keep] | |||
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) | |||
bounding_boxes = convert_to_square(bounding_boxes) | |||
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) | |||
# STAGE 3 | |||
img_boxes = get_image_boxes(bounding_boxes, image, size=48) | |||
if len(img_boxes) == 0: | |||
return [], [] | |||
img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True) | |||
output = onet(img_boxes.to(self.device)) | |||
landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10] | |||
offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4] | |||
probs = output[2].cpu().data.numpy() # shape [n_boxes, 2] | |||
keep = np.where(probs[:, 1] > thresholds[2])[0] | |||
bounding_boxes = bounding_boxes[keep] | |||
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) | |||
offsets = offsets[keep] | |||
landmarks = landmarks[keep] | |||
# compute landmark points | |||
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 | |||
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 | |||
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] | |||
landmarks[:, 0:5] = np.expand_dims( | |||
xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5] | |||
landmarks[:, 5:10] = np.expand_dims( | |||
ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10] | |||
bounding_boxes = calibrate_box(bounding_boxes, offsets) | |||
keep = nms(bounding_boxes, nms_thresholds[2], mode='min') | |||
bounding_boxes = bounding_boxes[keep] | |||
landmarks = landmarks[keep] | |||
landmarks = landmarks.reshape(-1, 2, 5).transpose( | |||
(0, 2, 1)).reshape(-1, 10) | |||
return bounding_boxes, landmarks |
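A hypothetical end-to-end sketch of driving the three-stage cascade above directly; the weight directory and image path are placeholders, and calling forward by hand bypasses whatever preprocessing a surrounding pipeline would normally perform.

```python
import numpy as np
import torch
from PIL import Image

# 'pnet.npy', 'rnet.npy' and 'onet.npy' are expected inside the model directory.
detector = MtcnnFaceDetector('/path/to/mtcnn_weights', device='cpu')

img = torch.from_numpy(np.array(Image.open('face.jpg').convert('RGB')))
boxes, landmarks = detector.forward({'img': img})
# boxes:     float array [n, 5], rows are (x1, y1, x2, y2, score)
# landmarks: float array [n, 10], five (x, y) points per face
```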
@@ -0,0 +1,100 @@ | |||
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
import math | |||
import numpy as np | |||
import torch | |||
from PIL import Image | |||
from torch.autograd import Variable | |||
from .box_utils import _preprocess, nms | |||
def run_first_stage(image, net, scale, threshold, device='cuda'): | |||
"""Run P-Net, generate bounding boxes, and do NMS. | |||
Arguments: | |||
image: an instance of PIL.Image. | |||
net: an instance of pytorch's nn.Module, P-Net. | |||
scale: a float number, | |||
scale width and height of the image by this number. | |||
threshold: a float number, | |||
threshold on the probability of a face when generating | |||
bounding boxes from predictions of the net. | |||
Returns: | |||
a float numpy array of shape [n_boxes, 9], | |||
bounding boxes with scores and offsets (4 + 1 + 4). | |||
""" | |||
# scale the image and convert it to a float array | |||
width, height = image.size | |||
sw, sh = math.ceil(width * scale), math.ceil(height * scale) | |||
img = image.resize((sw, sh), Image.BILINEAR) | |||
img = np.asarray(img, 'float32') | |||
img = Variable( | |||
torch.FloatTensor(_preprocess(img)), volatile=True).to(device) | |||
output = net(img) | |||
probs = output[1].cpu().data.numpy()[0, 1, :, :] | |||
offsets = output[0].cpu().data.numpy() | |||
# probs: probability of a face at each sliding window | |||
# offsets: transformations to true bounding boxes | |||
boxes = _generate_bboxes(probs, offsets, scale, threshold) | |||
if len(boxes) == 0: | |||
return None | |||
keep = nms(boxes[:, 0:5], overlap_threshold=0.5) | |||
return boxes[keep] | |||
def _generate_bboxes(probs, offsets, scale, threshold): | |||
"""Generate bounding boxes at places | |||
where there is probably a face. | |||
Arguments: | |||
probs: a float numpy array of shape [n, m]. | |||
offsets: a float numpy array of shape [1, 4, n, m]. | |||
scale: a float number, | |||
width and height of the image were scaled by this number. | |||
threshold: a float number. | |||
Returns: | |||
a float numpy array of shape [n_boxes, 9] | |||
""" | |||
# applying P-Net is equivalent, in some sense, to | |||
    # moving a 12x12 window with stride 2 | |||
stride = 2 | |||
cell_size = 12 | |||
# indices of boxes where there is probably a face | |||
inds = np.where(probs > threshold) | |||
if inds[0].size == 0: | |||
return np.array([]) | |||
# transformations of bounding boxes | |||
tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] | |||
# they are defined as: | |||
# w = x2 - x1 + 1 | |||
# h = y2 - y1 + 1 | |||
# x1_true = x1 + tx1*w | |||
# x2_true = x2 + tx2*w | |||
# y1_true = y1 + ty1*h | |||
# y2_true = y2 + ty2*h | |||
offsets = np.array([tx1, ty1, tx2, ty2]) | |||
score = probs[inds[0], inds[1]] | |||
# P-Net is applied to scaled images | |||
# so we need to rescale bounding boxes back | |||
bounding_boxes = np.vstack([ | |||
np.round((stride * inds[1] + 1.0) / scale), | |||
np.round((stride * inds[0] + 1.0) / scale), | |||
np.round((stride * inds[1] + 1.0 + cell_size) / scale), | |||
np.round((stride * inds[0] + 1.0 + cell_size) / scale), score, offsets | |||
]) | |||
    # why is one added? | |||
return bounding_boxes.T |
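To make the coordinate mapping above concrete, a worked example with illustrative values: a hit at output row i=3, column j=10 on an image that was scaled by 0.5 maps back to a roughly 24x24 window in the original image.

```python
import numpy as np

stride, cell_size, scale = 2, 12, 0.5
i, j = 3, 10                                             # (row, col) of a detection

x1 = np.round((stride * j + 1.0) / scale)                # 42.0
y1 = np.round((stride * i + 1.0) / scale)                # 14.0
x2 = np.round((stride * j + 1.0 + cell_size) / scale)    # 66.0
y2 = np.round((stride * i + 1.0 + cell_size) / scale)    # 38.0
# The 12x12 P-Net window covers 12 / 0.5 = 24 pixels of the original image.
```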
@@ -0,0 +1,160 @@ | |||
# The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
from collections import OrderedDict | |||
import numpy as np | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
class Flatten(nn.Module): | |||
def __init__(self): | |||
super(Flatten, self).__init__() | |||
def forward(self, x): | |||
""" | |||
Arguments: | |||
x: a float tensor with shape [batch_size, c, h, w]. | |||
Returns: | |||
a float tensor with shape [batch_size, c*h*w]. | |||
""" | |||
        # without this the pretrained model doesn't work | |||
x = x.transpose(3, 2).contiguous() | |||
return x.view(x.size(0), -1) | |||
class PNet(nn.Module): | |||
def __init__(self, model_path=None): | |||
super(PNet, self).__init__() | |||
# suppose we have input with size HxW, then | |||
# after first layer: H - 2, | |||
# after pool: ceil((H - 2)/2), | |||
# after second conv: ceil((H - 2)/2) - 2, | |||
# after last conv: ceil((H - 2)/2) - 4, | |||
# and the same for W | |||
self.features = nn.Sequential( | |||
OrderedDict([('conv1', nn.Conv2d(3, 10, 3, 1)), | |||
('prelu1', nn.PReLU(10)), | |||
('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)), | |||
('conv2', nn.Conv2d(10, 16, 3, 1)), | |||
('prelu2', nn.PReLU(16)), | |||
('conv3', nn.Conv2d(16, 32, 3, 1)), | |||
('prelu3', nn.PReLU(32))])) | |||
self.conv4_1 = nn.Conv2d(32, 2, 1, 1) | |||
self.conv4_2 = nn.Conv2d(32, 4, 1, 1) | |||
weights = np.load(model_path, allow_pickle=True)[()] | |||
for n, p in self.named_parameters(): | |||
p.data = torch.FloatTensor(weights[n]) | |||
def forward(self, x): | |||
""" | |||
Arguments: | |||
x: a float tensor with shape [batch_size, 3, h, w]. | |||
Returns: | |||
b: a float tensor with shape [batch_size, 4, h', w']. | |||
a: a float tensor with shape [batch_size, 2, h', w']. | |||
""" | |||
x = self.features(x) | |||
a = self.conv4_1(x) | |||
b = self.conv4_2(x) | |||
a = F.softmax(a) | |||
return b, a | |||
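Following the size arithmetic in the comment at the top of PNet, a tiny check for a 12x12 input, which is why P-Net behaves like a 12x12 sliding-window detector with stride 2 (the pooling stride):

```python
import math

h = 12
h -= 2                 # conv1, 3x3, stride 1  -> 10
h = math.ceil(h / 2)   # pool1, 2x2, ceil mode -> 5
h -= 2                 # conv2, 3x3            -> 3
h -= 2                 # conv3, 3x3            -> 1
print(h)               # 1: exactly one output position per 12x12 window
```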
class RNet(nn.Module): | |||
def __init__(self, model_path=None): | |||
super(RNet, self).__init__() | |||
self.features = nn.Sequential( | |||
OrderedDict([('conv1', nn.Conv2d(3, 28, 3, 1)), | |||
('prelu1', nn.PReLU(28)), | |||
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
('conv2', nn.Conv2d(28, 48, 3, 1)), | |||
('prelu2', nn.PReLU(48)), | |||
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
('conv3', nn.Conv2d(48, 64, 2, 1)), | |||
('prelu3', nn.PReLU(64)), ('flatten', Flatten()), | |||
('conv4', nn.Linear(576, 128)), | |||
('prelu4', nn.PReLU(128))])) | |||
self.conv5_1 = nn.Linear(128, 2) | |||
self.conv5_2 = nn.Linear(128, 4) | |||
weights = np.load(model_path, allow_pickle=True)[()] | |||
for n, p in self.named_parameters(): | |||
p.data = torch.FloatTensor(weights[n]) | |||
def forward(self, x): | |||
""" | |||
Arguments: | |||
x: a float tensor with shape [batch_size, 3, h, w]. | |||
Returns: | |||
b: a float tensor with shape [batch_size, 4]. | |||
a: a float tensor with shape [batch_size, 2]. | |||
""" | |||
x = self.features(x) | |||
a = self.conv5_1(x) | |||
b = self.conv5_2(x) | |||
a = F.softmax(a) | |||
return b, a | |||
class ONet(nn.Module): | |||
def __init__(self, model_path=None): | |||
super(ONet, self).__init__() | |||
self.features = nn.Sequential( | |||
OrderedDict([ | |||
('conv1', nn.Conv2d(3, 32, 3, 1)), | |||
('prelu1', nn.PReLU(32)), | |||
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
('conv2', nn.Conv2d(32, 64, 3, 1)), | |||
('prelu2', nn.PReLU(64)), | |||
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
('conv3', nn.Conv2d(64, 64, 3, 1)), | |||
('prelu3', nn.PReLU(64)), | |||
('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)), | |||
('conv4', nn.Conv2d(64, 128, 2, 1)), | |||
('prelu4', nn.PReLU(128)), | |||
('flatten', Flatten()), | |||
('conv5', nn.Linear(1152, 256)), | |||
('drop5', nn.Dropout(0.25)), | |||
('prelu5', nn.PReLU(256)), | |||
])) | |||
self.conv6_1 = nn.Linear(256, 2) | |||
self.conv6_2 = nn.Linear(256, 4) | |||
self.conv6_3 = nn.Linear(256, 10) | |||
weights = np.load(model_path, allow_pickle=True)[()] | |||
for n, p in self.named_parameters(): | |||
p.data = torch.FloatTensor(weights[n]) | |||
def forward(self, x): | |||
""" | |||
Arguments: | |||
x: a float tensor with shape [batch_size, 3, h, w]. | |||
Returns: | |||
c: a float tensor with shape [batch_size, 10]. | |||
b: a float tensor with shape [batch_size, 4]. | |||
a: a float tensor with shape [batch_size, 2]. | |||
""" | |||
x = self.features(x) | |||
a = self.conv6_1(x) | |||
b = self.conv6_2(x) | |||
c = self.conv6_3(x) | |||
a = F.softmax(a) | |||
return c, b, a |
@@ -0,0 +1 @@ | |||
from .detection import UlfdFaceDetector |
@@ -0,0 +1,44 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
import os | |||
import cv2 | |||
import numpy as np | |||
import torch | |||
import torch.backends.cudnn as cudnn | |||
import torch.nn.functional as F | |||
from modelscope.metainfo import Models | |||
from modelscope.models.base import Tensor, TorchModel | |||
from modelscope.models.builder import MODELS | |||
from modelscope.utils.constant import ModelFile, Tasks | |||
from .vision.ssd.fd_config import define_img_size | |||
from .vision.ssd.mb_tiny_fd import (create_mb_tiny_fd, | |||
create_mb_tiny_fd_predictor) | |||
define_img_size(640) | |||
@MODELS.register_module(Tasks.face_detection, module_name=Models.ulfd) | |||
class UlfdFaceDetector(TorchModel): | |||
def __init__(self, model_path, device='cuda'): | |||
super().__init__(model_path) | |||
torch.set_grad_enabled(False) | |||
cudnn.benchmark = True | |||
self.model_path = model_path | |||
self.device = device | |||
self.net = create_mb_tiny_fd(2, is_test=True, device=device) | |||
self.predictor = create_mb_tiny_fd_predictor( | |||
self.net, candidate_size=1500, device=device) | |||
self.net.load(model_path) | |||
self.net = self.net.to(device) | |||
def forward(self, input): | |||
img_raw = input['img'] | |||
img = np.array(img_raw.cpu().detach()) | |||
img = img[:, :, ::-1] | |||
prob_th = 0.85 | |||
keep_top_k = 750 | |||
boxes, labels, probs = self.predictor.predict(img, keep_top_k, prob_th) | |||
return boxes, probs |
@@ -0,0 +1,124 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
import math | |||
import torch | |||
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): | |||
""" | |||
Args: | |||
box_scores (N, 5): boxes in corner-form and probabilities. | |||
iou_threshold: intersection over union threshold. | |||
top_k: keep top_k results. If k <= 0, keep all the results. | |||
candidate_size: only consider the candidates with the highest scores. | |||
Returns: | |||
        the kept rows of box_scores, a float tensor of shape [k, 5] | |||
""" | |||
scores = box_scores[:, -1] | |||
boxes = box_scores[:, :-1] | |||
picked = [] | |||
_, indexes = scores.sort(descending=True) | |||
indexes = indexes[:candidate_size] | |||
while len(indexes) > 0: | |||
current = indexes[0] | |||
picked.append(current.item()) | |||
if 0 < top_k == len(picked) or len(indexes) == 1: | |||
break | |||
current_box = boxes[current, :] | |||
indexes = indexes[1:] | |||
rest_boxes = boxes[indexes, :] | |||
iou = iou_of( | |||
rest_boxes, | |||
current_box.unsqueeze(0), | |||
) | |||
indexes = indexes[iou <= iou_threshold] | |||
return box_scores[picked, :] | |||
def nms(box_scores, | |||
nms_method=None, | |||
score_threshold=None, | |||
iou_threshold=None, | |||
sigma=0.5, | |||
top_k=-1, | |||
candidate_size=200): | |||
return hard_nms( | |||
box_scores, iou_threshold, top_k, candidate_size=candidate_size) | |||
def generate_priors(feature_map_list, | |||
shrinkage_list, | |||
image_size, | |||
min_boxes, | |||
clamp=True) -> torch.Tensor: | |||
priors = [] | |||
for index in range(0, len(feature_map_list[0])): | |||
scale_w = image_size[0] / shrinkage_list[0][index] | |||
scale_h = image_size[1] / shrinkage_list[1][index] | |||
for j in range(0, feature_map_list[1][index]): | |||
for i in range(0, feature_map_list[0][index]): | |||
x_center = (i + 0.5) / scale_w | |||
y_center = (j + 0.5) / scale_h | |||
for min_box in min_boxes[index]: | |||
w = min_box / image_size[0] | |||
h = min_box / image_size[1] | |||
priors.append([x_center, y_center, w, h]) | |||
priors = torch.tensor(priors) | |||
if clamp: | |||
torch.clamp(priors, 0.0, 1.0, out=priors) | |||
return priors | |||
def convert_locations_to_boxes(locations, priors, center_variance, | |||
size_variance): | |||
# priors can have one dimension less. | |||
if priors.dim() + 1 == locations.dim(): | |||
priors = priors.unsqueeze(0) | |||
a = locations[..., :2] * center_variance * priors[..., | |||
2:] + priors[..., :2] | |||
b = torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:] | |||
return torch.cat([a, b], dim=locations.dim() - 1) | |||
def center_form_to_corner_form(locations): | |||
a = locations[..., :2] - locations[..., 2:] / 2 | |||
b = locations[..., :2] + locations[..., 2:] / 2 | |||
return torch.cat([a, b], locations.dim() - 1) | |||
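A quick check of the decoding pair above, assuming both helpers are in scope and using the center/size variances defined in fd_config (0.1 and 0.2): a zero regression output returns the prior itself, first in center form and then in corner form.

```python
import torch

priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])     # (cx, cy, w, h), normalized
locations = torch.zeros(1, 4)

centers = convert_locations_to_boxes(locations, priors, 0.1, 0.2)
print(centers)                                    # ~ [[0.5, 0.5, 0.2, 0.2]]
print(center_form_to_corner_form(centers))        # ~ [[0.4, 0.4, 0.6, 0.6]]
```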
def iou_of(boxes0, boxes1, eps=1e-5): | |||
"""Return intersection-over-union (Jaccard index) of boxes. | |||
Args: | |||
boxes0 (N, 4): ground truth boxes. | |||
boxes1 (N or 1, 4): predicted boxes. | |||
eps: a small number to avoid 0 as denominator. | |||
Returns: | |||
iou (N): IoU values. | |||
""" | |||
overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2]) | |||
overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:]) | |||
overlap_area = area_of(overlap_left_top, overlap_right_bottom) | |||
area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) | |||
area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) | |||
return overlap_area / (area0 + area1 - overlap_area + eps) | |||
def area_of(left_top, right_bottom) -> torch.Tensor: | |||
"""Compute the areas of rectangles given two corners. | |||
Args: | |||
left_top (N, 2): left top corner. | |||
right_bottom (N, 2): right bottom corner. | |||
Returns: | |||
area (N): return the area. | |||
""" | |||
hw = torch.clamp(right_bottom - left_top, min=0.0) | |||
return hw[..., 0] * hw[..., 1] |
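A one-box example for iou_of, assuming the two helpers above are in scope: unit-offset 2x2 boxes overlap in a 1x1 square, giving an IoU of 1/7.

```python
import torch

boxes0 = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
boxes1 = torch.tensor([[1.0, 1.0, 3.0, 3.0]])
print(iou_of(boxes0, boxes1))    # ~0.1429 = 1 / (4 + 4 - 1)
```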
@@ -0,0 +1,49 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
class Mb_Tiny(nn.Module): | |||
def __init__(self, num_classes=2): | |||
super(Mb_Tiny, self).__init__() | |||
self.base_channel = 8 * 2 | |||
def conv_bn(inp, oup, stride): | |||
return nn.Sequential( | |||
nn.Conv2d(inp, oup, 3, stride, 1, bias=False), | |||
nn.BatchNorm2d(oup), nn.ReLU(inplace=True)) | |||
def conv_dw(inp, oup, stride): | |||
return nn.Sequential( | |||
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), | |||
nn.BatchNorm2d(inp), | |||
nn.ReLU(inplace=True), | |||
nn.Conv2d(inp, oup, 1, 1, 0, bias=False), | |||
nn.BatchNorm2d(oup), | |||
nn.ReLU(inplace=True), | |||
) | |||
self.model = nn.Sequential( | |||
conv_bn(3, self.base_channel, 2), # 160*120 | |||
conv_dw(self.base_channel, self.base_channel * 2, 1), | |||
conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 | |||
conv_dw(self.base_channel * 2, self.base_channel * 2, 1), | |||
conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 | |||
conv_dw(self.base_channel * 4, self.base_channel * 4, 1), | |||
conv_dw(self.base_channel * 4, self.base_channel * 4, 1), | |||
conv_dw(self.base_channel * 4, self.base_channel * 4, 1), | |||
conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 | |||
conv_dw(self.base_channel * 8, self.base_channel * 8, 1), | |||
conv_dw(self.base_channel * 8, self.base_channel * 8, 1), | |||
conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 | |||
conv_dw(self.base_channel * 16, self.base_channel * 16, 1)) | |||
self.fc = nn.Linear(1024, num_classes) | |||
def forward(self, x): | |||
x = self.model(x) | |||
x = F.avg_pool2d(x, 7) | |||
x = x.view(-1, 1024) | |||
x = self.fc(x) | |||
return x |
@@ -0,0 +1,18 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
from ..transforms import Compose, Resize, SubtractMeans, ToTensor | |||
class PredictionTransform: | |||
def __init__(self, size, mean=0.0, std=1.0): | |||
self.transform = Compose([ | |||
Resize(size), | |||
SubtractMeans(mean), lambda img, boxes=None, labels=None: | |||
(img / std, boxes, labels), | |||
ToTensor() | |||
]) | |||
def __call__(self, image): | |||
image, _, _ = self.transform(image) | |||
return image |
@@ -0,0 +1,49 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
import numpy as np | |||
from ..box_utils import generate_priors | |||
image_mean_test = image_mean = np.array([127, 127, 127]) | |||
image_std = 128.0 | |||
iou_threshold = 0.3 | |||
center_variance = 0.1 | |||
size_variance = 0.2 | |||
min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] | |||
shrinkage_list = [] | |||
image_size = [320, 240] # default input size 320*240 | |||
feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, | |||
4]] # default feature map size | |||
priors = [] | |||
def define_img_size(size): | |||
global image_size, feature_map_w_h_list, priors | |||
img_size_dict = { | |||
128: [128, 96], | |||
160: [160, 120], | |||
320: [320, 240], | |||
480: [480, 360], | |||
640: [640, 480], | |||
1280: [1280, 960] | |||
} | |||
image_size = img_size_dict[size] | |||
feature_map_w_h_list_dict = { | |||
128: [[16, 8, 4, 2], [12, 6, 3, 2]], | |||
160: [[20, 10, 5, 3], [15, 8, 4, 2]], | |||
320: [[40, 20, 10, 5], [30, 15, 8, 4]], | |||
480: [[60, 30, 15, 8], [45, 23, 12, 6]], | |||
640: [[80, 40, 20, 10], [60, 30, 15, 8]], | |||
1280: [[160, 80, 40, 20], [120, 60, 30, 15]] | |||
} | |||
feature_map_w_h_list = feature_map_w_h_list_dict[size] | |||
for i in range(0, len(image_size)): | |||
item_list = [] | |||
for k in range(0, len(feature_map_w_h_list[i])): | |||
item_list.append(image_size[i] / feature_map_w_h_list[i][k]) | |||
shrinkage_list.append(item_list) | |||
priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, | |||
min_boxes) |
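A sketch of the effect of define_img_size when run once on a fresh import (note that each call appends to the module-level shrinkage_list, so repeated calls would accumulate entries):

```python
define_img_size(320)
print(image_size)              # [320, 240]
print(feature_map_w_h_list)    # [[40, 20, 10, 5], [30, 15, 8, 4]]
print(priors.shape)            # torch.Size([4420, 4])
# 4420 = 40*30*3 + 20*15*2 + 10*8*2 + 5*4*3 anchors across the four feature maps
```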
@@ -0,0 +1,124 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
from torch.nn import Conv2d, ModuleList, ReLU, Sequential | |||
from ..mb_tiny import Mb_Tiny | |||
from . import fd_config as config | |||
from .predictor import Predictor | |||
from .ssd import SSD | |||
def SeperableConv2d(in_channels, | |||
out_channels, | |||
kernel_size=1, | |||
stride=1, | |||
padding=0): | |||
"""Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. | |||
""" | |||
return Sequential( | |||
Conv2d( | |||
in_channels=in_channels, | |||
out_channels=in_channels, | |||
kernel_size=kernel_size, | |||
groups=in_channels, | |||
stride=stride, | |||
padding=padding), | |||
ReLU(), | |||
Conv2d( | |||
in_channels=in_channels, out_channels=out_channels, kernel_size=1), | |||
) | |||
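A rough parameter-count comparison motivating the separable block above, assuming SeperableConv2d is in scope; for a 64-to-128-channel 3x3 layer the separable version needs roughly an eighth of the parameters of a dense convolution.

```python
from torch.nn import Conv2d

dense = Conv2d(64, 128, kernel_size=3, padding=1)
print(sum(p.numel() for p in dense.parameters()))   # 73856 = 64*128*9 + 128

sep = SeperableConv2d(64, 128, kernel_size=3, padding=1)
print(sum(p.numel() for p in sep.parameters()))     # 8960 = (64*9 + 64) + (64*128 + 128)
```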
def create_mb_tiny_fd(num_classes, is_test=False, device='cuda'): | |||
base_net = Mb_Tiny(2) | |||
base_net_model = base_net.model # disable dropout layer | |||
source_layer_indexes = [8, 11, 13] | |||
extras = ModuleList([ | |||
Sequential( | |||
Conv2d( | |||
in_channels=base_net.base_channel * 16, | |||
out_channels=base_net.base_channel * 4, | |||
kernel_size=1), ReLU(), | |||
SeperableConv2d( | |||
in_channels=base_net.base_channel * 4, | |||
out_channels=base_net.base_channel * 16, | |||
kernel_size=3, | |||
stride=2, | |||
padding=1), ReLU()) | |||
]) | |||
regression_headers = ModuleList([ | |||
SeperableConv2d( | |||
in_channels=base_net.base_channel * 4, | |||
out_channels=3 * 4, | |||
kernel_size=3, | |||
padding=1), | |||
SeperableConv2d( | |||
in_channels=base_net.base_channel * 8, | |||
out_channels=2 * 4, | |||
kernel_size=3, | |||
padding=1), | |||
SeperableConv2d( | |||
in_channels=base_net.base_channel * 16, | |||
out_channels=2 * 4, | |||
kernel_size=3, | |||
padding=1), | |||
Conv2d( | |||
in_channels=base_net.base_channel * 16, | |||
out_channels=3 * 4, | |||
kernel_size=3, | |||
padding=1) | |||
]) | |||
classification_headers = ModuleList([ | |||
SeperableConv2d( | |||
in_channels=base_net.base_channel * 4, | |||
out_channels=3 * num_classes, | |||
kernel_size=3, | |||
padding=1), | |||
SeperableConv2d( | |||
in_channels=base_net.base_channel * 8, | |||
out_channels=2 * num_classes, | |||
kernel_size=3, | |||
padding=1), | |||
SeperableConv2d( | |||
in_channels=base_net.base_channel * 16, | |||
out_channels=2 * num_classes, | |||
kernel_size=3, | |||
padding=1), | |||
Conv2d( | |||
in_channels=base_net.base_channel * 16, | |||
out_channels=3 * num_classes, | |||
kernel_size=3, | |||
padding=1) | |||
]) | |||
return SSD( | |||
num_classes, | |||
base_net_model, | |||
source_layer_indexes, | |||
extras, | |||
classification_headers, | |||
regression_headers, | |||
is_test=is_test, | |||
config=config, | |||
device=device) | |||
def create_mb_tiny_fd_predictor(net, | |||
candidate_size=200, | |||
nms_method=None, | |||
sigma=0.5, | |||
device=None): | |||
predictor = Predictor( | |||
net, | |||
config.image_size, | |||
config.image_mean_test, | |||
config.image_std, | |||
nms_method=nms_method, | |||
iou_threshold=config.iou_threshold, | |||
candidate_size=candidate_size, | |||
sigma=sigma, | |||
device=device) | |||
return predictor |
@@ -0,0 +1,80 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
import torch | |||
from .. import box_utils | |||
from .data_preprocessing import PredictionTransform | |||
class Predictor: | |||
def __init__(self, | |||
net, | |||
size, | |||
mean=0.0, | |||
std=1.0, | |||
nms_method=None, | |||
iou_threshold=0.3, | |||
filter_threshold=0.85, | |||
candidate_size=200, | |||
sigma=0.5, | |||
device=None): | |||
self.net = net | |||
self.transform = PredictionTransform(size, mean, std) | |||
self.iou_threshold = iou_threshold | |||
self.filter_threshold = filter_threshold | |||
self.candidate_size = candidate_size | |||
self.nms_method = nms_method | |||
self.sigma = sigma | |||
if device: | |||
self.device = device | |||
else: | |||
self.device = torch.device( | |||
'cuda:0' if torch.cuda.is_available() else 'cpu') | |||
self.net.to(self.device) | |||
self.net.eval() | |||
def predict(self, image, top_k=-1, prob_threshold=None): | |||
height, width, _ = image.shape | |||
image = self.transform(image) | |||
images = image.unsqueeze(0) | |||
images = images.to(self.device) | |||
with torch.no_grad(): | |||
for i in range(1): | |||
scores, boxes = self.net.forward(images) | |||
boxes = boxes[0] | |||
scores = scores[0] | |||
if not prob_threshold: | |||
prob_threshold = self.filter_threshold | |||
# this version of nms is slower on GPU, so we move data to CPU. | |||
picked_box_probs = [] | |||
picked_labels = [] | |||
for class_index in range(1, scores.size(1)): | |||
probs = scores[:, class_index] | |||
mask = probs > prob_threshold | |||
probs = probs[mask] | |||
if probs.size(0) == 0: | |||
continue | |||
subset_boxes = boxes[mask, :] | |||
box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) | |||
box_probs = box_utils.nms( | |||
box_probs, | |||
self.nms_method, | |||
score_threshold=prob_threshold, | |||
iou_threshold=self.iou_threshold, | |||
sigma=self.sigma, | |||
top_k=top_k, | |||
candidate_size=self.candidate_size) | |||
picked_box_probs.append(box_probs) | |||
picked_labels.extend([class_index] * box_probs.size(0)) | |||
if not picked_box_probs: | |||
return torch.tensor([]), torch.tensor([]), torch.tensor([]) | |||
picked_box_probs = torch.cat(picked_box_probs) | |||
picked_box_probs[:, 0] *= width | |||
picked_box_probs[:, 1] *= height | |||
picked_box_probs[:, 2] *= width | |||
picked_box_probs[:, 3] *= height | |||
return picked_box_probs[:, :4], torch.tensor( | |||
picked_labels), picked_box_probs[:, 4] |
@@ -0,0 +1,129 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
from collections import namedtuple | |||
from typing import List, Tuple | |||
import numpy as np | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
from .. import box_utils | |||
GraphPath = namedtuple('GraphPath', ['s0', 'name', 's1']) | |||
class SSD(nn.Module): | |||
def __init__(self, | |||
num_classes: int, | |||
base_net: nn.ModuleList, | |||
source_layer_indexes: List[int], | |||
extras: nn.ModuleList, | |||
classification_headers: nn.ModuleList, | |||
regression_headers: nn.ModuleList, | |||
is_test=False, | |||
config=None, | |||
device=None): | |||
"""Compose a SSD model using the given components. | |||
""" | |||
super(SSD, self).__init__() | |||
self.num_classes = num_classes | |||
self.base_net = base_net | |||
self.source_layer_indexes = source_layer_indexes | |||
self.extras = extras | |||
self.classification_headers = classification_headers | |||
self.regression_headers = regression_headers | |||
self.is_test = is_test | |||
self.config = config | |||
# register layers in source_layer_indexes by adding them to a module list | |||
self.source_layer_add_ons = nn.ModuleList([ | |||
t[1] for t in source_layer_indexes | |||
if isinstance(t, tuple) and not isinstance(t, GraphPath) | |||
]) | |||
if device: | |||
self.device = device | |||
else: | |||
self.device = torch.device( | |||
'cuda:0' if torch.cuda.is_available() else 'cpu') | |||
if is_test: | |||
self.config = config | |||
self.priors = config.priors.to(self.device) | |||
def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: | |||
confidences = [] | |||
locations = [] | |||
start_layer_index = 0 | |||
header_index = 0 | |||
end_layer_index = 0 | |||
for end_layer_index in self.source_layer_indexes: | |||
if isinstance(end_layer_index, GraphPath): | |||
path = end_layer_index | |||
end_layer_index = end_layer_index.s0 | |||
added_layer = None | |||
elif isinstance(end_layer_index, tuple): | |||
added_layer = end_layer_index[1] | |||
end_layer_index = end_layer_index[0] | |||
path = None | |||
else: | |||
added_layer = None | |||
path = None | |||
for layer in self.base_net[start_layer_index:end_layer_index]: | |||
x = layer(x) | |||
if added_layer: | |||
y = added_layer(x) | |||
else: | |||
y = x | |||
if path: | |||
sub = getattr(self.base_net[end_layer_index], path.name) | |||
for layer in sub[:path.s1]: | |||
x = layer(x) | |||
y = x | |||
for layer in sub[path.s1:]: | |||
x = layer(x) | |||
end_layer_index += 1 | |||
start_layer_index = end_layer_index | |||
confidence, location = self.compute_header(header_index, y) | |||
header_index += 1 | |||
confidences.append(confidence) | |||
locations.append(location) | |||
for layer in self.base_net[end_layer_index:]: | |||
x = layer(x) | |||
for layer in self.extras: | |||
x = layer(x) | |||
confidence, location = self.compute_header(header_index, x) | |||
header_index += 1 | |||
confidences.append(confidence) | |||
locations.append(location) | |||
confidences = torch.cat(confidences, 1) | |||
locations = torch.cat(locations, 1) | |||
if self.is_test: | |||
confidences = F.softmax(confidences, dim=2) | |||
boxes = box_utils.convert_locations_to_boxes( | |||
locations, self.priors, self.config.center_variance, | |||
self.config.size_variance) | |||
boxes = box_utils.center_form_to_corner_form(boxes) | |||
return confidences, boxes | |||
else: | |||
return confidences, locations | |||
def compute_header(self, i, x): | |||
confidence = self.classification_headers[i](x) | |||
confidence = confidence.permute(0, 2, 3, 1).contiguous() | |||
confidence = confidence.view(confidence.size(0), -1, self.num_classes) | |||
location = self.regression_headers[i](x) | |||
location = location.permute(0, 2, 3, 1).contiguous() | |||
location = location.view(location.size(0), -1, 4) | |||
return confidence, location | |||
def load(self, model): | |||
self.load_state_dict( | |||
torch.load(model, map_location=lambda storage, loc: storage)) |
@@ -0,0 +1,56 @@ | |||
# The implementation is based on ULFD, available at | |||
# https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
import types | |||
import cv2 | |||
import numpy as np | |||
import torch | |||
from numpy import random | |||
class Compose(object): | |||
"""Composes several augmentations together. | |||
Args: | |||
transforms (List[Transform]): list of transforms to compose. | |||
Example: | |||
>>> augmentations.Compose([ | |||
>>> transforms.CenterCrop(10), | |||
>>> transforms.ToTensor(), | |||
>>> ]) | |||
""" | |||
def __init__(self, transforms): | |||
self.transforms = transforms | |||
def __call__(self, img, boxes=None, labels=None): | |||
for t in self.transforms: | |||
img, boxes, labels = t(img, boxes, labels) | |||
return img, boxes, labels | |||
class SubtractMeans(object): | |||
def __init__(self, mean): | |||
self.mean = np.array(mean, dtype=np.float32) | |||
def __call__(self, image, boxes=None, labels=None): | |||
image = image.astype(np.float32) | |||
image -= self.mean | |||
return image.astype(np.float32), boxes, labels | |||
class Resize(object): | |||
def __init__(self, size=(300, 300)): | |||
self.size = size | |||
def __call__(self, image, boxes=None, labels=None): | |||
image = cv2.resize(image, (self.size[0], self.size[1])) | |||
return image, boxes, labels | |||
class ToTensor(object): | |||
def __call__(self, cvimage, boxes=None, labels=None): | |||
return torch.from_numpy(cvimage.astype(np.float32)).permute( | |||
2, 0, 1), boxes, labels |
@@ -1,3 +1,7 @@ | |||
""" | |||
The implementation here is modified from insightface, originally under the MIT license and publicly available at | |||
https://github.com/deepinsight/insightface/blob/master/python-package/insightface/utils/face_align.py | |||
""" | |||
import cv2 | |||
import numpy as np | |||
from skimage import transform as trans | |||
@@ -1,3 +1,5 @@ | |||
# The implementation is adapted from TFace, made publicly available under the Apache-2.0 license at | |||
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone | |||
from .model_irse import (IR_18, IR_34, IR_50, IR_101, IR_152, IR_200, IR_SE_50, | |||
IR_SE_101, IR_SE_152, IR_SE_200) | |||
from .model_resnet import ResNet_50, ResNet_101, ResNet_152 | |||
@@ -1,3 +1,5 @@ | |||
# The implementation is adapted from TFace, made publicly available under the Apache-2.0 license at | |||
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/common.py | |||
import torch | |||
import torch.nn as nn | |||
from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Linear, Module, ReLU, | |||
@@ -1,5 +1,5 @@ | |||
# based on: | |||
# https://github.com/ZhaoJ9014/face.evoLVe.PyTorch/blob/master/backbone/model_irse.py | |||
# The implementation is adapted from TFace, made publicly available under the Apache-2.0 license at | |||
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_irse.py | |||
from collections import namedtuple | |||
from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear, | |||
@@ -1,5 +1,5 @@ | |||
# based on: | |||
# https://github.com/ZhaoJ9014/face.evoLVe.PyTorch/blob/master/backbone/model_resnet.py | |||
# The implementation is adapted from TFace, made publicly available under the Apache-2.0 license at | |||
# https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_resnet.py | |||
import torch.nn as nn | |||
from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear, | |||
MaxPool2d, Module, ReLU, Sequential) | |||
@@ -105,12 +105,12 @@ def get_img_ins_seg_result(img_seg_result=None, | |||
} | |||
for seg_result in img_seg_result: | |||
box = { | |||
'x': np.int(seg_result[0]), | |||
'y': np.int(seg_result[1]), | |||
'w': np.int(seg_result[2] - seg_result[0]), | |||
'h': np.int(seg_result[3] - seg_result[1]) | |||
} | |||
box = [ | |||
np.int(seg_result[0]), | |||
np.int(seg_result[1]), | |||
np.int(seg_result[2]), | |||
np.int(seg_result[3]) | |||
] | |||
score = np.float(seg_result[4]) | |||
category = seg_result[5] | |||
@@ -161,12 +161,10 @@ def show_result( | |||
np.random.random() * 255.0 | |||
]) | |||
x1 = int(box['x']) | |||
y1 = int(box['y']) | |||
w = int(box['w']) | |||
h = int(box['h']) | |||
x2 = x1 + w | |||
y2 = y1 + h | |||
x1 = int(box[0]) | |||
y1 = int(box[1]) | |||
x2 = int(box[2]) | |||
y2 = int(box[3]) | |||
if show_box: | |||
cv2.rectangle( | |||
@@ -1,4 +1,4 @@ | |||
# The implementation is also open-sourced by the authors as PASS-reID, and is available publicly on | |||
# The implementation is adapted from PASS-reID, made publicly available under the Apache-2.0 License at | |||
# https://github.com/CASIA-IVA-Lab/PASS-reID | |||
import os | |||
@@ -1,4 +1,4 @@ | |||
# The implementation is also open-sourced by the authors as PASS-reID, and is available publicly on | |||
# The implementation is adapted from PASS-reID, made publicly available under the Apache-2.0 License at | |||
# https://github.com/CASIA-IVA-Lab/PASS-reID | |||
import collections.abc as container_abcs | |||
@@ -552,7 +552,7 @@ class CLIPVisionTransformer(nn.Module): | |||
nn.GroupNorm(1, embed_dim), | |||
nn.ConvTranspose2d( | |||
embed_dim, embed_dim, kernel_size=2, stride=2), | |||
nn.SyncBatchNorm(embed_dim), | |||
nn.BatchNorm2d(embed_dim), | |||
nn.GELU(), | |||
nn.ConvTranspose2d( | |||
embed_dim, embed_dim, kernel_size=2, stride=2), | |||