
merge internal master

Branch: master | Author: wenmeng.zwm | 2 years ago | Commit: 584fa8f586
28 changed files with 289 additions and 89 deletions
  1. .dev_scripts/ci_container_test.sh (+1 -4)
  2. .dev_scripts/dockerci.sh (+14 -14)
  3. .pre-commit-config.yaml (+1 -1)
  4. modelscope/hub/api.py (+15 -11)
  5. modelscope/hub/constants.py (+2 -2)
  6. modelscope/hub/utils/utils.py (+0 -13)
  7. modelscope/models/audio/kws/farfield/model.py (+2 -1)
  8. modelscope/models/cv/face_detection/mogface/models/detectors.py (+0 -1)
  9. modelscope/models/cv/face_detection/mtcnn/models/detector.py (+0 -1)
  10. modelscope/models/cv/face_detection/retinaface/detection.py (+0 -1)
  11. modelscope/models/cv/face_detection/ulfd_slim/detection.py (+0 -1)
  12. modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py (+0 -1)
  13. modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py (+0 -1)
  14. modelscope/models/cv/realtime_object_detection/realtime_video_detector.py (+6 -1)
  15. modelscope/models/cv/referring_video_object_segmentation/model.py (+4 -1)
  16. modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py (+4 -1)
  17. modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py (+5 -2)
  18. modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py (+10 -8)
  19. modelscope/pipelines/base.py (+0 -4)
  20. modelscope/pipelines/nlp/named_entity_recognition_pipeline.py (+19 -3)
  21. modelscope/pipelines/nlp/token_classification_pipeline.py (+14 -0)
  22. modelscope/pipelines/nlp/word_segmentation_pipeline.py (+21 -4)
  23. modelscope/trainers/audio/kws_farfield_trainer.py (+4 -3)
  24. modelscope/trainers/trainer.py (+0 -5)
  25. modelscope/utils/audio/audio_utils.py (+4 -1)
  26. tests/run.py (+134 -2)
  27. tests/run_config.yaml (+28 -1)
  28. tests/trainers/test_dialog_intent_trainer.py (+1 -1)

.dev_scripts/ci_container_test.sh (+1 -4)

@@ -1,6 +1,3 @@
-echo "Testing envs"
-printenv
-echo "ENV END"
 if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     pip install -r requirements/tests.txt
     git config --global --add safe.directory /Maas-lib
@@ -28,7 +25,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
     awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
     awk -F: '/^[^#]/ { print $1 }' requirements/science.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    pip install -r requirements/tests.txt
     # test with install
     python setup.py install
 else


.dev_scripts/dockerci.sh (+14 -14)

@@ -3,30 +3,32 @@ MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
 CODE_DIR=$PWD
 CODE_DIR_IN_CONTAINER=/Maas-lib
 echo "$USER"
-gpus='7 6 5 4 3 2 1 0'
-cpu_sets='0-7 8-15 16-23 24-30 31-37 38-44 45-51 52-58'
+gpus='0,1 2,3 4,5 6,7'
+cpu_sets='45-58 31-44 16-30 0-15'
 cpu_sets_arr=($cpu_sets)
 is_get_file_lock=false
-# export RUN_CASE_COMMAND='python tests/run.py --run_config tests/run_config.yaml'
-CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh $RUN_CASE_BASE_COMMAND}
+CI_COMMAND='bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml'
 echo "ci command: $CI_COMMAND"
+idx=0
 for gpu in $gpus
 do
   exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
-  flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; continue; }
+  flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; idx=$((idx+1)); continue; }
   echo "get gpu lock $gpu"
-  CONTAINER_NAME="modelscope-ci-$gpu"
+  CONTAINER_NAME="modelscope-ci-$idx"
   let is_get_file_lock=true

   # pull image if there are update
   docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
   if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
+    echo 'debugging'
     docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
-      --cpuset-cpus=${cpu_sets_arr[$gpu]} \
-      --gpus="device=$gpu" \
+      --cpuset-cpus=${cpu_sets_arr[$idx]} \
+      --gpus='"'"device=$gpu"'"' \
       -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
       -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
-      -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+      -v $MODELSCOPE_HOME_CACHE/$idx:/root \
       -v /home/admin/pre-commit:/home/admin/pre-commit \
       -e CI_TEST=True \
       -e TEST_LEVEL=$TEST_LEVEL \
@@ -41,16 +43,15 @@ do
       -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
       -e MODEL_TAG_URL=$MODEL_TAG_URL \
       --workdir=$CODE_DIR_IN_CONTAINER \
-      --net host \
       ${IMAGE_NAME}:${IMAGE_VERSION} \
       $CI_COMMAND
   else
     docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
-      --cpuset-cpus=${cpu_sets_arr[$gpu]} \
-      --gpus="device=$gpu" \
+      --cpuset-cpus=${cpu_sets_arr[$idx]} \
+      --gpus='"'"device=$gpu"'"' \
       -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
       -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
-      -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+      -v $MODELSCOPE_HOME_CACHE/$idx:/root \
       -v /home/admin/pre-commit:/home/admin/pre-commit \
       -e CI_TEST=True \
       -e TEST_LEVEL=$TEST_LEVEL \
@@ -64,7 +65,6 @@ do
       -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
       -e MODEL_TAG_URL=$MODEL_TAG_URL \
       --workdir=$CODE_DIR_IN_CONTAINER \
-      --net host \
       ${IMAGE_NAME}:${IMAGE_VERSION} \
       $CI_COMMAND
   fi


.pre-commit-config.yaml (+1 -1)

@@ -1,5 +1,5 @@
 repos:
-  - repo: https://github.com/PyCQA/flake8
+  - repo: https://github.com/pycqa/flake8.git
    rev: 4.0.0
    hooks:
      - id: flake8

modelscope/hub/api.py (+15 -11)

@@ -23,9 +23,10 @@ from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA,
                                       API_RESPONSE_FIELD_MESSAGE,
                                       API_RESPONSE_FIELD_USERNAME,
                                       DEFAULT_CREDENTIALS_PATH,
-                                      MODELSCOPE_ENVIRONMENT,
-                                      MODELSCOPE_USERNAME, ONE_YEAR_SECONDS,
-                                      Licenses, ModelVisibility)
+                                      MODELSCOPE_CLOUD_ENVIRONMENT,
+                                      MODELSCOPE_CLOUD_USERNAME,
+                                      ONE_YEAR_SECONDS, Licenses,
+                                      ModelVisibility)
 from modelscope.hub.errors import (InvalidParameter, NotExistError,
                                    NotLoginException, NoValidRevisionError,
                                    RequestError, datahub_raise_on_error,
@@ -653,10 +654,10 @@ class HubApi:
         # get channel and user_name
         channel = DownloadChannel.LOCAL.value
         user_name = ''
-        if MODELSCOPE_ENVIRONMENT in os.environ:
-            channel = os.environ[MODELSCOPE_ENVIRONMENT]
-        if MODELSCOPE_USERNAME in os.environ:
-            user_name = os.environ[MODELSCOPE_USERNAME]
+        if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ:
+            channel = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT]
+        if MODELSCOPE_CLOUD_USERNAME in os.environ:
+            user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]

         url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}?user={user_name}'
         cookies = ModelScopeConfig.get_cookies()
@@ -777,12 +778,15 @@ class ModelScopeConfig:
         Returns:
             The formatted user-agent string.
         """
+        # include some more telemetrics when executing in dedicated
+        # cloud containers
         env = 'custom'
-        if MODELSCOPE_ENVIRONMENT in os.environ:
-            env = os.environ[MODELSCOPE_ENVIRONMENT]
+        if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ:
+            env = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT]
         user_name = 'unknown'
-        if MODELSCOPE_USERNAME in os.environ:
-            user_name = os.environ[MODELSCOPE_USERNAME]
+        if MODELSCOPE_CLOUD_USERNAME in os.environ:
+            user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]

         ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % (
             __version__,


modelscope/hub/constants.py (+2 -2)

@@ -16,9 +16,9 @@ API_RESPONSE_FIELD_GIT_ACCESS_TOKEN = 'AccessToken'
 API_RESPONSE_FIELD_USERNAME = 'Username'
 API_RESPONSE_FIELD_EMAIL = 'Email'
 API_RESPONSE_FIELD_MESSAGE = 'Message'
-MODELSCOPE_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'
+MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'
+MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME'
 MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG'
-MODELSCOPE_USERNAME = 'MODELSCOPE_USERNAME'
 ONE_YEAR_SECONDS = 24 * 365 * 60 * 60
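
Note: only the Python constant names gain a CLOUD_ prefix here; the environment variable keys they read ('MODELSCOPE_ENVIRONMENT', 'MODELSCOPE_USERNAME') are unchanged, so deployments that already set those variables keep working. A minimal sketch of that invariant (the 'dsw' value is a hypothetical example):

    import os

    MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'  # renamed constant, same env key

    os.environ['MODELSCOPE_ENVIRONMENT'] = 'dsw'  # hypothetical platform-set value
    assert os.environ[MODELSCOPE_CLOUD_ENVIRONMENT] == 'dsw'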






modelscope/hub/utils/utils.py (+0 -13)

@@ -87,16 +87,3 @@ def file_integrity_validation(file_path, expected_sha256):
         msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path
         logger.error(msg)
         raise FileIntegrityError(msg)
-
-
-def create_library_statistics(method: str, name: str, cn_name: Optional[str]):
-    try:
-        from modelscope.hub.api import ModelScopeConfig
-        path = f'{get_endpoint()}/api/v1/statistics/library'
-        headers = {'user-agent': ModelScopeConfig.get_user_agent()}
-        params = {'Method': method, 'Name': name, 'CnName': cn_name}
-        r = requests.post(path, params=params, headers=headers)
-        r.raise_for_status()
-    except Exception:
-        pass
-    return

modelscope/models/audio/kws/farfield/model.py (+2 -1)

@@ -54,7 +54,8 @@ class FSMNSeleNetV2Decorator(TorchModel):
         )

     def __del__(self):
-        self.tmp_dir.cleanup()
+        if hasattr(self, 'tmp_dir'):
+            self.tmp_dir.cleanup()

     def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
         return self.model.forward(input)
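
Note: the guard matters because __del__ runs even when __init__ exits early; if construction raised before tmp_dir was assigned, the old cleanup crashed with an AttributeError at garbage collection. A minimal sketch of the failure mode (class name hypothetical):

    import tempfile

    class Decorator:
        def __init__(self, fail=False):
            if fail:
                raise RuntimeError('load failed')  # tmp_dir never gets assigned
            self.tmp_dir = tempfile.TemporaryDirectory()

        def __del__(self):
            # without the hasattr check, a half-built instance raises
            # AttributeError here when it is garbage-collected
            if hasattr(self, 'tmp_dir'):
                self.tmp_dir.cleanup()

    try:
        Decorator(fail=True)
    except RuntimeError:
        pass  # __del__ still runs on the partially initialized object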


modelscope/models/cv/face_detection/mogface/models/detectors.py (+0 -1)

@@ -20,7 +20,6 @@ class MogFaceDetector(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device


modelscope/models/cv/face_detection/mtcnn/models/detector.py (+0 -1)

@@ -21,7 +21,6 @@ class MtcnnFaceDetector(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device


modelscope/models/cv/face_detection/retinaface/detection.py (+0 -1)

@@ -18,7 +18,6 @@ class RetinaFaceDetection(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.cfg = Config.from_file(


modelscope/models/cv/face_detection/ulfd_slim/detection.py (+0 -1)

@@ -24,7 +24,6 @@ class UlfdFaceDetector(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device


modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py (+0 -1)

@@ -24,7 +24,6 @@ class FacialExpressionRecognition(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device


modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py (+0 -1)

@@ -31,7 +31,6 @@ cfg_re50 = {
 class RetinaFaceDetection(object):

     def __init__(self, model_path, device='cuda'):
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device
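
Note: the six nearly identical deletions above all drop a constructor-level torch.set_grad_enabled(False), which flips a process-wide autograd switch: instantiating one of these detectors could silently disable gradients for an unrelated trainer running in the same interpreter. If gradient-free inference is still wanted, the usual replacement is to scope it per call; a sketch under that assumption:

    import torch

    @torch.no_grad()  # autograd is disabled only inside this call
    def detect(model, image_tensor):
        return model(image_tensor)

    # equivalently, as a context manager around the inference code:
    # with torch.no_grad():
    #     outputs = model(image_tensor)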


modelscope/models/cv/realtime_object_detection/realtime_video_detector.py (+6 -1)

@@ -7,6 +7,7 @@ import time

 import cv2
 import json
+import numpy as np
 import torch
 from tqdm import tqdm

@@ -87,13 +88,17 @@ class RealtimeVideoDetector(TorchModel):
             self.nmsthre,
             class_agnostic=True)

-        if len(outputs) == 1:
+        if len(outputs) == 1 and (outputs[0] is not None):
             bboxes = outputs[0][:, 0:4].cpu().numpy() / self.ratio
             scores = outputs[0][:, 5].cpu().numpy()
             labels = outputs[0][:, 6].cpu().int().numpy()
             pred_label_names = []
             for lab in labels:
                 pred_label_names.append(self.label_mapping[lab])
+        else:
+            bboxes = np.asarray([])
+            scores = np.asarray([])
+            pred_label_names = np.asarray([])

         return bboxes, scores, pred_label_names




modelscope/models/cv/referring_video_object_segmentation/model.py (+4 -1)

@@ -31,7 +31,10 @@ class ReferringVideoObjectSegmentation(TorchModel):

         config_path = osp.join(model_dir, ModelFile.CONFIGURATION)
         self.cfg = Config.from_file(config_path)
-        self.model = MTTR(**self.cfg.model)
+        transformer_cfg_dir = osp.join(model_dir, 'transformer_cfg_dir')
+
+        self.model = MTTR(
+            transformer_cfg_dir=transformer_cfg_dir, **self.cfg.model)

         model_path = osp.join(model_dir, ModelFile.TORCH_MODEL_FILE)
         params_dict = torch.load(model_path, map_location='cpu')


modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py (+4 -1)

@@ -19,6 +19,7 @@ class MTTR(nn.Module):
                  num_queries,
                  mask_kernels_dim=8,
                  aux_loss=False,
+                 transformer_cfg_dir=None,
                  **kwargs):
         """
         Parameters:
@@ -29,7 +30,9 @@
         """
         super().__init__()
         self.backbone = init_backbone(**kwargs)
-        self.transformer = MultimodalTransformer(**kwargs)
+        assert transformer_cfg_dir is not None
+        self.transformer = MultimodalTransformer(
+            transformer_cfg_dir=transformer_cfg_dir, **kwargs)
         d_model = self.transformer.d_model
         self.is_referred_head = nn.Linear(
             d_model,


modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py (+5 -2)

@@ -26,6 +26,7 @@ class MultimodalTransformer(nn.Module):
                  num_decoder_layers=3,
                  text_encoder_type='roberta-base',
                  freeze_text_encoder=True,
+                 transformer_cfg_dir=None,
                  **kwargs):
         super().__init__()
         self.d_model = kwargs['d_model']
@@ -40,10 +41,12 @@
         self.pos_encoder_2d = PositionEmbeddingSine2D()
         self._reset_parameters()

-        self.text_encoder = RobertaModel.from_pretrained(text_encoder_type)
+        if text_encoder_type != 'roberta-base':
+            transformer_cfg_dir = text_encoder_type
+        self.text_encoder = RobertaModel.from_pretrained(transformer_cfg_dir)
         self.text_encoder.pooler = None  # this pooler is never used, this is a hack to avoid DDP problems...
         self.tokenizer = RobertaTokenizerFast.from_pretrained(
-            text_encoder_type)
+            transformer_cfg_dir)
         self.freeze_text_encoder = freeze_text_encoder
         if freeze_text_encoder:
             for p in self.text_encoder.parameters():
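
Note: together with the two files above, this routes the RoBERTa text encoder through a transformer_cfg_dir shipped inside the downloaded model directory rather than fetching 'roberta-base' from the Hugging Face hub at run time. from_pretrained accepts a local path wherever it accepts a hub id, so the fallback reduces to a path choice; a condensed sketch (the local path is hypothetical):

    from transformers import RobertaModel, RobertaTokenizerFast

    text_encoder_type = 'roberta-base'
    transformer_cfg_dir = '/path/to/model_dir/transformer_cfg_dir'  # hypothetical

    # prefer the local snapshot for the default encoder; otherwise honor
    # the explicitly requested encoder name
    source = transformer_cfg_dir if text_encoder_type == 'roberta-base' else text_encoder_type
    text_encoder = RobertaModel.from_pretrained(source)
    tokenizer = RobertaTokenizerFast.from_pretrained(source)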


modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py (+10 -8)

@@ -188,11 +188,13 @@


 class KWSDataLoader:
-    """
-    dataset: the dataset reference
-    batchsize: data batch size
-    numworkers: no. of workers
-    prefetch: prefetch factor
+    """ Load and organize audio data with multiple threads
+
+    Args:
+        dataset: the dataset reference
+        batchsize: data batch size
+        numworkers: no. of workers
+        prefetch: prefetch factor
     """

     def __init__(self, dataset, batchsize, numworkers, prefetch=2):
@@ -202,7 +204,7 @@
         self.isrun = True

         # data queue
-        self.pool = queue.Queue(batchsize * prefetch)
+        self.pool = queue.Queue(numworkers * prefetch)

         # initialize workers
         self.workerlist = []
@@ -270,11 +272,11 @@
             w.stopWorker()

         while not self.pool.empty():
-            self.pool.get(block=True, timeout=0.001)
+            self.pool.get(block=True, timeout=0.01)

         # wait workers terminated
         for w in self.workerlist:
             while not self.pool.empty():
-                self.pool.get(block=True, timeout=0.001)
+                self.pool.get(block=True, timeout=0.01)
             w.join()
         logger.info('KWSDataLoader: All worker stopped.')
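
Note: sizing the queue by numworkers * prefetch ties the buffer to the threads that fill it, giving each worker roughly prefetch slots in flight; the old batchsize * prefetch bound inflated the buffer with large batch sizes regardless of worker count. A stripped-down sketch of the pattern (not the actual loader):

    import queue
    import threading

    def start_workers(make_batch, numworkers, prefetch=2):
        # put() blocks once the pool holds numworkers * prefetch batches,
        # throttling producers so each stays ~prefetch batches ahead
        pool = queue.Queue(maxsize=numworkers * prefetch)

        def work():
            while True:
                pool.put(make_batch())

        for _ in range(numworkers):
            threading.Thread(target=work, daemon=True).start()
        return pool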

modelscope/pipelines/base.py (+0 -4)

@@ -10,7 +10,6 @@ from typing import Any, Dict, Generator, List, Mapping, Union

 import numpy as np

-from modelscope.hub.utils.utils import create_library_statistics
 from modelscope.models.base import Model
 from modelscope.msdatasets import MsDataset
 from modelscope.outputs import TASK_OUTPUTS
@@ -152,9 +151,6 @@ class Pipeline(ABC):
              **kwargs) -> Union[Dict[str, Any], Generator]:
         # model provider should leave it as it is
         # modelscope library developer will handle this function
-        for single_model in self.models:
-            if hasattr(single_model, 'name'):
-                create_library_statistics('pipeline', single_model.name, None)
         # place model to cpu or gpu
         if (self.model or (self.has_multiple_models and self.models[0])):
             if not self._model_prepare:


modelscope/pipelines/nlp/named_entity_recognition_pipeline.py (+19 -3)

@@ -92,6 +92,8 @@ class NamedEntityRecognitionPipeline(Pipeline):
         offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]

         labels = [self.id2label[x] for x in predictions]
+        if len(labels) > len(offset_mapping):
+            labels = labels[1:-1]
         chunks = []
         chunk = {}
         for label, offsets in zip(labels, offset_mapping):
@@ -104,6 +106,20 @@
                     'start': offsets[0],
                     'end': offsets[1]
                 }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
             if label[0] in 'IES':
                 if chunk:
                     chunk['end'] = offsets[1]
@@ -118,15 +134,15 @@
             chunk['span'] = text[chunk['start']:chunk['end']]
             chunks.append(chunk)

-        # for cws output
+        # for cws outputs
         if len(chunks) > 0 and chunks[0]['type'] == 'cws':
             spans = [
                 chunk['span'] for chunk in chunks if chunk['span'].strip()
             ]
             seg_result = ' '.join(spans)
-            outputs = {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []}
+            outputs = {OutputKeys.OUTPUT: seg_result}

-        # for ner outpus
+        # for ner outputs
         else:
             outputs = {OutputKeys.OUTPUT: chunks}
         return outputs


modelscope/pipelines/nlp/token_classification_pipeline.py (+14 -0)

@@ -95,6 +95,20 @@ class TokenClassificationPipeline(Pipeline):
                     'start': offsets[0],
                     'end': offsets[1]
                 }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
             if label[0] in 'IES':
                 if chunk:
                     chunk['end'] = offsets[1]
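
Note: this is the same BIOES chunk decoder patched in three pipelines (the NER pipeline above, here, and the word segmentation pipeline below): the new branches open a chunk when an I- or E- label arrives with no chunk in progress, instead of dropping the token, which makes decoding tolerant of sequences that begin mid-entity (for instance after the special-token labels are trimmed). A condensed, self-contained sketch of the rule, with the two identical I/E branches folded into one (function name hypothetical, simplified from the pipeline code):

    def decode_bioes(labels, offsets, text):
        """Decode BIOES labels into typed spans, opening a chunk on a
        stray I-/E- label the way the patched pipelines do."""
        chunks, chunk = [], {}
        for label, (start, end) in zip(labels, offsets):
            if label[0] in 'BS':
                if chunk:
                    chunk['span'] = text[chunk['start']:chunk['end']]
                    chunks.append(chunk)
                chunk = {'type': label[2:], 'start': start, 'end': end}
            if label[0] in 'IE' and not chunk:
                chunk = {'type': label[2:], 'start': start, 'end': end}
            if label[0] in 'IES':
                if chunk:
                    chunk['end'] = end
                if label[0] in 'ES':
                    chunk['span'] = text[chunk['start']:chunk['end']]
                    chunks.append(chunk)
                    chunk = {}
        if chunk:
            chunk['span'] = text[chunk['start']:chunk['end']]
            chunks.append(chunk)
        return chunks

    # 'Beijing' tagged I-LOC / E-LOC with no leading B-LOC is still recovered:
    # decode_bioes(['I-LOC', 'E-LOC'], [(0, 3), (3, 7)], 'Beijing')
    # -> [{'type': 'LOC', 'start': 0, 'end': 7, 'span': 'Beijing'}]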


modelscope/pipelines/nlp/word_segmentation_pipeline.py (+21 -4)

@@ -80,9 +80,12 @@ class WordSegmentationPipeline(Pipeline):
             Dict[str, str]: the prediction results
         """
         text = inputs['text']
-        logits = inputs[OutputKeys.LOGITS]
-        predictions = torch.argmax(logits[0], dim=-1)
-        logits = torch_nested_numpify(torch_nested_detach(logits))
+        if not hasattr(inputs, 'predictions'):
+            logits = inputs[OutputKeys.LOGITS]
+            predictions = torch.argmax(logits[0], dim=-1)
+        else:
+            predictions = inputs[OutputKeys.PREDICTIONS].squeeze(
+                0).cpu().numpy()
         predictions = torch_nested_numpify(torch_nested_detach(predictions))
         offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]

@@ -101,6 +104,20 @@
                     'start': offsets[0],
                     'end': offsets[1]
                 }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
             if label[0] in 'IES':
                 if chunk:
                     chunk['end'] = offsets[1]
@@ -123,7 +140,7 @@
             seg_result = ' '.join(spans)
             outputs = {OutputKeys.OUTPUT: seg_result}

-        # for ner output
+        # for ner outputs
         else:
             outputs = {OutputKeys.OUTPUT: chunks}
         return outputs

modelscope/trainers/audio/kws_farfield_trainer.py (+4 -3)

@@ -117,8 +117,7 @@ class KWSFarfieldTrainer(BaseTrainer):
         self._batch_size = dataloader_config.batch_size_per_gpu
         if 'model_bin' in kwargs:
             model_bin_file = os.path.join(self.model_dir, kwargs['model_bin'])
-            checkpoint = torch.load(model_bin_file)
-            self.model.load_state_dict(checkpoint)
+            self.model = torch.load(model_bin_file)
         # build corresponding optimizer and loss function
         lr = self.cfg.train.optimizer.lr
         self.optimizer = optim.Adam(self.model.parameters(), lr)
@@ -219,7 +218,9 @@
         # check point
         ckpt_name = 'checkpoint_{:04d}_loss_train_{:.4f}_loss_val_{:.4f}.pth'.format(
             self._current_epoch, loss_train_epoch, loss_val_epoch)
-        torch.save(self.model, os.path.join(self.work_dir, ckpt_name))
+        save_path = os.path.join(self.work_dir, ckpt_name)
+        logger.info(f'Save model to {save_path}')
+        torch.save(self.model, save_path)
         # time spent per epoch
         epochtime = datetime.datetime.now() - epochtime
         logger.info('Epoch {:04d} time spent: {:.2f} hours'.format(
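
Note: with this change both directions use whole-module serialization: torch.save(self.model, ...) pickles the entire nn.Module, and 'model_bin' checkpoints are read back with a single torch.load instead of load_state_dict onto a pre-built network. The trade-off is that whole-module pickles are coupled to the class definitions and module paths that existed at save time; a sketch of the two styles:

    import torch
    from torch import nn

    model = nn.Linear(4, 2)

    # whole-module checkpoint, as this trainer saves and now also loads
    torch.save(model, 'ckpt.pth')
    model = torch.load('ckpt.pth')

    # state-dict checkpoint: less coupling, but the architecture
    # must be reconstructed before loading the weights
    torch.save(model.state_dict(), 'weights.pth')
    rebuilt = nn.Linear(4, 2)
    rebuilt.load_state_dict(torch.load('weights.pth'))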


modelscope/trainers/trainer.py (+0 -5)

@@ -15,7 +15,6 @@ from torch.utils.data.dataloader import default_collate
 from torch.utils.data.distributed import DistributedSampler

 from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.hub.utils.utils import create_library_statistics
 from modelscope.metainfo import Trainers
 from modelscope.metrics import build_metric, task_default_metrics
 from modelscope.models.base import Model, TorchModel
@@ -437,8 +436,6 @@ class EpochBasedTrainer(BaseTrainer):

     def train(self, checkpoint_path=None, *args, **kwargs):
         self._mode = ModeKeys.TRAIN
-        if hasattr(self.model, 'name'):
-            create_library_statistics('train', self.model.name, None)

         if self.train_dataset is None:
             self.train_dataloader = self.get_train_dataloader()
@@ -459,8 +456,6 @@
         self.train_loop(self.train_dataloader)

     def evaluate(self, checkpoint_path=None):
-        if hasattr(self.model, 'name'):
-            create_library_statistics('evaluate', self.model.name, None)
         if checkpoint_path is not None and os.path.isfile(checkpoint_path):
             from modelscope.trainers.hooks import CheckpointHook
             CheckpointHook.load_checkpoint(checkpoint_path, self)


modelscope/utils/audio/audio_utils.py (+4 -1)

@@ -43,7 +43,10 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
     def repl(matched):
         key = matched.group(1)
         if key in conf_item:
-            return conf_item[key]
+            value = conf_item[key]
+            if not isinstance(value, str):
+                value = str(value)
+            return value
         else:
             return None
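
Note: the callback passed to re.sub must return a string; when a config value is numeric, returning it unconverted raises a TypeError at substitution time, which is what the str() coercion fixes. A minimal repro under that assumption (the placeholder pattern is hypothetical, not necessarily the one update_conf uses):

    import re

    conf_item = {'sample_rate': 16000}  # non-str value

    def repl(matched):
        value = conf_item[matched.group(1)]
        if not isinstance(value, str):
            value = str(value)  # re.sub rejects non-str return values
        return value

    print(re.sub(r'\$\{(\w+)\}', repl, 'rate=${sample_rate}'))  # rate=16000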




tests/run.py (+134 -2)

@@ -3,11 +3,13 @@

 import argparse
 import datetime
+import math
 import multiprocessing
 import os
 import subprocess
 import sys
 import tempfile
+import time
 import unittest
 from fnmatch import fnmatch
 from multiprocessing.managers import BaseManager
@@ -158,6 +160,21 @@ def run_command_with_popen(cmd):
         sys.stdout.write(line)


+def async_run_command_with_popen(cmd, device_id):
+    logger.info('Worker id: %s args: %s' % (device_id, cmd))
+    env = os.environ.copy()
+    env['CUDA_VISIBLE_DEVICES'] = '%s' % device_id
+    sub_process = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        bufsize=1,
+        universal_newlines=True,
+        env=env,
+        encoding='utf8')
+    return sub_process
+
+
 def save_test_result(df, args):
     if args.result_dir is not None:
         file_name = str(int(datetime.datetime.now().timestamp() * 1000))
@@ -199,6 +216,108 @@ def install_requirements(requirements):
         run_command(cmd)


+def wait_for_free_worker(workers):
+    while True:
+        for idx, worker in enumerate(workers):
+            if worker is None:
+                logger.info('return free worker: %s' % (idx))
+                return idx
+            if worker.poll() is None:  # running, get output
+                for line in iter(worker.stdout.readline, ''):
+                    if line != '':
+                        sys.stdout.write(line)
+                    else:
+                        break
+            else:  # worker process completed.
+                logger.info('Process end: %s' % (idx))
+                workers[idx] = None
+                return idx
+        time.sleep(0.001)
+
+
+def wait_for_workers(workers):
+    while True:
+        for idx, worker in enumerate(workers):
+            if worker is None:
+                continue
+            # check worker is completed.
+            if worker.poll() is None:
+                for line in iter(worker.stdout.readline, ''):
+                    if line != '':
+                        sys.stdout.write(line)
+                    else:
+                        break
+            else:
+                logger.info('Process idx: %s end!' % (idx))
+                workers[idx] = None
+
+        is_all_completed = True
+        for idx, worker in enumerate(workers):
+            if worker is not None:
+                is_all_completed = False
+                break
+
+        if is_all_completed:
+            logger.info('All sub porcess is completed!')
+            break
+        time.sleep(0.001)
+
+
+def parallel_run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
+                             result_dir, parallel):
+    logger.info('Running case in env: %s' % env_name)
+    # install requirements and deps  # run_config['envs'][env]
+    if 'requirements' in env:
+        install_requirements(env['requirements'])
+    if 'dependencies' in env:
+        install_packages(env['dependencies'])
+    # case worker processes
+    worker_processes = [None] * parallel
+    for test_suite_file in isolated_cases:  # run case in subprocess
+        if test_suite_file in test_suite_env_map and test_suite_env_map[
+                test_suite_file] == env_name:
+            cmd = [
+                'python',
+                'tests/run.py',
+                '--pattern',
+                test_suite_file,
+                '--result_dir',
+                result_dir,
+            ]
+            worker_idx = wait_for_free_worker(worker_processes)
+            worker_process = async_run_command_with_popen(cmd, worker_idx)
+            os.set_blocking(worker_process.stdout.fileno(), False)
+            worker_processes[worker_idx] = worker_process
+        else:
+            pass  # case not in run list.
+
+    # run remain cases in a process.
+    remain_suite_files = []
+    for k, v in test_suite_env_map.items():
+        if k not in isolated_cases and v == env_name:
+            remain_suite_files.append(k)
+    if len(remain_suite_files) == 0:
+        return
+    # roughly split case in parallel
+    part_count = math.ceil(len(remain_suite_files) / parallel)
+    suites_chunks = [
+        remain_suite_files[x:x + part_count]
+        for x in range(0, len(remain_suite_files), part_count)
+    ]
+    for suites_chunk in suites_chunks:
+        worker_idx = wait_for_free_worker(worker_processes)
+        cmd = [
+            'python', 'tests/run.py', '--result_dir', result_dir, '--suites'
+        ]
+        for suite in suites_chunk:
+            cmd.append(suite)
+        worker_process = async_run_command_with_popen(cmd, worker_idx)
+        os.set_blocking(worker_process.stdout.fileno(), False)
+        worker_processes[worker_idx] = worker_process
+
+    wait_for_workers(worker_processes)
+
+
 def run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
                     result_dir):
     # install requirements and deps  # run_config['envs'][env]
@@ -264,8 +383,9 @@ def run_in_subprocess(args):

     with tempfile.TemporaryDirectory() as temp_result_dir:
         for env in set(test_suite_env_map.values()):
-            run_case_in_env(env, run_config['envs'][env], test_suite_env_map,
-                            isolated_cases, temp_result_dir)
+            parallel_run_case_in_env(env, run_config['envs'][env],
+                                     test_suite_env_map, isolated_cases,
+                                     temp_result_dir, args.parallel)

     result_dfs = []
     result_path = Path(temp_result_dir)
@@ -312,6 +432,10 @@ class TimeCostTextTestResult(TextTestResult):
         self.stream.writeln(
             'Test case: %s stop at: %s, cost time: %s(seconds)' %
             (test.test_full_name, test.stop_time, test.time_cost))
+        if torch.cuda.is_available(
+        ) and test.time_cost > 5.0:  # print nvidia-smi
+            cmd = ['nvidia-smi']
+            run_command_with_popen(cmd)
         super(TimeCostTextTestResult, self).stopTest(test)

     def addSuccess(self, test):
@@ -383,6 +507,8 @@ def main(args):
         os.path.abspath(args.test_dir), args.pattern, args.list_tests)
     if not args.list_tests:
         result = runner.run(test_suite)
+        logger.info('Running case completed, pid: %s, suites: %s' %
+                    (os.getpid(), args.suites))
         result = collect_test_results(result)
         df = test_cases_result_to_df(result)
         if args.result_dir is not None:
@@ -417,6 +543,12 @@ if __name__ == '__main__':
         '--result_dir',
         default=None,
         help='Save result to directory, internal use only')
+    parser.add_argument(
+        '--parallel',
+        default=1,
+        type=int,
+        help='Set case parallels, default single process, set with gpu number.'
+    )
     parser.add_argument(
         '--suites',
         nargs='*',
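
Note: the scheduler's output handling leans on a small trick: each child's stdout pipe is switched to non-blocking with os.set_blocking(fd, False), so readline returns an empty string when nothing is buffered and the iter(..., '') loop falls through instead of stalling, letting a single loop poll several live workers in turn. A condensed sketch of the pattern as used here (not production-hardened):

    import os
    import subprocess
    import sys

    proc = subprocess.Popen(
        ['python', '-c', 'print("hello from worker")'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True)
    os.set_blocking(proc.stdout.fileno(), False)

    while proc.poll() is None:
        # drain whatever is currently buffered, then move on to other work
        for line in iter(proc.stdout.readline, ''):
            sys.stdout.write(line)
    for line in iter(proc.stdout.readline, ''):  # final drain after exit
        sys.stdout.write(line)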


tests/run_config.yaml (+28 -1)

@@ -1,5 +1,5 @@
 # isolate cases in env, we can install different dependencies in each env.
-isolated: # test cases that may require excessive anmount of GPU memory, which will be executed in dedicagted process.
+isolated: # test cases that may require excessive anmount of GPU memory or run long time, which will be executed in dedicagted process.
 - test_text_to_speech.py
 - test_multi_modal_embedding.py
 - test_ofa_tasks.py
@@ -13,6 +13,33 @@ isolated: # test cases that may require excessive anmount of GPU memory, which
 - test_movie_scene_segmentation.py
 - test_image_inpainting.py
 - test_mglm_text_summarization.py
+- test_team_transfer_trainer.py
+- test_image_denoise_trainer.py
+- test_dialog_intent_trainer.py
+- test_finetune_mplug.py
+- test_image_instance_segmentation_trainer.py
+- test_image_portrait_enhancement_trainer.py
+- test_translation_trainer.py
+- test_unifold.py
+- test_automatic_post_editing.py
+- test_mplug_tasks.py
+- test_movie_scene_segmentation.py
+- test_body_3d_keypoints.py
+- test_finetune_text_generation.py
+- test_clip_trainer.py
+- test_ofa_trainer.py
+- test_fill_mask.py
+- test_hand_2d_keypoints.py
+- test_referring_video_object_segmentation.py
+- test_easycv_trainer_hand_2d_keypoints.py
+- test_card_detection_scrfd_trainer.py
+- test_referring_video_object_segmentation_trainer.py
+- test_person_image_cartoon.py
+- test_image_style_transfer.py
+- test_ocr_detection.py
+- test_automatic_speech_recognition.py
+- test_image_matting.py
+- test_skin_retouching.py

 envs:
   default: # default env, case not in other env will in default, pytorch.

tests/trainers/test_dialog_intent_trainer.py (+1 -1)

@@ -94,7 +94,7 @@ class TestDialogIntentTrainer(unittest.TestCase):
         cfg.Model.update(config['Model'])
         if self.debugging:
             cfg.Trainer.save_checkpoint = False
-            cfg.Trainer.num_epochs = 5
+            cfg.Trainer.num_epochs = 1
             cfg.Trainer.batch_size_label = 64
         return cfg


