
merge internal master

wenmeng.zwm committed 2 years ago · branch: master · commit 584fa8f586
28 changed files with 289 additions and 89 deletions
  1. .dev_scripts/ci_container_test.sh (+1 -4)
  2. .dev_scripts/dockerci.sh (+14 -14)
  3. .pre-commit-config.yaml (+1 -1)
  4. modelscope/hub/api.py (+15 -11)
  5. modelscope/hub/constants.py (+2 -2)
  6. modelscope/hub/utils/utils.py (+0 -13)
  7. modelscope/models/audio/kws/farfield/model.py (+2 -1)
  8. modelscope/models/cv/face_detection/mogface/models/detectors.py (+0 -1)
  9. modelscope/models/cv/face_detection/mtcnn/models/detector.py (+0 -1)
  10. modelscope/models/cv/face_detection/retinaface/detection.py (+0 -1)
  11. modelscope/models/cv/face_detection/ulfd_slim/detection.py (+0 -1)
  12. modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py (+0 -1)
  13. modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py (+0 -1)
  14. modelscope/models/cv/realtime_object_detection/realtime_video_detector.py (+6 -1)
  15. modelscope/models/cv/referring_video_object_segmentation/model.py (+4 -1)
  16. modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py (+4 -1)
  17. modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py (+5 -2)
  18. modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py (+10 -8)
  19. modelscope/pipelines/base.py (+0 -4)
  20. modelscope/pipelines/nlp/named_entity_recognition_pipeline.py (+19 -3)
  21. modelscope/pipelines/nlp/token_classification_pipeline.py (+14 -0)
  22. modelscope/pipelines/nlp/word_segmentation_pipeline.py (+21 -4)
  23. modelscope/trainers/audio/kws_farfield_trainer.py (+4 -3)
  24. modelscope/trainers/trainer.py (+0 -5)
  25. modelscope/utils/audio/audio_utils.py (+4 -1)
  26. tests/run.py (+134 -2)
  27. tests/run_config.yaml (+28 -1)
  28. tests/trainers/test_dialog_intent_trainer.py (+1 -1)

.dev_scripts/ci_container_test.sh (+1 -4)

@@ -1,6 +1,3 @@
echo "Testing envs"
printenv
echo "ENV END"
if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
pip install -r requirements/tests.txt
git config --global --add safe.directory /Maas-lib
@@ -28,7 +25,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/science.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+ pip install -r requirements/tests.txt
# test with install
python setup.py install
else


.dev_scripts/dockerci.sh (+14 -14)

@@ -3,30 +3,32 @@ MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
CODE_DIR=$PWD
CODE_DIR_IN_CONTAINER=/Maas-lib
echo "$USER"
- gpus='7 6 5 4 3 2 1 0'
- cpu_sets='0-7 8-15 16-23 24-30 31-37 38-44 45-51 52-58'
+ gpus='0,1 2,3 4,5 6,7'
+ cpu_sets='45-58 31-44 16-30 0-15'
cpu_sets_arr=($cpu_sets)
is_get_file_lock=false
+ # export RUN_CASE_COMMAND='python tests/run.py --run_config tests/run_config.yaml'
- CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh $RUN_CASE_BASE_COMMAND}
+ CI_COMMAND='bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml'
echo "ci command: $CI_COMMAND"
+ idx=0
for gpu in $gpus
do
exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
- flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; continue; }
+ flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; idx=$((idx+1)); continue; }
echo "get gpu lock $gpu"
CONTAINER_NAME="modelscope-ci-$gpu"

CONTAINER_NAME="modelscope-ci-$idx"
let is_get_file_lock=true

# pull image if there are update
docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
echo 'debugging'
docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
- --cpuset-cpus=${cpu_sets_arr[$gpu]} \
- --gpus="device=$gpu" \
+ --cpuset-cpus=${cpu_sets_arr[$idx]} \
+ --gpus='"'"device=$gpu"'"' \
-v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
-v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
- -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+ -v $MODELSCOPE_HOME_CACHE/$idx:/root \
-v /home/admin/pre-commit:/home/admin/pre-commit \
-e CI_TEST=True \
-e TEST_LEVEL=$TEST_LEVEL \
@@ -41,16 +43,15 @@ do
-e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
-e MODEL_TAG_URL=$MODEL_TAG_URL \
--workdir=$CODE_DIR_IN_CONTAINER \
- --net host \
${IMAGE_NAME}:${IMAGE_VERSION} \
$CI_COMMAND
else
docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
- --cpuset-cpus=${cpu_sets_arr[$gpu]} \
- --gpus="device=$gpu" \
+ --cpuset-cpus=${cpu_sets_arr[$idx]} \
+ --gpus='"'"device=$gpu"'"' \
-v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
-v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
- -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+ -v $MODELSCOPE_HOME_CACHE/$idx:/root \
-v /home/admin/pre-commit:/home/admin/pre-commit \
-e CI_TEST=True \
-e TEST_LEVEL=$TEST_LEVEL \
@@ -64,7 +65,6 @@ do
-e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
-e MODEL_TAG_URL=$MODEL_TAG_URL \
--workdir=$CODE_DIR_IN_CONTAINER \
- --net host \
${IMAGE_NAME}:${IMAGE_VERSION} \
$CI_COMMAND
fi
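
Taken together, the dockerci.sh changes move from one test container per GPU to one container per worker slot, each slot owning a GPU pair ('0,1', '2,3', ...) and a matching CPU set indexed by idx rather than by GPU id. The free-slot search still relies on flock -n against a per-device lock file. A minimal sketch of that non-blocking lock pattern in Python (fcntl is POSIX-only; the /tmp/gpu<idx> lock-file names mirror the script and are otherwise an assumption):

import fcntl
import os

def try_acquire_slot(idx):
    # Non-blocking exclusive lock, the Python analogue of `flock -n`;
    # the lock is held for as long as the returned fd stays open.
    fd = os.open(f'/tmp/gpu{idx}', os.O_CREAT | os.O_RDWR)
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        return fd
    except BlockingIOError:
        os.close(fd)  # slot busy; caller should try the next one
        return None

for slot in range(4):
    if try_acquire_slot(slot) is not None:
        print(f'claimed worker slot {slot}')
        break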


.pre-commit-config.yaml (+1 -1)

@@ -1,5 +1,5 @@
repos:
- - repo: https://github.com/PyCQA/flake8
+ - repo: https://github.com/pycqa/flake8.git
rev: 4.0.0
hooks:
- id: flake8


modelscope/hub/api.py (+15 -11)

@@ -23,9 +23,10 @@ from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA,
API_RESPONSE_FIELD_MESSAGE,
API_RESPONSE_FIELD_USERNAME,
DEFAULT_CREDENTIALS_PATH,
- MODELSCOPE_ENVIRONMENT,
- MODELSCOPE_USERNAME, ONE_YEAR_SECONDS,
- Licenses, ModelVisibility)
+ MODELSCOPE_CLOUD_ENVIRONMENT,
+ MODELSCOPE_CLOUD_USERNAME,
+ ONE_YEAR_SECONDS, Licenses,
+ ModelVisibility)
from modelscope.hub.errors import (InvalidParameter, NotExistError,
NotLoginException, NoValidRevisionError,
RequestError, datahub_raise_on_error,
@@ -653,10 +654,10 @@ class HubApi:
# get channel and user_name
channel = DownloadChannel.LOCAL.value
user_name = ''
- if MODELSCOPE_ENVIRONMENT in os.environ:
-     channel = os.environ[MODELSCOPE_ENVIRONMENT]
- if MODELSCOPE_USERNAME in os.environ:
-     user_name = os.environ[MODELSCOPE_USERNAME]
+ if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ:
+     channel = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT]
+ if MODELSCOPE_CLOUD_USERNAME in os.environ:
+     user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]

url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}?user={user_name}'
cookies = ModelScopeConfig.get_cookies()
@@ -777,12 +778,15 @@ class ModelScopeConfig:
Returns:
The formatted user-agent string.
"""

+ # include some more telemetrics when executing in dedicated
+ # cloud containers
env = 'custom'
- if MODELSCOPE_ENVIRONMENT in os.environ:
-     env = os.environ[MODELSCOPE_ENVIRONMENT]
+ if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ:
+     env = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT]
user_name = 'unknown'
- if MODELSCOPE_USERNAME in os.environ:
-     user_name = os.environ[MODELSCOPE_USERNAME]
+ if MODELSCOPE_CLOUD_USERNAME in os.environ:
+     user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]

ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % (
__version__,


modelscope/hub/constants.py (+2 -2)

@@ -16,9 +16,9 @@ API_RESPONSE_FIELD_GIT_ACCESS_TOKEN = 'AccessToken'
API_RESPONSE_FIELD_USERNAME = 'Username'
API_RESPONSE_FIELD_EMAIL = 'Email'
API_RESPONSE_FIELD_MESSAGE = 'Message'
- MODELSCOPE_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'
+ MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'
+ MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME'
MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG'
- MODELSCOPE_USERNAME = 'MODELSCOPE_USERNAME'
ONE_YEAR_SECONDS = 24 * 365 * 60 * 60




modelscope/hub/utils/utils.py (+0 -13)

@@ -87,16 +87,3 @@ def file_integrity_validation(file_path, expected_sha256):
msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path
logger.error(msg)
raise FileIntegrityError(msg)


- def create_library_statistics(method: str, name: str, cn_name: Optional[str]):
-     try:
-         from modelscope.hub.api import ModelScopeConfig
-         path = f'{get_endpoint()}/api/v1/statistics/library'
-         headers = {'user-agent': ModelScopeConfig.get_user_agent()}
-         params = {'Method': method, 'Name': name, 'CnName': cn_name}
-         r = requests.post(path, params=params, headers=headers)
-         r.raise_for_status()
-     except Exception:
-         pass
-     return

modelscope/models/audio/kws/farfield/model.py (+2 -1)

@@ -54,7 +54,8 @@ class FSMNSeleNetV2Decorator(TorchModel):
)

def __del__(self):
- self.tmp_dir.cleanup()
+ if hasattr(self, 'tmp_dir'):
+     self.tmp_dir.cleanup()

def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
return self.model.forward(input)
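
The hasattr guard above matters because Python calls __del__ even on a half-constructed object: if __init__ raises before tmp_dir is assigned, the unguarded cleanup adds an AttributeError during garbage collection on top of the original failure. A minimal repro of that failure mode (Holder is a hypothetical stand-in for the decorator class):

import tempfile

class Holder:
    def __init__(self, fail=False):
        if fail:
            raise RuntimeError('failed before tmp_dir was assigned')
        self.tmp_dir = tempfile.TemporaryDirectory()

    def __del__(self):
        # without this guard, the half-constructed instance raises
        # AttributeError while being garbage-collected
        if hasattr(self, 'tmp_dir'):
            self.tmp_dir.cleanup()

try:
    Holder(fail=True)
except RuntimeError:
    pass  # the partial object is now collected without a second error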


modelscope/models/cv/face_detection/mogface/models/detectors.py (+0 -1)

@@ -20,7 +20,6 @@ class MogFaceDetector(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
- torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device


modelscope/models/cv/face_detection/mtcnn/models/detector.py (+0 -1)

@@ -21,7 +21,6 @@ class MtcnnFaceDetector(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
- torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device


modelscope/models/cv/face_detection/retinaface/detection.py (+0 -1)

@@ -18,7 +18,6 @@ class RetinaFaceDetection(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
- torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.cfg = Config.from_file(


modelscope/models/cv/face_detection/ulfd_slim/detection.py (+0 -1)

@@ -24,7 +24,6 @@ class UlfdFaceDetector(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
- torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device


modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py (+0 -1)

@@ -24,7 +24,6 @@ class FacialExpressionRecognition(TorchModel):

def __init__(self, model_path, device='cuda'):
super().__init__(model_path)
- torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device


modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py (+0 -1)

@@ -31,7 +31,6 @@ cfg_re50 = {
class RetinaFaceDetection(object):

def __init__(self, model_path, device='cuda'):
- torch.set_grad_enabled(False)
cudnn.benchmark = True
self.model_path = model_path
self.device = device
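
The six face-related models above all lose the same line: a torch.set_grad_enabled(False) executed in the constructor, which flips autograd off process-wide and can silently break any training code that merely instantiates one of these detectors. The conventional scoped alternative looks like this (the Linear layer is just a stand-in for a detector):

import torch

model = torch.nn.Linear(4, 2)   # stand-in for a face detector
x = torch.randn(1, 4)
with torch.no_grad():           # autograd is off only inside this block
    y = model(x)
print(y.requires_grad)          # False
print(torch.is_grad_enabled())  # True again outside the block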


modelscope/models/cv/realtime_object_detection/realtime_video_detector.py (+6 -1)

@@ -7,6 +7,7 @@ import time

import cv2
import json
+ import numpy as np
import torch
from tqdm import tqdm

@@ -87,13 +88,17 @@ class RealtimeVideoDetector(TorchModel):
self.nmsthre,
class_agnostic=True)

- if len(outputs) == 1:
+ if len(outputs) == 1 and (outputs[0] is not None):
    bboxes = outputs[0][:, 0:4].cpu().numpy() / self.ratio
    scores = outputs[0][:, 5].cpu().numpy()
    labels = outputs[0][:, 6].cpu().int().numpy()
    pred_label_names = []
    for lab in labels:
        pred_label_names.append(self.label_mapping[lab])
+ else:
+     bboxes = np.asarray([])
+     scores = np.asarray([])
+     pred_label_names = np.asarray([])

return bboxes, scores, pred_label_names



modelscope/models/cv/referring_video_object_segmentation/model.py (+4 -1)

@@ -31,7 +31,10 @@ class ReferringVideoObjectSegmentation(TorchModel):

config_path = osp.join(model_dir, ModelFile.CONFIGURATION)
self.cfg = Config.from_file(config_path)
- self.model = MTTR(**self.cfg.model)
+ transformer_cfg_dir = osp.join(model_dir, 'transformer_cfg_dir')
+ self.model = MTTR(
+     transformer_cfg_dir=transformer_cfg_dir, **self.cfg.model)

model_path = osp.join(model_dir, ModelFile.TORCH_MODEL_FILE)
params_dict = torch.load(model_path, map_location='cpu')


modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py (+4 -1)

@@ -19,6 +19,7 @@ class MTTR(nn.Module):
num_queries,
mask_kernels_dim=8,
aux_loss=False,
+ transformer_cfg_dir=None,
**kwargs):
"""
Parameters:
@@ -29,7 +30,9 @@ class MTTR(nn.Module):
"""
super().__init__()
self.backbone = init_backbone(**kwargs)
- self.transformer = MultimodalTransformer(**kwargs)
+ assert transformer_cfg_dir is not None
+ self.transformer = MultimodalTransformer(
+     transformer_cfg_dir=transformer_cfg_dir, **kwargs)
d_model = self.transformer.d_model
self.is_referred_head = nn.Linear(
d_model,


modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py (+5 -2)

@@ -26,6 +26,7 @@ class MultimodalTransformer(nn.Module):
num_decoder_layers=3,
text_encoder_type='roberta-base',
freeze_text_encoder=True,
+ transformer_cfg_dir=None,
**kwargs):
super().__init__()
self.d_model = kwargs['d_model']
@@ -40,10 +41,12 @@ class MultimodalTransformer(nn.Module):
self.pos_encoder_2d = PositionEmbeddingSine2D()
self._reset_parameters()

- self.text_encoder = RobertaModel.from_pretrained(text_encoder_type)
+ if text_encoder_type != 'roberta-base':
+     transformer_cfg_dir = text_encoder_type
+ self.text_encoder = RobertaModel.from_pretrained(transformer_cfg_dir)
self.text_encoder.pooler = None # this pooler is never used, this is a hack to avoid DDP problems...
self.tokenizer = RobertaTokenizerFast.from_pretrained(
- text_encoder_type)
+ transformer_cfg_dir)
self.freeze_text_encoder = freeze_text_encoder
if freeze_text_encoder:
for p in self.text_encoder.parameters():
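
The net effect of threading transformer_cfg_dir through the segmentation model, MTTR and MultimodalTransformer is that the RoBERTa encoder and tokenizer now load from a directory bundled with the model snapshot instead of fetching 'roberta-base' from the Hugging Face hub at run time. from_pretrained accepts a local path whenever the directory contains the usual config, weight and tokenizer files; a sketch under that assumption (the path is hypothetical):

from transformers import RobertaModel, RobertaTokenizerFast

cfg_dir = '/path/to/model_dir/transformer_cfg_dir'  # hypothetical local snapshot
# reads config.json, weights and tokenizer files from disk; no network access
text_encoder = RobertaModel.from_pretrained(cfg_dir)
tokenizer = RobertaTokenizerFast.from_pretrained(cfg_dir)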


modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py (+10 -8)

@@ -188,11 +188,13 @@ class Worker(threading.Thread):


class KWSDataLoader:
"""
dataset: the dataset reference
batchsize: data batch size
numworkers: no. of workers
prefetch: prefetch factor
""" Load and organize audio data with multiple threads

Args:
dataset: the dataset reference
batchsize: data batch size
numworkers: no. of workers
prefetch: prefetch factor
"""

def __init__(self, dataset, batchsize, numworkers, prefetch=2):
@@ -202,7 +204,7 @@ class KWSDataLoader:
self.isrun = True

# data queue
- self.pool = queue.Queue(batchsize * prefetch)
+ self.pool = queue.Queue(numworkers * prefetch)

# initialize workers
self.workerlist = []
@@ -270,11 +272,11 @@ class KWSDataLoader:
w.stopWorker()

while not self.pool.empty():
- self.pool.get(block=True, timeout=0.001)
+ self.pool.get(block=True, timeout=0.01)

# wait workers terminated
for w in self.workerlist:
while not self.pool.empty():
- self.pool.get(block=True, timeout=0.001)
+ self.pool.get(block=True, timeout=0.01)
w.join()
logger.info('KWSDataLoader: All worker stopped.')
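
Two details in this change are easy to miss: the queue is now sized by numworkers * prefetch, so each producer thread can run at most prefetch batches ahead regardless of batch size, and the drain timeout grows from 1 ms to 10 ms so producers blocked on put() get a chance to wake up and observe the stop flag. A condensed sketch of the sizing and drain pattern, under those assumptions:

import queue

def make_pool(numworkers, prefetch=2):
    # bounded queue: numworkers producers, each at most `prefetch` ahead
    return queue.Queue(maxsize=numworkers * prefetch)

def drain(pool):
    # pop leftover batches so producers blocked on put() can exit
    while not pool.empty():
        try:
            pool.get(block=True, timeout=0.01)
        except queue.Empty:
            break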

modelscope/pipelines/base.py (+0 -4)

@@ -10,7 +10,6 @@ from typing import Any, Dict, Generator, List, Mapping, Union

import numpy as np

- from modelscope.hub.utils.utils import create_library_statistics
from modelscope.models.base import Model
from modelscope.msdatasets import MsDataset
from modelscope.outputs import TASK_OUTPUTS
@@ -152,9 +151,6 @@ class Pipeline(ABC):
**kwargs) -> Union[Dict[str, Any], Generator]:
# model provider should leave it as it is
# modelscope library developer will handle this function
- for single_model in self.models:
-     if hasattr(single_model, 'name'):
-         create_library_statistics('pipeline', single_model.name, None)
# place model to cpu or gpu
if (self.model or (self.has_multiple_models and self.models[0])):
if not self._model_prepare:


modelscope/pipelines/nlp/named_entity_recognition_pipeline.py (+19 -3)

@@ -92,6 +92,8 @@ class NamedEntityRecognitionPipeline(Pipeline):
offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]

labels = [self.id2label[x] for x in predictions]
if len(labels) > len(offset_mapping):
labels = labels[1:-1]
chunks = []
chunk = {}
for label, offsets in zip(labels, offset_mapping):
@@ -104,6 +106,20 @@ class NamedEntityRecognitionPipeline(Pipeline):
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'I':
if not chunk:
chunk = {
'type': label[2:],
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'E':
if not chunk:
chunk = {
'type': label[2:],
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'IES':
if chunk:
chunk['end'] = offsets[1]
@@ -118,15 +134,15 @@ class NamedEntityRecognitionPipeline(Pipeline):
chunk['span'] = text[chunk['start']:chunk['end']]
chunks.append(chunk)

- # for cws output
+ # for cws outputs
if len(chunks) > 0 and chunks[0]['type'] == 'cws':
    spans = [
        chunk['span'] for chunk in chunks if chunk['span'].strip()
    ]
    seg_result = ' '.join(spans)
-     outputs = {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []}
+     outputs = {OutputKeys.OUTPUT: seg_result}

- # for ner outpus
+ # for ner outputs
else:
outputs = {OutputKeys.OUTPUT: chunks}
return outputs
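
The added 'I' and 'E' branches here, duplicated in token_classification_pipeline.py and word_segmentation_pipeline.py below, let chunk decoding start mid-entity, e.g. when the labels were trimmed (as by the labels[1:-1] slice above) or a sequence opens with I-/E- instead of B-. A compact, self-contained sketch of BIES decoding in that spirit (simplified relative to the pipeline code; offsets are (start, end) character pairs):

def decode_bies(labels, offsets):
    # labels like 'B-PER', 'I-PER', 'E-PER', 'S-LOC', 'O'
    chunks, chunk = [], None
    for label, (start, end) in zip(labels, offsets):
        tag = label[0]
        if tag in 'BS' or (tag in 'IE' and chunk is None):
            chunk = {'type': label[2:], 'start': start, 'end': end}
        elif tag in 'IE':
            chunk['end'] = end      # extend the open chunk
        if tag in 'ES':
            chunks.append(chunk)    # an entity closes on E or S
            chunk = None
        elif tag == 'O':
            chunk = None            # drop any unterminated chunk
    return chunks

decode_bies(['B-PER', 'E-PER', 'O', 'S-LOC'], [(0, 1), (1, 2), (2, 3), (3, 4)])
# -> [{'type': 'PER', 'start': 0, 'end': 2}, {'type': 'LOC', 'start': 3, 'end': 4}]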


modelscope/pipelines/nlp/token_classification_pipeline.py (+14 -0)

@@ -95,6 +95,20 @@ class TokenClassificationPipeline(Pipeline):
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'I':
if not chunk:
chunk = {
'type': label[2:],
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'E':
if not chunk:
chunk = {
'type': label[2:],
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'IES':
if chunk:
chunk['end'] = offsets[1]


modelscope/pipelines/nlp/word_segmentation_pipeline.py (+21 -4)

@@ -80,9 +80,12 @@ class WordSegmentationPipeline(Pipeline):
Dict[str, str]: the prediction results
"""
text = inputs['text']
- logits = inputs[OutputKeys.LOGITS]
- predictions = torch.argmax(logits[0], dim=-1)
- logits = torch_nested_numpify(torch_nested_detach(logits))
+ if not hasattr(inputs, 'predictions'):
+     logits = inputs[OutputKeys.LOGITS]
+     predictions = torch.argmax(logits[0], dim=-1)
+ else:
+     predictions = inputs[OutputKeys.PREDICTIONS].squeeze(
+         0).cpu().numpy()
+ predictions = torch_nested_numpify(torch_nested_detach(predictions))
offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]

@@ -101,6 +104,20 @@ class WordSegmentationPipeline(Pipeline):
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'I':
if not chunk:
chunk = {
'type': label[2:],
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'E':
if not chunk:
chunk = {
'type': label[2:],
'start': offsets[0],
'end': offsets[1]
}
if label[0] in 'IES':
if chunk:
chunk['end'] = offsets[1]
@@ -123,7 +140,7 @@ class WordSegmentationPipeline(Pipeline):
seg_result = ' '.join(spans)
outputs = {OutputKeys.OUTPUT: seg_result}

- # for ner output
+ # for ner outputs
else:
outputs = {OutputKeys.OUTPUT: chunks}
return outputs

modelscope/trainers/audio/kws_farfield_trainer.py (+4 -3)

@@ -117,8 +117,7 @@ class KWSFarfieldTrainer(BaseTrainer):
self._batch_size = dataloader_config.batch_size_per_gpu
if 'model_bin' in kwargs:
model_bin_file = os.path.join(self.model_dir, kwargs['model_bin'])
- checkpoint = torch.load(model_bin_file)
- self.model.load_state_dict(checkpoint)
+ self.model = torch.load(model_bin_file)
# build corresponding optimizer and loss function
lr = self.cfg.train.optimizer.lr
self.optimizer = optim.Adam(self.model.parameters(), lr)
@@ -219,7 +218,9 @@ class KWSFarfieldTrainer(BaseTrainer):
# check point
ckpt_name = 'checkpoint_{:04d}_loss_train_{:.4f}_loss_val_{:.4f}.pth'.format(
self._current_epoch, loss_train_epoch, loss_val_epoch)
- torch.save(self.model, os.path.join(self.work_dir, ckpt_name))
+ save_path = os.path.join(self.work_dir, ckpt_name)
+ logger.info(f'Save model to {save_path}')
+ torch.save(self.model, save_path)
# time spent per epoch
epochtime = datetime.datetime.now() - epochtime
logger.info('Epoch {:04d} time spent: {:.2f} hours'.format(
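
Note the matched pair of semantics in this trainer: checkpointing uses torch.save(self.model, ...), which pickles the whole nn.Module, so the model_bin branch now correspondingly loads with torch.load(file) rather than load_state_dict. For reference, the two self-consistent pairings (illustrative file names):

import torch

net = torch.nn.Linear(4, 2)

# pairing 1: weights only -- smaller file, class must be importable at load time
torch.save(net.state_dict(), 'weights.pth')
net.load_state_dict(torch.load('weights.pth'))

# pairing 2: whole pickled module -- what this trainer saves and now loads
torch.save(net, 'model.pth')
net = torch.load('model.pth')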


modelscope/trainers/trainer.py (+0 -5)

@@ -15,7 +15,6 @@ from torch.utils.data.dataloader import default_collate
from torch.utils.data.distributed import DistributedSampler

from modelscope.hub.snapshot_download import snapshot_download
- from modelscope.hub.utils.utils import create_library_statistics
from modelscope.metainfo import Trainers
from modelscope.metrics import build_metric, task_default_metrics
from modelscope.models.base import Model, TorchModel
@@ -437,8 +436,6 @@ class EpochBasedTrainer(BaseTrainer):

def train(self, checkpoint_path=None, *args, **kwargs):
self._mode = ModeKeys.TRAIN
- if hasattr(self.model, 'name'):
-     create_library_statistics('train', self.model.name, None)

if self.train_dataset is None:
self.train_dataloader = self.get_train_dataloader()
@@ -459,8 +456,6 @@ class EpochBasedTrainer(BaseTrainer):
self.train_loop(self.train_dataloader)

def evaluate(self, checkpoint_path=None):
- if hasattr(self.model, 'name'):
-     create_library_statistics('evaluate', self.model.name, None)
if checkpoint_path is not None and os.path.isfile(checkpoint_path):
from modelscope.trainers.hooks import CheckpointHook
CheckpointHook.load_checkpoint(checkpoint_path, self)


modelscope/utils/audio/audio_utils.py (+4 -1)

@@ -43,7 +43,10 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
def repl(matched):
key = matched.group(1)
if key in conf_item:
- return conf_item[key]
+ value = conf_item[key]
+ if not isinstance(value, str):
+     value = str(value)
+ return value
else:
return None
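
The cast to str is needed because repl serves as a re.sub replacement callback, and re.sub raises a TypeError when the callable returns anything but a string, e.g. an int or float read from the config. A minimal sketch of the pattern (the ${key} placeholder syntax and the conf_item contents are assumptions; unlike the code above, this sketch leaves unknown keys untouched rather than returning None):

import re

conf_item = {'sample_rate': 16000}  # hypothetical config mapping

def repl(matched):
    key = matched.group(1)
    if key in conf_item:
        value = conf_item[key]
        if not isinstance(value, str):
            value = str(value)  # re.sub rejects non-str return values
        return value
    return matched.group(0)

print(re.sub(r'\$\{(\w+)\}', repl, 'rate=${sample_rate}'))  # rate=16000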



tests/run.py (+134 -2)

@@ -3,11 +3,13 @@

import argparse
import datetime
import math
import multiprocessing
import os
import subprocess
import sys
import tempfile
import time
import unittest
from fnmatch import fnmatch
from multiprocessing.managers import BaseManager
@@ -158,6 +160,21 @@ def run_command_with_popen(cmd):
sys.stdout.write(line)


def async_run_command_with_popen(cmd, device_id):
logger.info('Worker id: %s args: %s' % (device_id, cmd))
env = os.environ.copy()
env['CUDA_VISIBLE_DEVICES'] = '%s' % device_id
sub_process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=1,
universal_newlines=True,
env=env,
encoding='utf8')
return sub_process


def save_test_result(df, args):
if args.result_dir is not None:
file_name = str(int(datetime.datetime.now().timestamp() * 1000))
@@ -199,6 +216,108 @@ def install_requirements(requirements):
run_command(cmd)


def wait_for_free_worker(workers):
while True:
for idx, worker in enumerate(workers):
if worker is None:
logger.info('return free worker: %s' % (idx))
return idx
if worker.poll() is None: # running, get output
for line in iter(worker.stdout.readline, ''):
if line != '':
sys.stdout.write(line)
else:
break
else: # worker process completed.
logger.info('Process end: %s' % (idx))
workers[idx] = None
return idx
time.sleep(0.001)


def wait_for_workers(workers):
while True:
for idx, worker in enumerate(workers):
if worker is None:
continue
# check worker is completed.
if worker.poll() is None:
for line in iter(worker.stdout.readline, ''):
if line != '':
sys.stdout.write(line)
else:
break
else:
logger.info('Process idx: %s end!' % (idx))
workers[idx] = None

is_all_completed = True
for idx, worker in enumerate(workers):
if worker is not None:
is_all_completed = False
break

if is_all_completed:
logger.info('All subprocesses completed!')
break
time.sleep(0.001)


def parallel_run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
result_dir, parallel):
logger.info('Running case in env: %s' % env_name)
# install requirements and deps # run_config['envs'][env]
if 'requirements' in env:
install_requirements(env['requirements'])
if 'dependencies' in env:
install_packages(env['dependencies'])
# case worker processes
worker_processes = [None] * parallel
for test_suite_file in isolated_cases: # run case in subprocess
if test_suite_file in test_suite_env_map and test_suite_env_map[
test_suite_file] == env_name:
cmd = [
'python',
'tests/run.py',
'--pattern',
test_suite_file,
'--result_dir',
result_dir,
]
worker_idx = wait_for_free_worker(worker_processes)
worker_process = async_run_command_with_popen(cmd, worker_idx)
os.set_blocking(worker_process.stdout.fileno(), False)
worker_processes[worker_idx] = worker_process
else:
pass # case not in run list.

# run remain cases in a process.
remain_suite_files = []
for k, v in test_suite_env_map.items():
if k not in isolated_cases and v == env_name:
remain_suite_files.append(k)
if len(remain_suite_files) == 0:
return
# roughly split case in parallel
part_count = math.ceil(len(remain_suite_files) / parallel)
suites_chunks = [
remain_suite_files[x:x + part_count]
for x in range(0, len(remain_suite_files), part_count)
]
for suites_chunk in suites_chunks:
worker_idx = wait_for_free_worker(worker_processes)
cmd = [
'python', 'tests/run.py', '--result_dir', result_dir, '--suites'
]
for suite in suites_chunk:
cmd.append(suite)
worker_process = async_run_command_with_popen(cmd, worker_idx)
os.set_blocking(worker_process.stdout.fileno(), False)
worker_processes[worker_idx] = worker_process

wait_for_workers(worker_processes)


def run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
result_dir):
# install requirements and deps # run_config['envs'][env]
@@ -264,8 +383,9 @@ def run_in_subprocess(args):

with tempfile.TemporaryDirectory() as temp_result_dir:
for env in set(test_suite_env_map.values()):
- run_case_in_env(env, run_config['envs'][env], test_suite_env_map,
-                 isolated_cases, temp_result_dir)
+ parallel_run_case_in_env(env, run_config['envs'][env],
+                          test_suite_env_map, isolated_cases,
+                          temp_result_dir, args.parallel)

result_dfs = []
result_path = Path(temp_result_dir)
@@ -312,6 +432,10 @@ class TimeCostTextTestResult(TextTestResult):
self.stream.writeln(
'Test case: %s stop at: %s, cost time: %s(seconds)' %
(test.test_full_name, test.stop_time, test.time_cost))
if torch.cuda.is_available(
) and test.time_cost > 5.0: # print nvidia-smi
cmd = ['nvidia-smi']
run_command_with_popen(cmd)
super(TimeCostTextTestResult, self).stopTest(test)

def addSuccess(self, test):
@@ -383,6 +507,8 @@ def main(args):
os.path.abspath(args.test_dir), args.pattern, args.list_tests)
if not args.list_tests:
result = runner.run(test_suite)
logger.info('Running case completed, pid: %s, suites: %s' %
(os.getpid(), args.suites))
result = collect_test_results(result)
df = test_cases_result_to_df(result)
if args.result_dir is not None:
@@ -417,6 +543,12 @@ if __name__ == '__main__':
'--result_dir',
default=None,
help='Save result to directory, internal use only')
parser.add_argument(
'--parallel',
default=1,
type=int,
help='Number of parallel test processes; defaults to 1, typically set to the GPU count.'
)
parser.add_argument(
'--suites',
nargs='*',
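
The new --parallel flag is what dockerci.sh now passes (--parallel 2, matching the two GPUs handed to each container). Isolated suites each run in their own subprocess pinned to a worker via CUDA_VISIBLE_DEVICES; the remaining suites are split into roughly even chunks, one chunk per worker. The chunking arithmetic, extracted as a runnable sketch:

import math

def split_suites(suites, parallel):
    # roughly even split, as in parallel_run_case_in_env
    part = math.ceil(len(suites) / parallel)
    return [suites[i:i + part] for i in range(0, len(suites), part)]

print(split_suites(['a', 'b', 'c', 'd', 'e'], 2))  # [['a', 'b', 'c'], ['d', 'e']]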


tests/run_config.yaml (+28 -1)

@@ -1,5 +1,5 @@
# isolate cases in env, we can install different dependencies in each env.
- isolated: # test cases that may require an excessive amount of GPU memory, which will be executed in a dedicated process.
+ isolated: # test cases that may require an excessive amount of GPU memory or run for a long time, which will be executed in a dedicated process.
- test_text_to_speech.py
- test_multi_modal_embedding.py
- test_ofa_tasks.py
@@ -13,6 +13,33 @@ isolated: # test cases that may require excessive anmount of GPU memory, which
- test_movie_scene_segmentation.py
- test_image_inpainting.py
- test_mglm_text_summarization.py
- test_team_transfer_trainer.py
- test_image_denoise_trainer.py
- test_dialog_intent_trainer.py
- test_finetune_mplug.py
- test_image_instance_segmentation_trainer.py
- test_image_portrait_enhancement_trainer.py
- test_translation_trainer.py
- test_unifold.py
- test_automatic_post_editing.py
- test_mplug_tasks.py
- test_movie_scene_segmentation.py
- test_body_3d_keypoints.py
- test_finetune_text_generation.py
- test_clip_trainer.py
- test_ofa_trainer.py
- test_fill_mask.py
- test_hand_2d_keypoints.py
- test_referring_video_object_segmentation.py
- test_easycv_trainer_hand_2d_keypoints.py
- test_card_detection_scrfd_trainer.py
- test_referring_video_object_segmentation_trainer.py
- test_person_image_cartoon.py
- test_image_style_transfer.py
- test_ocr_detection.py
- test_automatic_speech_recognition.py
- test_image_matting.py
- test_skin_retouching.py

envs:
default: # default env, case not in other env will in default, pytorch.


tests/trainers/test_dialog_intent_trainer.py (+1 -1)

@@ -94,7 +94,7 @@ class TestDialogIntentTrainer(unittest.TestCase):
cfg.Model.update(config['Model'])
if self.debugging:
cfg.Trainer.save_checkpoint = False
- cfg.Trainer.num_epochs = 5
+ cfg.Trainer.num_epochs = 1
cfg.Trainer.batch_size_label = 64
return cfg


