diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh
index 81c28513..a3f13137 100644
--- a/.dev_scripts/ci_container_test.sh
+++ b/.dev_scripts/ci_container_test.sh
@@ -1,6 +1,3 @@
-echo "Testing envs"
-printenv
-echo "ENV END"
 if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     pip install -r requirements/tests.txt
     git config --global --add safe.directory /Maas-lib
@@ -28,7 +25,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
     awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
     awk -F: '/^[^#]/ { print $1 }' requirements/science.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
-    pip install -r requirements/tests.txt
+    # test with install
     python setup.py install
 else
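
Note: the awk/xargs pipeline above installs each non-comment requirement one package at a time, so a single resolver conflict does not abort the rest of the batch. A minimal Python sketch of the same pattern (the helper name is illustrative; the index URL is the one used in the script):

    import subprocess

    EXTRA_INDEX = 'https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html'

    def install_one_by_one(requirements_file):
        # mirrors: awk -F: '/^[^#]/ { print $1 }' <file> | xargs -n 1 pip install -f <index>
        with open(requirements_file) as f:
            for line in f:
                pkg = line.split(':', 1)[0].strip()  # awk -F: '{ print $1 }'
                if not pkg or pkg.startswith('#'):   # awk /^[^#]/
                    continue
                subprocess.run(['pip', 'install', pkg, '-f', EXTRA_INDEX],
                               check=False)
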
diff --git a/.dev_scripts/dockerci.sh b/.dev_scripts/dockerci.sh
index 07ea947a..de5d9a4a 100644
--- a/.dev_scripts/dockerci.sh
+++ b/.dev_scripts/dockerci.sh
@@ -3,30 +3,32 @@ MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
 CODE_DIR=$PWD
 CODE_DIR_IN_CONTAINER=/Maas-lib
 echo "$USER"
-gpus='7 6 5 4 3 2 1 0'
-cpu_sets='0-7 8-15 16-23 24-30 31-37 38-44 45-51 52-58'
+gpus='0,1 2,3 4,5 6,7'
+cpu_sets='45-58 31-44 16-30 0-15'
 cpu_sets_arr=($cpu_sets)
 is_get_file_lock=false
-# export RUN_CASE_COMMAND='python tests/run.py --run_config tests/run_config.yaml'
-CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh $RUN_CASE_BASE_COMMAND}
+CI_COMMAND='bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml'
 echo "ci command: $CI_COMMAND"
+idx=0
 for gpu in $gpus
 do
     exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
-    flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; continue; }
+    flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; idx=$((idx+1)); continue; }
    echo "get gpu lock $gpu"
-    CONTAINER_NAME="modelscope-ci-$gpu"
+
+    CONTAINER_NAME="modelscope-ci-$idx"
    let is_get_file_lock=true
    # pull image if there are update
    docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
    if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
+        echo 'debugging'
        docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
-            --cpuset-cpus=${cpu_sets_arr[$gpu]} \
-            --gpus="device=$gpu" \
+            --cpuset-cpus=${cpu_sets_arr[$idx]} \
+            --gpus='"'"device=$gpu"'"' \
            -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
            -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
-            -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+            -v $MODELSCOPE_HOME_CACHE/$idx:/root \
            -v /home/admin/pre-commit:/home/admin/pre-commit \
            -e CI_TEST=True \
            -e TEST_LEVEL=$TEST_LEVEL \
@@ -41,16 +43,15 @@ do
            -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
            -e MODEL_TAG_URL=$MODEL_TAG_URL \
            --workdir=$CODE_DIR_IN_CONTAINER \
-            --net host \
            ${IMAGE_NAME}:${IMAGE_VERSION} \
            $CI_COMMAND
    else
        docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
-            --cpuset-cpus=${cpu_sets_arr[$gpu]} \
-            --gpus="device=$gpu" \
+            --cpuset-cpus=${cpu_sets_arr[$idx]} \
+            --gpus='"'"device=$gpu"'"' \
            -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
            -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
-            -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+            -v $MODELSCOPE_HOME_CACHE/$idx:/root \
            -v /home/admin/pre-commit:/home/admin/pre-commit \
            -e CI_TEST=True \
            -e TEST_LEVEL=$TEST_LEVEL \
@@ -64,7 +65,6 @@ do
            -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
            -e MODEL_TAG_URL=$MODEL_TAG_URL \
            --workdir=$CODE_DIR_IN_CONTAINER \
-            --net host \
            ${IMAGE_NAME}:${IMAGE_VERSION} \
            $CI_COMMAND
    fi
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 68fc8484..d03bb411 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,5 @@
 repos:
-  - repo: https://github.com/PyCQA/flake8
+  - repo: https://github.com/pycqa/flake8.git
     rev: 4.0.0
     hooks:
     - id: flake8
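
Note: dockerci.sh now schedules one CI container per GPU pair and guards each pair with flock(1) on a per-pair lock file, so concurrent CI runs cannot grab the same devices. The same non-blocking acquisition can be sketched in Python (a minimal sketch, assuming Linux; fcntl is POSIX-only and the names are illustrative):

    import fcntl

    GPU_PAIRS = ['0,1', '2,3', '4,5', '6,7']

    def try_lock(pair):
        f = open('/tmp/gpu%s' % pair, 'w')
        try:
            fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)  # like flock -n
            return f  # keep the handle open to hold the lock
        except BlockingIOError:
            f.close()
            return None

    for idx, pair in enumerate(GPU_PAIRS):
        lock = try_lock(pair)
        if lock is not None:
            print('worker %d got GPUs %s' % (idx, pair))
            break
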
""" + + # include some more telemetrics when executing in dedicated + # cloud containers env = 'custom' - if MODELSCOPE_ENVIRONMENT in os.environ: - env = os.environ[MODELSCOPE_ENVIRONMENT] + if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ: + env = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT] user_name = 'unknown' - if MODELSCOPE_USERNAME in os.environ: - user_name = os.environ[MODELSCOPE_USERNAME] + if MODELSCOPE_CLOUD_USERNAME in os.environ: + user_name = os.environ[MODELSCOPE_CLOUD_USERNAME] ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % ( __version__, diff --git a/modelscope/hub/constants.py b/modelscope/hub/constants.py index 373a0cf4..83991e4e 100644 --- a/modelscope/hub/constants.py +++ b/modelscope/hub/constants.py @@ -16,9 +16,9 @@ API_RESPONSE_FIELD_GIT_ACCESS_TOKEN = 'AccessToken' API_RESPONSE_FIELD_USERNAME = 'Username' API_RESPONSE_FIELD_EMAIL = 'Email' API_RESPONSE_FIELD_MESSAGE = 'Message' -MODELSCOPE_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT' +MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT' +MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME' MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG' -MODELSCOPE_USERNAME = 'MODELSCOPE_USERNAME' ONE_YEAR_SECONDS = 24 * 365 * 60 * 60 diff --git a/modelscope/hub/utils/utils.py b/modelscope/hub/utils/utils.py index 61d560fa..3cc2c1e6 100644 --- a/modelscope/hub/utils/utils.py +++ b/modelscope/hub/utils/utils.py @@ -87,16 +87,3 @@ def file_integrity_validation(file_path, expected_sha256): msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path logger.error(msg) raise FileIntegrityError(msg) - - -def create_library_statistics(method: str, name: str, cn_name: Optional[str]): - try: - from modelscope.hub.api import ModelScopeConfig - path = f'{get_endpoint()}/api/v1/statistics/library' - headers = {'user-agent': ModelScopeConfig.get_user_agent()} - params = {'Method': method, 'Name': name, 'CnName': cn_name} - r = requests.post(path, params=params, headers=headers) - r.raise_for_status() - except Exception: - pass - return diff --git a/modelscope/models/audio/kws/farfield/model.py b/modelscope/models/audio/kws/farfield/model.py index af1c0a27..ee0301f9 100644 --- a/modelscope/models/audio/kws/farfield/model.py +++ b/modelscope/models/audio/kws/farfield/model.py @@ -54,7 +54,8 @@ class FSMNSeleNetV2Decorator(TorchModel): ) def __del__(self): - self.tmp_dir.cleanup() + if hasattr(self, 'tmp_dir'): + self.tmp_dir.cleanup() def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: return self.model.forward(input) diff --git a/modelscope/models/cv/face_detection/mogface/models/detectors.py b/modelscope/models/cv/face_detection/mogface/models/detectors.py index 8c1d9150..9009abfb 100644 --- a/modelscope/models/cv/face_detection/mogface/models/detectors.py +++ b/modelscope/models/cv/face_detection/mogface/models/detectors.py @@ -20,7 +20,6 @@ class MogFaceDetector(TorchModel): def __init__(self, model_path, device='cuda'): super().__init__(model_path) - torch.set_grad_enabled(False) cudnn.benchmark = True self.model_path = model_path self.device = device diff --git a/modelscope/models/cv/face_detection/mtcnn/models/detector.py b/modelscope/models/cv/face_detection/mtcnn/models/detector.py index 9c3aca3a..dde8a5e5 100644 --- a/modelscope/models/cv/face_detection/mtcnn/models/detector.py +++ b/modelscope/models/cv/face_detection/mtcnn/models/detector.py @@ -21,7 +21,6 @@ class MtcnnFaceDetector(TorchModel): def __init__(self, model_path, device='cuda'): 
diff --git a/modelscope/models/audio/kws/farfield/model.py b/modelscope/models/audio/kws/farfield/model.py
index af1c0a27..ee0301f9 100644
--- a/modelscope/models/audio/kws/farfield/model.py
+++ b/modelscope/models/audio/kws/farfield/model.py
@@ -54,7 +54,8 @@ class FSMNSeleNetV2Decorator(TorchModel):
         )

     def __del__(self):
-        self.tmp_dir.cleanup()
+        if hasattr(self, 'tmp_dir'):
+            self.tmp_dir.cleanup()

     def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
         return self.model.forward(input)
diff --git a/modelscope/models/cv/face_detection/mogface/models/detectors.py b/modelscope/models/cv/face_detection/mogface/models/detectors.py
index 8c1d9150..9009abfb 100644
--- a/modelscope/models/cv/face_detection/mogface/models/detectors.py
+++ b/modelscope/models/cv/face_detection/mogface/models/detectors.py
@@ -20,7 +20,6 @@ class MogFaceDetector(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device
diff --git a/modelscope/models/cv/face_detection/mtcnn/models/detector.py b/modelscope/models/cv/face_detection/mtcnn/models/detector.py
index 9c3aca3a..dde8a5e5 100644
--- a/modelscope/models/cv/face_detection/mtcnn/models/detector.py
+++ b/modelscope/models/cv/face_detection/mtcnn/models/detector.py
@@ -21,7 +21,6 @@ class MtcnnFaceDetector(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device
diff --git a/modelscope/models/cv/face_detection/retinaface/detection.py b/modelscope/models/cv/face_detection/retinaface/detection.py
index 3dd31659..7de2da58 100755
--- a/modelscope/models/cv/face_detection/retinaface/detection.py
+++ b/modelscope/models/cv/face_detection/retinaface/detection.py
@@ -18,7 +18,6 @@ class RetinaFaceDetection(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.cfg = Config.from_file(
diff --git a/modelscope/models/cv/face_detection/ulfd_slim/detection.py b/modelscope/models/cv/face_detection/ulfd_slim/detection.py
index c0e2da6e..25cad115 100755
--- a/modelscope/models/cv/face_detection/ulfd_slim/detection.py
+++ b/modelscope/models/cv/face_detection/ulfd_slim/detection.py
@@ -24,7 +24,6 @@ class UlfdFaceDetector(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device
diff --git a/modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py b/modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py
index c5eb71a1..ee08f527 100644
--- a/modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py
+++ b/modelscope/models/cv/facial_expression_recognition/fer/facial_expression_recognition.py
@@ -24,7 +24,6 @@ class FacialExpressionRecognition(TorchModel):

     def __init__(self, model_path, device='cuda'):
         super().__init__(model_path)
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device
diff --git a/modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py b/modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py
index 7ad780a8..e377d3fc 100755
--- a/modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py
+++ b/modelscope/models/cv/image_portrait_enhancement/retinaface/detection.py
@@ -31,7 +31,6 @@ cfg_re50 = {
 class RetinaFaceDetection(object):

     def __init__(self, model_path, device='cuda'):
-        torch.set_grad_enabled(False)
         cudnn.benchmark = True
         self.model_path = model_path
         self.device = device
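
Note: removing torch.set_grad_enabled(False) from these constructors matters because that call flips autograd globally for the whole process, so merely building one of these detectors would silently break any training code run afterwards. The scoped alternative keeps the effect local to inference; a minimal illustration:

    import torch

    model = torch.nn.Linear(4, 2).eval()
    x = torch.randn(1, 4)
    with torch.no_grad():  # autograd disabled only inside this block
        y = model(x)
    assert not y.requires_grad  # gradients still work everywhere else
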
diff --git a/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py b/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py
index 3830fb42..ebe0221b 100644
--- a/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py
+++ b/modelscope/models/cv/realtime_object_detection/realtime_video_detector.py
@@ -7,6 +7,7 @@ import time

 import cv2
 import json
+import numpy as np
 import torch
 from tqdm import tqdm

@@ -87,13 +88,17 @@ class RealtimeVideoDetector(TorchModel):
             self.nmsthre,
             class_agnostic=True)

-        if len(outputs) == 1:
+        if len(outputs) == 1 and (outputs[0] is not None):
             bboxes = outputs[0][:, 0:4].cpu().numpy() / self.ratio
             scores = outputs[0][:, 5].cpu().numpy()
             labels = outputs[0][:, 6].cpu().int().numpy()
             pred_label_names = []
             for lab in labels:
                 pred_label_names.append(self.label_mapping[lab])
+        else:
+            bboxes = np.asarray([])
+            scores = np.asarray([])
+            pred_label_names = np.asarray([])

         return bboxes, scores, pred_label_names
diff --git a/modelscope/models/cv/referring_video_object_segmentation/model.py b/modelscope/models/cv/referring_video_object_segmentation/model.py
index 91f7ea91..29e702be 100644
--- a/modelscope/models/cv/referring_video_object_segmentation/model.py
+++ b/modelscope/models/cv/referring_video_object_segmentation/model.py
@@ -31,7 +31,10 @@ class ReferringVideoObjectSegmentation(TorchModel):
         config_path = osp.join(model_dir, ModelFile.CONFIGURATION)
         self.cfg = Config.from_file(config_path)

-        self.model = MTTR(**self.cfg.model)
+        transformer_cfg_dir = osp.join(model_dir, 'transformer_cfg_dir')
+
+        self.model = MTTR(
+            transformer_cfg_dir=transformer_cfg_dir, **self.cfg.model)

         model_path = osp.join(model_dir, ModelFile.TORCH_MODEL_FILE)
         params_dict = torch.load(model_path, map_location='cpu')
diff --git a/modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py b/modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py
index e603df6c..48d4bf70 100644
--- a/modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py
+++ b/modelscope/models/cv/referring_video_object_segmentation/utils/mttr.py
@@ -19,6 +19,7 @@ class MTTR(nn.Module):
                  num_queries,
                  mask_kernels_dim=8,
                  aux_loss=False,
+                 transformer_cfg_dir=None,
                  **kwargs):
         """ Parameters:
@@ -29,7 +30,9 @@ class MTTR(nn.Module):
         """
         super().__init__()
         self.backbone = init_backbone(**kwargs)
-        self.transformer = MultimodalTransformer(**kwargs)
+        assert transformer_cfg_dir is not None
+        self.transformer = MultimodalTransformer(
+            transformer_cfg_dir=transformer_cfg_dir, **kwargs)
         d_model = self.transformer.d_model
         self.is_referred_head = nn.Linear(
             d_model,
diff --git a/modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py b/modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py
index 39962715..f750437a 100644
--- a/modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py
+++ b/modelscope/models/cv/referring_video_object_segmentation/utils/multimodal_transformer.py
@@ -26,6 +26,7 @@ class MultimodalTransformer(nn.Module):
                  num_decoder_layers=3,
                  text_encoder_type='roberta-base',
                  freeze_text_encoder=True,
+                 transformer_cfg_dir=None,
                  **kwargs):
         super().__init__()
         self.d_model = kwargs['d_model']
@@ -40,10 +41,12 @@ class MultimodalTransformer(nn.Module):
         self.pos_encoder_2d = PositionEmbeddingSine2D()
         self._reset_parameters()

-        self.text_encoder = RobertaModel.from_pretrained(text_encoder_type)
+        if text_encoder_type != 'roberta-base':
+            transformer_cfg_dir = text_encoder_type
+        self.text_encoder = RobertaModel.from_pretrained(transformer_cfg_dir)
         self.text_encoder.pooler = None  # this pooler is never used, this is a hack to avoid DDP problems...
         self.tokenizer = RobertaTokenizerFast.from_pretrained(
-            text_encoder_type)
+            transformer_cfg_dir)
         self.freeze_text_encoder = freeze_text_encoder
         if freeze_text_encoder:
             for p in self.text_encoder.parameters():
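
Note: the transformer_cfg_dir plumbing lets MTTR load its RoBERTa text encoder from a snapshot shipped inside the model directory instead of fetching 'roberta-base' from the Hugging Face hub at construction time. A sketch of the resulting calls (the directory name comes from the diff; the absolute path is illustrative):

    from transformers import RobertaModel, RobertaTokenizerFast

    local_dir = '/path/to/model_dir/transformer_cfg_dir'  # illustrative path
    text_encoder = RobertaModel.from_pretrained(local_dir)
    tokenizer = RobertaTokenizerFast.from_pretrained(local_dir)
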
diff --git a/modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py b/modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py
index 8c518ec9..d4866204 100644
--- a/modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py
+++ b/modelscope/msdatasets/task_datasets/audio/kws_farfield_dataset.py
@@ -188,11 +188,13 @@ class Worker(threading.Thread):


 class KWSDataLoader:
-    """
-    dataset: the dataset reference
-    batchsize: data batch size
-    numworkers: no. of workers
-    prefetch: prefetch factor
+    """ Load and organize audio data with multiple threads
+
+    Args:
+        dataset: the dataset reference
+        batchsize: data batch size
+        numworkers: no. of workers
+        prefetch: prefetch factor
     """

     def __init__(self, dataset, batchsize, numworkers, prefetch=2):
@@ -202,7 +204,7 @@ class KWSDataLoader:
         self.isrun = True

         # data queue
-        self.pool = queue.Queue(batchsize * prefetch)
+        self.pool = queue.Queue(numworkers * prefetch)

         # initialize workers
         self.workerlist = []
@@ -270,11 +272,11 @@ class KWSDataLoader:
             w.stopWorker()

         while not self.pool.empty():
-            self.pool.get(block=True, timeout=0.001)
+            self.pool.get(block=True, timeout=0.01)

         # wait workers terminated
         for w in self.workerlist:
             while not self.pool.empty():
-                self.pool.get(block=True, timeout=0.001)
+                self.pool.get(block=True, timeout=0.01)
             w.join()

         logger.info('KWSDataLoader: All worker stopped.')
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index 7a8bfd14..60d67786 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -10,7 +10,6 @@ from typing import Any, Dict, Generator, List, Mapping, Union

 import numpy as np

-from modelscope.hub.utils.utils import create_library_statistics
 from modelscope.models.base import Model
 from modelscope.msdatasets import MsDataset
 from modelscope.outputs import TASK_OUTPUTS
@@ -152,9 +151,6 @@ class Pipeline(ABC):
                  **kwargs) -> Union[Dict[str, Any], Generator]:
         # model provider should leave it as it is
         # modelscope library developer will handle this function
-        for single_model in self.models:
-            if hasattr(single_model, 'name'):
-                create_library_statistics('pipeline', single_model.name, None)

         # place model to cpu or gpu
         if (self.model or (self.has_multiple_models and self.models[0])):
             if not self._model_prepare:
diff --git a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
index fdcf9e0f..0e35efcb 100644
--- a/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
+++ b/modelscope/pipelines/nlp/named_entity_recognition_pipeline.py
@@ -92,6 +92,8 @@ class NamedEntityRecognitionPipeline(Pipeline):
         offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]

         labels = [self.id2label[x] for x in predictions]
+        if len(labels) > len(offset_mapping):
+            labels = labels[1:-1]
         chunks = []
         chunk = {}
         for label, offsets in zip(labels, offset_mapping):
@@ -104,6 +106,20 @@ class NamedEntityRecognitionPipeline(Pipeline):
                     'start': offsets[0],
                     'end': offsets[1]
                 }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
             if label[0] in 'IES':
                 if chunk:
                     chunk['end'] = offsets[1]
@@ -118,15 +134,15 @@ class NamedEntityRecognitionPipeline(Pipeline):
             chunk['span'] = text[chunk['start']:chunk['end']]
             chunks.append(chunk)

-        # for cws output
+        # for cws outputs
         if len(chunks) > 0 and chunks[0]['type'] == 'cws':
             spans = [
                 chunk['span'] for chunk in chunks if chunk['span'].strip()
             ]
             seg_result = ' '.join(spans)
-            outputs = {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []}
+            outputs = {OutputKeys.OUTPUT: seg_result}

-        # for ner outpus
+        # for ner outputs
         else:
             outputs = {OutputKeys.OUTPUT: chunks}
         return outputs
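
Note: the new I/E branches change the chunk decoder so that a label sequence starting mid-entity (for example after the label list is trimmed to match offset_mapping) still opens a chunk instead of being dropped. The rule, condensed into a standalone sketch (not the pipeline's exact code):

    def decode_bies(labels, offsets):
        chunks, chunk = [], {}
        for label, (start, end) in zip(labels, offsets):
            # B and S always open a chunk; I and E open one only if none is pending
            if label[0] in 'BS' or (label[0] in 'IE' and not chunk):
                chunk = {'type': label[2:], 'start': start, 'end': end}
            if label[0] in 'IES' and chunk:
                chunk['end'] = end
                if label[0] in 'ES':  # E and S close the chunk
                    chunks.append(chunk)
                    chunk = {}
        return chunks

    print(decode_bies(['I-LOC', 'E-LOC'], [(0, 1), (1, 2)]))
    # [{'type': 'LOC', 'start': 0, 'end': 2}]
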
diff --git a/modelscope/pipelines/nlp/token_classification_pipeline.py b/modelscope/pipelines/nlp/token_classification_pipeline.py
index 4af187ee..d2168b8a 100644
--- a/modelscope/pipelines/nlp/token_classification_pipeline.py
+++ b/modelscope/pipelines/nlp/token_classification_pipeline.py
@@ -95,6 +95,20 @@ class TokenClassificationPipeline(Pipeline):
                     'start': offsets[0],
                     'end': offsets[1]
                 }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
             if label[0] in 'IES':
                 if chunk:
                     chunk['end'] = offsets[1]
diff --git a/modelscope/pipelines/nlp/word_segmentation_pipeline.py b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
index c57f6b93..3d6f8a4a 100644
--- a/modelscope/pipelines/nlp/word_segmentation_pipeline.py
+++ b/modelscope/pipelines/nlp/word_segmentation_pipeline.py
@@ -80,9 +80,12 @@ class WordSegmentationPipeline(Pipeline):
             Dict[str, str]: the prediction results
         """
         text = inputs['text']
-        logits = inputs[OutputKeys.LOGITS]
-        predictions = torch.argmax(logits[0], dim=-1)
-        logits = torch_nested_numpify(torch_nested_detach(logits))
+        if not hasattr(inputs, 'predictions'):
+            logits = inputs[OutputKeys.LOGITS]
+            predictions = torch.argmax(logits[0], dim=-1)
+        else:
+            predictions = inputs[OutputKeys.PREDICTIONS].squeeze(
+                0).cpu().numpy()
         predictions = torch_nested_numpify(torch_nested_detach(predictions))
         offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]

@@ -101,6 +104,20 @@ class WordSegmentationPipeline(Pipeline):
                     'start': offsets[0],
                     'end': offsets[1]
                 }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
             if label[0] in 'IES':
                 if chunk:
                     chunk['end'] = offsets[1]
@@ -123,7 +140,7 @@ class WordSegmentationPipeline(Pipeline):
             seg_result = ' '.join(spans)
             outputs = {OutputKeys.OUTPUT: seg_result}

-        # for ner output
+        # for ner outputs
         else:
             outputs = {OutputKeys.OUTPUT: chunks}
         return outputs
diff --git a/modelscope/trainers/audio/kws_farfield_trainer.py b/modelscope/trainers/audio/kws_farfield_trainer.py
index 85c1a496..9d6013e9 100644
--- a/modelscope/trainers/audio/kws_farfield_trainer.py
+++ b/modelscope/trainers/audio/kws_farfield_trainer.py
@@ -117,8 +117,7 @@ class KWSFarfieldTrainer(BaseTrainer):
         self._batch_size = dataloader_config.batch_size_per_gpu
         if 'model_bin' in kwargs:
             model_bin_file = os.path.join(self.model_dir, kwargs['model_bin'])
-            checkpoint = torch.load(model_bin_file)
-            self.model.load_state_dict(checkpoint)
+            self.model = torch.load(model_bin_file)
         # build corresponding optimizer and loss function
         lr = self.cfg.train.optimizer.lr
         self.optimizer = optim.Adam(self.model.parameters(), lr)
@@ -219,7 +218,9 @@ class KWSFarfieldTrainer(BaseTrainer):
             # check point
             ckpt_name = 'checkpoint_{:04d}_loss_train_{:.4f}_loss_val_{:.4f}.pth'.format(
                 self._current_epoch, loss_train_epoch, loss_val_epoch)
-            torch.save(self.model, os.path.join(self.work_dir, ckpt_name))
+            save_path = os.path.join(self.work_dir, ckpt_name)
+            logger.info(f'Save model to {save_path}')
+            torch.save(self.model, save_path)
             # time spent per epoch
             epochtime = datetime.datetime.now() - epochtime
             logger.info('Epoch {:04d} time spent: {:.2f} hours'.format(
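
Note: the trainer change swaps a state_dict load for loading a fully pickled module, which matches how its checkpoints are written: torch.save(self.model, ...) pickles the whole object, architecture included. A toy round trip showing the pairing (on recent PyTorch, loading a full module may additionally require weights_only=False):

    import torch

    net = torch.nn.Linear(8, 2)
    torch.save(net, 'ckpt.pth')        # pickles the entire module
    restored = torch.load('ckpt.pth')  # no need to rebuild the architecture first
    assert isinstance(restored, torch.nn.Linear)
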
diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py
index 12c25f30..3556badf 100644
--- a/modelscope/trainers/trainer.py
+++ b/modelscope/trainers/trainer.py
@@ -15,7 +15,6 @@ from torch.utils.data.dataloader import default_collate
 from torch.utils.data.distributed import DistributedSampler

 from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.hub.utils.utils import create_library_statistics
 from modelscope.metainfo import Trainers
 from modelscope.metrics import build_metric, task_default_metrics
 from modelscope.models.base import Model, TorchModel
@@ -437,8 +436,6 @@ class EpochBasedTrainer(BaseTrainer):

     def train(self, checkpoint_path=None, *args, **kwargs):
         self._mode = ModeKeys.TRAIN
-        if hasattr(self.model, 'name'):
-            create_library_statistics('train', self.model.name, None)

         if self.train_dataset is None:
             self.train_dataloader = self.get_train_dataloader()
@@ -459,8 +456,6 @@ class EpochBasedTrainer(BaseTrainer):
         self.train_loop(self.train_dataloader)

     def evaluate(self, checkpoint_path=None):
-        if hasattr(self.model, 'name'):
-            create_library_statistics('evaluate', self.model.name, None)
         if checkpoint_path is not None and os.path.isfile(checkpoint_path):
             from modelscope.trainers.hooks import CheckpointHook
             CheckpointHook.load_checkpoint(checkpoint_path, self)
diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py
index 1ae5c8d2..c56359bd 100644
--- a/modelscope/utils/audio/audio_utils.py
+++ b/modelscope/utils/audio/audio_utils.py
@@ -43,7 +43,10 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
     def repl(matched):
         key = matched.group(1)
         if key in conf_item:
-            return conf_item[key]
+            value = conf_item[key]
+            if not isinstance(value, str):
+                value = str(value)
+            return value
         else:
             return None
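
Note: the str() coercion in repl is needed because re.sub requires its replacement callback to return a string and raises TypeError otherwise, for example when a conf value is an int. A small demonstration (the substitution pattern here is illustrative, not necessarily the one update_conf uses):

    import re

    conf_item = {'sample_rate': 16000}

    def repl(matched):
        value = conf_item[matched.group(1)]
        return value if isinstance(value, str) else str(value)

    print(re.sub(r'\$\{(\w+)\}', repl, 'rate=${sample_rate}'))  # rate=16000
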
diff --git a/tests/run.py b/tests/run.py
index 0759379f..dfc76fda 100644
--- a/tests/run.py
+++ b/tests/run.py
@@ -3,11 +3,13 @@
 import argparse
 import datetime
+import math
 import multiprocessing
 import os
 import subprocess
 import sys
 import tempfile
+import time
 import unittest
 from fnmatch import fnmatch
 from multiprocessing.managers import BaseManager
@@ -158,6 +160,21 @@ def run_command_with_popen(cmd):
             sys.stdout.write(line)


+def async_run_command_with_popen(cmd, device_id):
+    logger.info('Worker id: %s args: %s' % (device_id, cmd))
+    env = os.environ.copy()
+    env['CUDA_VISIBLE_DEVICES'] = '%s' % device_id
+    sub_process = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        bufsize=1,
+        universal_newlines=True,
+        env=env,
+        encoding='utf8')
+    return sub_process
+
+
 def save_test_result(df, args):
     if args.result_dir is not None:
         file_name = str(int(datetime.datetime.now().timestamp() * 1000))
@@ -199,6 +216,108 @@ def install_requirements(requirements):
         run_command(cmd)


+def wait_for_free_worker(workers):
+    while True:
+        for idx, worker in enumerate(workers):
+            if worker is None:
+                logger.info('return free worker: %s' % (idx))
+                return idx
+            if worker.poll() is None:  # running, get output
+                for line in iter(worker.stdout.readline, ''):
+                    if line != '':
+                        sys.stdout.write(line)
+                    else:
+                        break
+            else:  # worker process completed.
+                logger.info('Process end: %s' % (idx))
+                workers[idx] = None
+                return idx
+        time.sleep(0.001)
+
+
+def wait_for_workers(workers):
+    while True:
+        for idx, worker in enumerate(workers):
+            if worker is None:
+                continue
+            # check worker is completed.
+            if worker.poll() is None:
+                for line in iter(worker.stdout.readline, ''):
+                    if line != '':
+                        sys.stdout.write(line)
+                    else:
+                        break
+            else:
+                logger.info('Process idx: %s end!' % (idx))
+                workers[idx] = None
+
+        is_all_completed = True
+        for idx, worker in enumerate(workers):
+            if worker is not None:
+                is_all_completed = False
+                break
+
+        if is_all_completed:
+            logger.info('All subprocesses completed!')
+            break
+        time.sleep(0.001)
+
+
+def parallel_run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
+                             result_dir, parallel):
+    logger.info('Running case in env: %s' % env_name)
+    # install requirements and deps # run_config['envs'][env]
+    if 'requirements' in env:
+        install_requirements(env['requirements'])
+    if 'dependencies' in env:
+        install_packages(env['dependencies'])
+    # case worker processes
+    worker_processes = [None] * parallel
+    for test_suite_file in isolated_cases:  # run case in subprocess
+        if test_suite_file in test_suite_env_map and test_suite_env_map[
+                test_suite_file] == env_name:
+            cmd = [
+                'python',
+                'tests/run.py',
+                '--pattern',
+                test_suite_file,
+                '--result_dir',
+                result_dir,
+            ]
+            worker_idx = wait_for_free_worker(worker_processes)
+            worker_process = async_run_command_with_popen(cmd, worker_idx)
+            os.set_blocking(worker_process.stdout.fileno(), False)
+            worker_processes[worker_idx] = worker_process
+        else:
+            pass  # case not in run list.
+
+    # run remain cases in a process.
+    remain_suite_files = []
+    for k, v in test_suite_env_map.items():
+        if k not in isolated_cases and v == env_name:
+            remain_suite_files.append(k)
+    if len(remain_suite_files) == 0:
+        return
+    # roughly split case in parallel
+    part_count = math.ceil(len(remain_suite_files) / parallel)
+    suites_chunks = [
+        remain_suite_files[x:x + part_count]
+        for x in range(0, len(remain_suite_files), part_count)
+    ]
+    for suites_chunk in suites_chunks:
+        worker_idx = wait_for_free_worker(worker_processes)
+        cmd = [
+            'python', 'tests/run.py', '--result_dir', result_dir, '--suites'
+        ]
+        for suite in suites_chunk:
+            cmd.append(suite)
+        worker_process = async_run_command_with_popen(cmd, worker_idx)
+        os.set_blocking(worker_process.stdout.fileno(), False)
+        worker_processes[worker_idx] = worker_process
+
+    wait_for_workers(worker_processes)
+
+
 def run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
                     result_dir):
     # install requirements and deps # run_config['envs'][env]
@@ -264,8 +383,9 @@ def run_in_subprocess(args):

     with tempfile.TemporaryDirectory() as temp_result_dir:
         for env in set(test_suite_env_map.values()):
-            run_case_in_env(env, run_config['envs'][env], test_suite_env_map,
-                            isolated_cases, temp_result_dir)
+            parallel_run_case_in_env(env, run_config['envs'][env],
+                                     test_suite_env_map, isolated_cases,
+                                     temp_result_dir, args.parallel)

     result_dfs = []
     result_path = Path(temp_result_dir)
@@ -312,6 +432,10 @@ class TimeCostTextTestResult(TextTestResult):
         self.stream.writeln(
             'Test case: %s stop at: %s, cost time: %s(seconds)' %
             (test.test_full_name, test.stop_time, test.time_cost))
+        if torch.cuda.is_available(
+        ) and test.time_cost > 5.0:  # print nvidia-smi
+            cmd = ['nvidia-smi']
+            run_command_with_popen(cmd)
         super(TimeCostTextTestResult, self).stopTest(test)

     def addSuccess(self, test):
@@ -383,6 +507,8 @@ def main(args):
         os.path.abspath(args.test_dir), args.pattern, args.list_tests)
     if not args.list_tests:
         result = runner.run(test_suite)
+        logger.info('Running case completed, pid: %s, suites: %s' %
+                    (os.getpid(), args.suites))
     result = collect_test_results(result)
     df = test_cases_result_to_df(result)
     if args.result_dir is not None:
@@ -417,6 +543,12 @@ if __name__ == '__main__':
         '--result_dir',
         default=None,
         help='Save result to directory, internal use only')
+    parser.add_argument(
+        '--parallel',
+        default=1,
+        type=int,
+        help='Number of parallel test processes; defaults to 1, typically '
+        'set to the number of available GPUs.')
     parser.add_argument(
         '--suites',
         nargs='*',
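
Note: taken together, the new helpers implement a small worker pool: --parallel N slots, each subprocess pinned to a device via CUDA_VISIBLE_DEVICES, and stdout switched to non-blocking so the scheduler can drain output without stalling on a quiet worker. Reduced to its core (a sketch; the suite names and commands are illustrative):

    import os
    import subprocess
    import time

    def wait_for_free_slot(workers):
        while True:
            for idx, proc in enumerate(workers):
                if proc is None or proc.poll() is not None:
                    workers[idx] = None  # slot is free or its process exited
                    return idx
            time.sleep(0.001)

    workers = [None] * 2  # --parallel 2
    for case in ['test_a.py', 'test_b.py', 'test_c.py']:  # illustrative suites
        idx = wait_for_free_slot(workers)
        env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(idx))
        workers[idx] = subprocess.Popen(
            ['python', '-c', "print('%s')" % case], env=env)
    while any(w is not None and w.poll() is None for w in workers):
        time.sleep(0.001)  # drain remaining workers
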
diff --git a/tests/run_config.yaml b/tests/run_config.yaml
index faee2869..2a203f4a 100644
--- a/tests/run_config.yaml
+++ b/tests/run_config.yaml
@@ -1,5 +1,5 @@
 # isolate cases in env, we can install different dependencies in each env.
-isolated: # test cases that may require excessive anmount of GPU memory, which will be executed in dedicagted process.
+isolated: # test cases that may require an excessive amount of GPU memory or run for a long time; these will be executed in a dedicated process.
   - test_text_to_speech.py
   - test_multi_modal_embedding.py
   - test_ofa_tasks.py
@@ -13,6 +13,33 @@ isolated: # test cases that may require excessive anmount of GPU memory, which will be executed in dedicagted process.
   - test_movie_scene_segmentation.py
   - test_image_inpainting.py
   - test_mglm_text_summarization.py
+  - test_team_transfer_trainer.py
+  - test_image_denoise_trainer.py
+  - test_dialog_intent_trainer.py
+  - test_finetune_mplug.py
+  - test_image_instance_segmentation_trainer.py
+  - test_image_portrait_enhancement_trainer.py
+  - test_translation_trainer.py
+  - test_unifold.py
+  - test_automatic_post_editing.py
+  - test_mplug_tasks.py
+  - test_movie_scene_segmentation.py
+  - test_body_3d_keypoints.py
+  - test_finetune_text_generation.py
+  - test_clip_trainer.py
+  - test_ofa_trainer.py
+  - test_fill_mask.py
+  - test_hand_2d_keypoints.py
+  - test_referring_video_object_segmentation.py
+  - test_easycv_trainer_hand_2d_keypoints.py
+  - test_card_detection_scrfd_trainer.py
+  - test_referring_video_object_segmentation_trainer.py
+  - test_person_image_cartoon.py
+  - test_image_style_transfer.py
+  - test_ocr_detection.py
+  - test_automatic_speech_recognition.py
+  - test_image_matting.py
+  - test_skin_retouching.py

 envs:
   default: # default env, case not in other env will in default, pytorch.
diff --git a/tests/trainers/test_dialog_intent_trainer.py b/tests/trainers/test_dialog_intent_trainer.py
index 207387ac..ea1cb482 100644
--- a/tests/trainers/test_dialog_intent_trainer.py
+++ b/tests/trainers/test_dialog_intent_trainer.py
@@ -94,7 +94,7 @@ class TestDialogIntentTrainer(unittest.TestCase):
             cfg.Model.update(config['Model'])
         if self.debugging:
             cfg.Trainer.save_checkpoint = False
-            cfg.Trainer.num_epochs = 5
+            cfg.Trainer.num_epochs = 1
             cfg.Trainer.batch_size_label = 64
         return cfg