@@ -1,6 +1,3 @@
-echo "Testing envs"
-printenv
-echo "ENV END"
 if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     pip install -r requirements/tests.txt
     git config --global --add safe.directory /Maas-lib
@@ -28,7 +25,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
     awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
     awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
     awk -F: '/^[^#]/ { print $1 }' requirements/science.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    pip install -r requirements/tests.txt
     # test with install
     python setup.py install
 else
@@ -3,30 +3,32 @@ MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
 CODE_DIR=$PWD
 CODE_DIR_IN_CONTAINER=/Maas-lib
 echo "$USER"
-gpus='7 6 5 4 3 2 1 0'
-cpu_sets='0-7 8-15 16-23 24-30 31-37 38-44 45-51 52-58'
+gpus='0,1 2,3 4,5 6,7'
+cpu_sets='45-58 31-44 16-30 0-15'
 cpu_sets_arr=($cpu_sets)
 is_get_file_lock=false
-# export RUN_CASE_COMMAND='python tests/run.py --run_config tests/run_config.yaml'
-CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh $RUN_CASE_BASE_COMMAND}
+CI_COMMAND='bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml'
 echo "ci command: $CI_COMMAND"
+idx=0
 for gpu in $gpus
 do
     exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
-    flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; continue; }
+    flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; idx=$((idx+1)); continue; }
     echo "get gpu lock $gpu"
-    CONTAINER_NAME="modelscope-ci-$gpu"
+    CONTAINER_NAME="modelscope-ci-$idx"
     let is_get_file_lock=true
     # pull image if there are update
    docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
    if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
+        echo 'debugging'
        docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
-            --cpuset-cpus=${cpu_sets_arr[$gpu]} \
-            --gpus="device=$gpu" \
+            --cpuset-cpus=${cpu_sets_arr[$idx]} \
+            --gpus='"'"device=$gpu"'"' \
            -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
            -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
-            -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+            -v $MODELSCOPE_HOME_CACHE/$idx:/root \
            -v /home/admin/pre-commit:/home/admin/pre-commit \
            -e CI_TEST=True \
            -e TEST_LEVEL=$TEST_LEVEL \
@@ -41,16 +43,15 @@ do
            -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
            -e MODEL_TAG_URL=$MODEL_TAG_URL \
            --workdir=$CODE_DIR_IN_CONTAINER \
-            --net host \
            ${IMAGE_NAME}:${IMAGE_VERSION} \
            $CI_COMMAND
    else
        docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
-            --cpuset-cpus=${cpu_sets_arr[$gpu]} \
-            --gpus="device=$gpu" \
+            --cpuset-cpus=${cpu_sets_arr[$idx]} \
+            --gpus='"'"device=$gpu"'"' \
            -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
            -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
-            -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
+            -v $MODELSCOPE_HOME_CACHE/$idx:/root \
            -v /home/admin/pre-commit:/home/admin/pre-commit \
            -e CI_TEST=True \
            -e TEST_LEVEL=$TEST_LEVEL \
@@ -64,7 +65,6 @@ do
            -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
            -e MODEL_TAG_URL=$MODEL_TAG_URL \
            --workdir=$CODE_DIR_IN_CONTAINER \
-            --net host \
            ${IMAGE_NAME}:${IMAGE_VERSION} \
            $CI_COMMAND
    fi
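The GPU reservation above relies on `exec {lock_fd}>` plus `flock -n`: the lock file stays held as long as the script keeps the descriptor open, so two CI runs cannot claim the same GPU pair. A minimal Python rendering of the same idea (hypothetical helper, not part of the repo) for readers less familiar with the bash idiom:

```python
# Hypothetical sketch of the script's per-GPU lock, using fcntl.flock the way
# the bash code uses `exec {lock_fd}>"/tmp/gpu$gpu"` followed by `flock -n`.
import fcntl

def try_reserve_gpu(gpu: str):
    """Return an open lock-file handle if the GPU set is free, else None."""
    lock_file = open(f'/tmp/gpu{gpu}', 'w')  # same lock-path convention as the script
    try:
        # Non-blocking exclusive lock: fails immediately if another CI job holds it.
        fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
        return lock_file  # keep the handle open for as long as the GPUs are in use
    except BlockingIOError:
        lock_file.close()
        return None

for gpu in ['0,1', '2,3', '4,5', '6,7']:
    handle = try_reserve_gpu(gpu)
    if handle is not None:
        print(f'reserved GPUs {gpu}')
        break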
@@ -1,5 +1,5 @@
 repos:
-- repo: https://github.com/PyCQA/flake8
+- repo: https://github.com/pycqa/flake8.git
  rev: 4.0.0
  hooks:
  - id: flake8
@@ -23,9 +23,10 @@ from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA,
                                       API_RESPONSE_FIELD_MESSAGE,
                                       API_RESPONSE_FIELD_USERNAME,
                                       DEFAULT_CREDENTIALS_PATH,
-                                      MODELSCOPE_ENVIRONMENT,
-                                      MODELSCOPE_USERNAME, ONE_YEAR_SECONDS,
-                                      Licenses, ModelVisibility)
+                                      MODELSCOPE_CLOUD_ENVIRONMENT,
+                                      MODELSCOPE_CLOUD_USERNAME,
+                                      ONE_YEAR_SECONDS, Licenses,
+                                      ModelVisibility)
 from modelscope.hub.errors import (InvalidParameter, NotExistError,
                                    NotLoginException, NoValidRevisionError,
                                    RequestError, datahub_raise_on_error,
@@ -653,10 +654,10 @@ class HubApi:
        # get channel and user_name
        channel = DownloadChannel.LOCAL.value
        user_name = ''
-        if MODELSCOPE_ENVIRONMENT in os.environ:
-            channel = os.environ[MODELSCOPE_ENVIRONMENT]
-        if MODELSCOPE_USERNAME in os.environ:
-            user_name = os.environ[MODELSCOPE_USERNAME]
+        if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ:
+            channel = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT]
+        if MODELSCOPE_CLOUD_USERNAME in os.environ:
+            user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]
        url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}?user={user_name}'
        cookies = ModelScopeConfig.get_cookies()
@@ -777,12 +778,15 @@ class ModelScopeConfig:
        Returns:
            The formatted user-agent string.
        """
+        # include some more telemetrics when executing in dedicated
+        # cloud containers
        env = 'custom'
-        if MODELSCOPE_ENVIRONMENT in os.environ:
-            env = os.environ[MODELSCOPE_ENVIRONMENT]
+        if MODELSCOPE_CLOUD_ENVIRONMENT in os.environ:
+            env = os.environ[MODELSCOPE_CLOUD_ENVIRONMENT]
        user_name = 'unknown'
-        if MODELSCOPE_USERNAME in os.environ:
-            user_name = os.environ[MODELSCOPE_USERNAME]
+        if MODELSCOPE_CLOUD_USERNAME in os.environ:
+            user_name = os.environ[MODELSCOPE_CLOUD_USERNAME]
        ua = 'modelscope/%s; python/%s; session_id/%s; platform/%s; processor/%s; env/%s; user/%s' % (
            __version__,
@@ -16,9 +16,9 @@ API_RESPONSE_FIELD_GIT_ACCESS_TOKEN = 'AccessToken'
 API_RESPONSE_FIELD_USERNAME = 'Username'
 API_RESPONSE_FIELD_EMAIL = 'Email'
 API_RESPONSE_FIELD_MESSAGE = 'Message'
-MODELSCOPE_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'
+MODELSCOPE_CLOUD_ENVIRONMENT = 'MODELSCOPE_ENVIRONMENT'
+MODELSCOPE_CLOUD_USERNAME = 'MODELSCOPE_USERNAME'
 MODELSCOPE_SDK_DEBUG = 'MODELSCOPE_SDK_DEBUG'
-MODELSCOPE_USERNAME = 'MODELSCOPE_USERNAME'
 ONE_YEAR_SECONDS = 24 * 365 * 60 * 60
@@ -87,16 +87,3 @@ def file_integrity_validation(file_path, expected_sha256):
        msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path
        logger.error(msg)
        raise FileIntegrityError(msg)
-
-
-def create_library_statistics(method: str, name: str, cn_name: Optional[str]):
-    try:
-        from modelscope.hub.api import ModelScopeConfig
-        path = f'{get_endpoint()}/api/v1/statistics/library'
-        headers = {'user-agent': ModelScopeConfig.get_user_agent()}
-        params = {'Method': method, 'Name': name, 'CnName': cn_name}
-        r = requests.post(path, params=params, headers=headers)
-        r.raise_for_status()
-    except Exception:
-        pass
-    return
@@ -54,7 +54,8 @@ class FSMNSeleNetV2Decorator(TorchModel):
            )

    def __del__(self):
-        self.tmp_dir.cleanup()
+        if hasattr(self, 'tmp_dir'):
+            self.tmp_dir.cleanup()

    def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
        return self.model.forward(input)
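The `hasattr` guard matters because `__del__` also runs on partially constructed objects. A minimal sketch (hypothetical class name, not repository code) of the failure mode it avoids:

```python
# If __init__ raises before tmp_dir is assigned, __del__ still runs; without the
# guard it would raise AttributeError during garbage collection.
import tempfile

class Decorator:
    def __init__(self, fail: bool = False):
        if fail:
            raise RuntimeError('model load failed')  # tmp_dir never created
        self.tmp_dir = tempfile.TemporaryDirectory()

    def __del__(self):
        if hasattr(self, 'tmp_dir'):
            self.tmp_dir.cleanup()

try:
    Decorator(fail=True)
except RuntimeError:
    pass  # no AttributeError noise from __del__ afterwards
```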
@@ -20,7 +20,6 @@ class MogFaceDetector(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
-        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -21,7 +21,6 @@ class MtcnnFaceDetector(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
-        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -18,7 +18,6 @@ class RetinaFaceDetection(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
-        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.cfg = Config.from_file(
@@ -24,7 +24,6 @@ class UlfdFaceDetector(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
-        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -24,7 +24,6 @@ class FacialExpressionRecognition(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
-        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -31,7 +31,6 @@ cfg_re50 = {
 class RetinaFaceDetection(object):

    def __init__(self, model_path, device='cuda'):
-        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -7,6 +7,7 @@ import time
 import cv2
 import json
+import numpy as np
 import torch
 from tqdm import tqdm
@@ -87,13 +88,17 @@ class RealtimeVideoDetector(TorchModel):
            self.nmsthre,
            class_agnostic=True)
-        if len(outputs) == 1:
+        if len(outputs) == 1 and (outputs[0] is not None):
            bboxes = outputs[0][:, 0:4].cpu().numpy() / self.ratio
            scores = outputs[0][:, 5].cpu().numpy()
            labels = outputs[0][:, 6].cpu().int().numpy()
            pred_label_names = []
            for lab in labels:
                pred_label_names.append(self.label_mapping[lab])
+        else:
+            bboxes = np.asarray([])
+            scores = np.asarray([])
+            pred_label_names = np.asarray([])

        return bboxes, scores, pred_label_names
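Returning empty arrays when NMS yields nothing keeps downstream consumers uniform. A tiny illustration (hypothetical consumer, not repository code) of why the empty-array fallback is convenient:

```python
# Downstream code can zip/iterate results without special-casing frames
# where the detector produced no boxes.
import numpy as np

def draw_detections(bboxes, scores, names):
    for box, score, name in zip(bboxes, scores, names):  # yields nothing when empty
        print(name, float(score), box.tolist())

draw_detections(np.asarray([]), np.asarray([]), np.asarray([]))  # no output, no crash
```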
@@ -31,7 +31,10 @@ class ReferringVideoObjectSegmentation(TorchModel):
        config_path = osp.join(model_dir, ModelFile.CONFIGURATION)
        self.cfg = Config.from_file(config_path)

-        self.model = MTTR(**self.cfg.model)
+        transformer_cfg_dir = osp.join(model_dir, 'transformer_cfg_dir')
+        self.model = MTTR(
+            transformer_cfg_dir=transformer_cfg_dir, **self.cfg.model)

        model_path = osp.join(model_dir, ModelFile.TORCH_MODEL_FILE)
        params_dict = torch.load(model_path, map_location='cpu')
@@ -19,6 +19,7 @@ class MTTR(nn.Module):
                 num_queries,
                 mask_kernels_dim=8,
                 aux_loss=False,
+                 transformer_cfg_dir=None,
                 **kwargs):
        """
        Parameters:
@@ -29,7 +30,9 @@ class MTTR(nn.Module):
        """
        super().__init__()
        self.backbone = init_backbone(**kwargs)
-        self.transformer = MultimodalTransformer(**kwargs)
+        assert transformer_cfg_dir is not None
+        self.transformer = MultimodalTransformer(
+            transformer_cfg_dir=transformer_cfg_dir, **kwargs)
        d_model = self.transformer.d_model
        self.is_referred_head = nn.Linear(
            d_model,
@@ -26,6 +26,7 @@ class MultimodalTransformer(nn.Module):
                 num_decoder_layers=3,
                 text_encoder_type='roberta-base',
                 freeze_text_encoder=True,
+                 transformer_cfg_dir=None,
                 **kwargs):
        super().__init__()
        self.d_model = kwargs['d_model']
@@ -40,10 +41,12 @@ class MultimodalTransformer(nn.Module):
        self.pos_encoder_2d = PositionEmbeddingSine2D()
        self._reset_parameters()

-        self.text_encoder = RobertaModel.from_pretrained(text_encoder_type)
+        if text_encoder_type != 'roberta-base':
+            transformer_cfg_dir = text_encoder_type
+        self.text_encoder = RobertaModel.from_pretrained(transformer_cfg_dir)
        self.text_encoder.pooler = None  # this pooler is never used, this is a hack to avoid DDP problems...
        self.tokenizer = RobertaTokenizerFast.from_pretrained(
-            text_encoder_type)
+            transformer_cfg_dir)
        self.freeze_text_encoder = freeze_text_encoder
        if freeze_text_encoder:
            for p in self.text_encoder.parameters():
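The change works because `from_pretrained` accepts either a hub model id or a local directory, so shipping the RoBERTa config and vocab alongside the checkpoint removes the network fetch at load time. A minimal sketch of that loading pattern (hypothetical helper, not the repository's API):

```python
# Sketch: load the text encoder from a packaged local directory when available,
# falling back to the hub id otherwise.
from transformers import RobertaModel, RobertaTokenizerFast

def load_text_encoder(transformer_cfg_dir: str = None,
                      text_encoder_type: str = 'roberta-base'):
    # from_pretrained accepts a local path (offline) or a model id (downloads).
    source = transformer_cfg_dir or text_encoder_type
    encoder = RobertaModel.from_pretrained(source)
    tokenizer = RobertaTokenizerFast.from_pretrained(source)
    return encoder, tokenizer
```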
@@ -188,11 +188,13 @@ class Worker(threading.Thread):
 class KWSDataLoader:
-    """
-    dataset:    the dataset reference
-    batchsize:  data batch size
-    numworkers: no. of workers
-    prefetch:   prefetch factor
+    """ Load and organize audio data with multiple threads
+
+    Args:
+        dataset:    the dataset reference
+        batchsize:  data batch size
+        numworkers: no. of workers
+        prefetch:   prefetch factor
    """

    def __init__(self, dataset, batchsize, numworkers, prefetch=2):
@@ -202,7 +204,7 @@ class KWSDataLoader:
        self.isrun = True

        # data queue
-        self.pool = queue.Queue(batchsize * prefetch)
+        self.pool = queue.Queue(numworkers * prefetch)

        # initialize workers
        self.workerlist = []
@@ -270,11 +272,11 @@ class KWSDataLoader:
            w.stopWorker()

        while not self.pool.empty():
-            self.pool.get(block=True, timeout=0.001)
+            self.pool.get(block=True, timeout=0.01)

        # wait workers terminated
        for w in self.workerlist:
            while not self.pool.empty():
-                self.pool.get(block=True, timeout=0.001)
+                self.pool.get(block=True, timeout=0.01)
            w.join()
        logger.info('KWSDataLoader: All worker stopped.')
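Sizing the queue as `numworkers * prefetch` ties the number of in-flight batches to the worker count rather than to the batch size, which is what a prefetch factor is meant to do. A self-contained sketch of that bounded producer/consumer pattern (hypothetical values, not the library's classes):

```python
# Each worker can keep `prefetch` finished batches queued; producers block once
# the bounded queue fills up, so memory use stays proportional to numworkers.
import queue
import threading

numworkers, prefetch, batches_per_worker = 4, 2, 10
pool = queue.Queue(maxsize=numworkers * prefetch)

def worker(wid: int):
    for i in range(batches_per_worker):
        pool.put(f'batch-{wid}-{i}')  # blocks when the consumer falls behind

threads = [threading.Thread(target=worker, args=(w,)) for w in range(numworkers)]
for t in threads:
    t.start()
for _ in range(batches_per_worker * numworkers):
    pool.get()  # consume everything so producers can finish
for t in threads:
    t.join()
```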
@@ -10,7 +10,6 @@ from typing import Any, Dict, Generator, List, Mapping, Union
 import numpy as np

-from modelscope.hub.utils.utils import create_library_statistics
 from modelscope.models.base import Model
 from modelscope.msdatasets import MsDataset
 from modelscope.outputs import TASK_OUTPUTS
@@ -152,9 +151,6 @@ class Pipeline(ABC):
                 **kwargs) -> Union[Dict[str, Any], Generator]:
        # model provider should leave it as it is
        # modelscope library developer will handle this function
-        for single_model in self.models:
-            if hasattr(single_model, 'name'):
-                create_library_statistics('pipeline', single_model.name, None)

        # place model to cpu or gpu
        if (self.model or (self.has_multiple_models and self.models[0])):
            if not self._model_prepare:
@@ -92,6 +92,8 @@ class NamedEntityRecognitionPipeline(Pipeline):
        offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]
        labels = [self.id2label[x] for x in predictions]
+        if len(labels) > len(offset_mapping):
+            labels = labels[1:-1]
        chunks = []
        chunk = {}
        for label, offsets in zip(labels, offset_mapping):
@@ -104,6 +106,20 @@ class NamedEntityRecognitionPipeline(Pipeline):
                    'start': offsets[0],
                    'end': offsets[1]
                }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
            if label[0] in 'IES':
                if chunk:
                    chunk['end'] = offsets[1]
@@ -118,15 +134,15 @@ class NamedEntityRecognitionPipeline(Pipeline):
                chunk['span'] = text[chunk['start']:chunk['end']]
                chunks.append(chunk)

-        # for cws output
+        # for cws outputs
        if len(chunks) > 0 and chunks[0]['type'] == 'cws':
            spans = [
                chunk['span'] for chunk in chunks if chunk['span'].strip()
            ]
            seg_result = ' '.join(spans)
-            outputs = {OutputKeys.OUTPUT: seg_result, OutputKeys.LABELS: []}
+            outputs = {OutputKeys.OUTPUT: seg_result}

-        # for ner outpus
+        # for ner outputs
        else:
            outputs = {OutputKeys.OUTPUT: chunks}
        return outputs
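The added `I`/`E` branches cover sequences that start mid-entity, where no chunk was opened by a `B` or `S` tag. A compact, simplified sketch of the BIOES decoding idea (hypothetical standalone function, not the pipeline's actual code):

```python
# BIOES chunk decoding: a chunk normally opens on B/S; if an I or E arrives with
# no open chunk (e.g. the span was truncated), open one on the spot so it is kept.
def decode_bioes(labels, offsets):
    chunks, chunk = [], {}
    for label, (start, end) in zip(labels, offsets):
        if label[0] in 'BS':
            chunk = {'type': label[2:], 'start': start, 'end': end}
        elif label[0] in 'IE' and not chunk:
            chunk = {'type': label[2:], 'start': start, 'end': end}
        if label[0] in 'IES' and chunk:
            chunk['end'] = end
        if label[0] in 'ES' and chunk:
            chunks.append(chunk)
            chunk = {}
    return chunks

print(decode_bioes(['B-PER', 'E-PER', 'O', 'S-LOC'],
                   [(0, 2), (2, 4), (4, 5), (5, 7)]))
# [{'type': 'PER', 'start': 0, 'end': 4}, {'type': 'LOC', 'start': 5, 'end': 7}]
```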
@@ -95,6 +95,20 @@ class TokenClassificationPipeline(Pipeline):
                    'start': offsets[0],
                    'end': offsets[1]
                }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
            if label[0] in 'IES':
                if chunk:
                    chunk['end'] = offsets[1]
@@ -80,9 +80,12 @@ class WordSegmentationPipeline(Pipeline):
            Dict[str, str]: the prediction results
        """
        text = inputs['text']
-        logits = inputs[OutputKeys.LOGITS]
-        predictions = torch.argmax(logits[0], dim=-1)
-        logits = torch_nested_numpify(torch_nested_detach(logits))
+        if not hasattr(inputs, 'predictions'):
+            logits = inputs[OutputKeys.LOGITS]
+            predictions = torch.argmax(logits[0], dim=-1)
+        else:
+            predictions = inputs[OutputKeys.PREDICTIONS].squeeze(
+                0).cpu().numpy()
        predictions = torch_nested_numpify(torch_nested_detach(predictions))
        offset_mapping = [x.cpu().tolist() for x in inputs['offset_mapping']]
@@ -101,6 +104,20 @@ class WordSegmentationPipeline(Pipeline):
                    'start': offsets[0],
                    'end': offsets[1]
                }
+            if label[0] in 'I':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
+            if label[0] in 'E':
+                if not chunk:
+                    chunk = {
+                        'type': label[2:],
+                        'start': offsets[0],
+                        'end': offsets[1]
+                    }
            if label[0] in 'IES':
                if chunk:
                    chunk['end'] = offsets[1]
@@ -123,7 +140,7 @@ class WordSegmentationPipeline(Pipeline):
            seg_result = ' '.join(spans)
            outputs = {OutputKeys.OUTPUT: seg_result}

-        # for ner output
+        # for ner outputs
        else:
            outputs = {OutputKeys.OUTPUT: chunks}
        return outputs
@@ -117,8 +117,7 @@ class KWSFarfieldTrainer(BaseTrainer):
        self._batch_size = dataloader_config.batch_size_per_gpu
        if 'model_bin' in kwargs:
            model_bin_file = os.path.join(self.model_dir, kwargs['model_bin'])
-            checkpoint = torch.load(model_bin_file)
-            self.model.load_state_dict(checkpoint)
+            self.model = torch.load(model_bin_file)
        # build corresponding optimizer and loss function
        lr = self.cfg.train.optimizer.lr
        self.optimizer = optim.Adam(self.model.parameters(), lr)
@@ -219,7 +218,9 @@ class KWSFarfieldTrainer(BaseTrainer):
            # check point
            ckpt_name = 'checkpoint_{:04d}_loss_train_{:.4f}_loss_val_{:.4f}.pth'.format(
                self._current_epoch, loss_train_epoch, loss_val_epoch)
-            torch.save(self.model, os.path.join(self.work_dir, ckpt_name))
+            save_path = os.path.join(self.work_dir, ckpt_name)
+            logger.info(f'Save model to {save_path}')
+            torch.save(self.model, save_path)
            # time spent per epoch
            epochtime = datetime.datetime.now() - epochtime
            logger.info('Epoch {:04d} time spent: {:.2f} hours'.format(
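The trainer now saves the entire module object and reloads it with a plain `torch.load`, matching the save side of the same file. A small sketch contrasting the two checkpoint styles involved (generic example, not the trainer itself):

```python
# Whole-module checkpoint vs. state_dict checkpoint.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)

# Whole-module: reload with torch.load, no need to rebuild the architecture first,
# but unpickling requires the original class definition to be importable.
torch.save(model, 'model_full.pth')
restored = torch.load('model_full.pth')

# state_dict: smaller coupling to the class, but the model must be reconstructed.
torch.save(model.state_dict(), 'model_state.pth')
rebuilt = nn.Linear(4, 2)
rebuilt.load_state_dict(torch.load('model_state.pth'))
```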
@@ -15,7 +15,6 @@ from torch.utils.data.dataloader import default_collate
 from torch.utils.data.distributed import DistributedSampler

 from modelscope.hub.snapshot_download import snapshot_download
-from modelscope.hub.utils.utils import create_library_statistics
 from modelscope.metainfo import Trainers
 from modelscope.metrics import build_metric, task_default_metrics
 from modelscope.models.base import Model, TorchModel
@@ -437,8 +436,6 @@ class EpochBasedTrainer(BaseTrainer):
    def train(self, checkpoint_path=None, *args, **kwargs):
        self._mode = ModeKeys.TRAIN
-        if hasattr(self.model, 'name'):
-            create_library_statistics('train', self.model.name, None)

        if self.train_dataset is None:
            self.train_dataloader = self.get_train_dataloader()
@@ -459,8 +456,6 @@ class EpochBasedTrainer(BaseTrainer):
        self.train_loop(self.train_dataloader)

    def evaluate(self, checkpoint_path=None):
-        if hasattr(self.model, 'name'):
-            create_library_statistics('evaluate', self.model.name, None)
        if checkpoint_path is not None and os.path.isfile(checkpoint_path):
            from modelscope.trainers.hooks import CheckpointHook
            CheckpointHook.load_checkpoint(checkpoint_path, self)
@@ -43,7 +43,10 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
    def repl(matched):
        key = matched.group(1)
        if key in conf_item:
-            return conf_item[key]
+            value = conf_item[key]
+            if not isinstance(value, str):
+                value = str(value)
+            return value
        else:
            return None
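The `str()` conversion is needed because a callable passed to `re.sub` must return a string; numeric config values would otherwise raise a `TypeError`. A short sketch of the pattern (the `${...}` placeholder syntax and item names are illustrative assumptions, not the real config format):

```python
# re.sub with a replacement function: the function must return str, so
# non-string config values are converted before substitution.
import re

conf_item = {'sample_rate': 16000, 'lang': 'en'}

def repl(matched):
    value = conf_item[matched.group(1)]
    return value if isinstance(value, str) else str(value)

line = 'sample_rate=${sample_rate} lang=${lang}'
print(re.sub(r'\$\{(\w+)\}', repl, line))  # sample_rate=16000 lang=en
```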
@@ -3,11 +3,13 @@
 import argparse
 import datetime
+import math
 import multiprocessing
 import os
 import subprocess
 import sys
 import tempfile
+import time
 import unittest
 from fnmatch import fnmatch
 from multiprocessing.managers import BaseManager
@@ -158,6 +160,21 @@ def run_command_with_popen(cmd):
            sys.stdout.write(line)


+def async_run_command_with_popen(cmd, device_id):
+    logger.info('Worker id: %s args: %s' % (device_id, cmd))
+    env = os.environ.copy()
+    env['CUDA_VISIBLE_DEVICES'] = '%s' % device_id
+    sub_process = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        bufsize=1,
+        universal_newlines=True,
+        env=env,
+        encoding='utf8')
+    return sub_process
+
+
 def save_test_result(df, args):
    if args.result_dir is not None:
        file_name = str(int(datetime.datetime.now().timestamp() * 1000))
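The helper above pins each child process to a device via `CUDA_VISIBLE_DEVICES` and leaves it to the caller to poll for completion. A simplified, self-contained sketch of that worker-pool polling pattern (the actual helpers additionally use `os.set_blocking` to stream output incrementally):

```python
# Spawn one subprocess per device id and poll for finished slots, draining each
# worker's output when it completes. Simplified illustration, not the repo's code.
import os
import subprocess
import sys
import time

def spawn(cmd, device_id):
    env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(device_id))
    return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True, env=env)

workers = [spawn([sys.executable, '-c', f'print("worker {i} done")'], i)
           for i in range(2)]
while any(w is not None for w in workers):
    for idx, w in enumerate(workers):
        if w is not None and w.poll() is not None:  # finished: drain output, free slot
            sys.stdout.write(w.stdout.read())
            workers[idx] = None
    time.sleep(0.01)
```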
@@ -199,6 +216,108 @@ def install_requirements(requirements):
        run_command(cmd)


+def wait_for_free_worker(workers):
+    while True:
+        for idx, worker in enumerate(workers):
+            if worker is None:
+                logger.info('return free worker: %s' % (idx))
+                return idx
+            if worker.poll() is None:  # running, get output
+                for line in iter(worker.stdout.readline, ''):
+                    if line != '':
+                        sys.stdout.write(line)
+                    else:
+                        break
+            else:  # worker process completed.
+                logger.info('Process end: %s' % (idx))
+                workers[idx] = None
+                return idx
+        time.sleep(0.001)
+
+
+def wait_for_workers(workers):
+    while True:
+        for idx, worker in enumerate(workers):
+            if worker is None:
+                continue
+            # check whether the worker is completed.
+            if worker.poll() is None:
+                for line in iter(worker.stdout.readline, ''):
+                    if line != '':
+                        sys.stdout.write(line)
+                    else:
+                        break
+            else:
+                logger.info('Process idx: %s ended.' % (idx))
+                workers[idx] = None
+
+        is_all_completed = True
+        for idx, worker in enumerate(workers):
+            if worker is not None:
+                is_all_completed = False
+                break
+        if is_all_completed:
+            logger.info('All subprocesses are completed!')
+            break
+        time.sleep(0.001)
+
+
+def parallel_run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
+                             result_dir, parallel):
+    logger.info('Running case in env: %s' % env_name)
+    # install requirements and deps # run_config['envs'][env]
+    if 'requirements' in env:
+        install_requirements(env['requirements'])
+    if 'dependencies' in env:
+        install_packages(env['dependencies'])
+    # case worker processes
+    worker_processes = [None] * parallel
+    for test_suite_file in isolated_cases:  # run case in subprocess
+        if test_suite_file in test_suite_env_map and test_suite_env_map[
+                test_suite_file] == env_name:
+            cmd = [
+                'python',
+                'tests/run.py',
+                '--pattern',
+                test_suite_file,
+                '--result_dir',
+                result_dir,
+            ]
+            worker_idx = wait_for_free_worker(worker_processes)
+            worker_process = async_run_command_with_popen(cmd, worker_idx)
+            os.set_blocking(worker_process.stdout.fileno(), False)
+            worker_processes[worker_idx] = worker_process
+        else:
+            pass  # case not in run list.
+
+    # run remaining cases in a process.
+    remain_suite_files = []
+    for k, v in test_suite_env_map.items():
+        if k not in isolated_cases and v == env_name:
+            remain_suite_files.append(k)
+    if len(remain_suite_files) == 0:
+        return
+    # roughly split cases in parallel
+    part_count = math.ceil(len(remain_suite_files) / parallel)
+    suites_chunks = [
+        remain_suite_files[x:x + part_count]
+        for x in range(0, len(remain_suite_files), part_count)
+    ]
+    for suites_chunk in suites_chunks:
+        worker_idx = wait_for_free_worker(worker_processes)
+        cmd = [
+            'python', 'tests/run.py', '--result_dir', result_dir, '--suites'
+        ]
+        for suite in suites_chunk:
+            cmd.append(suite)
+        worker_process = async_run_command_with_popen(cmd, worker_idx)
+        os.set_blocking(worker_process.stdout.fileno(), False)
+        worker_processes[worker_idx] = worker_process
+
+    wait_for_workers(worker_processes)
+
+
 def run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
                    result_dir):
    # install requirements and deps # run_config['envs'][env]
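The non-isolated suites are split into at most `parallel` chunks of roughly equal size before being handed to workers. A small sketch of that chunking arithmetic in isolation (illustrative helper, not part of run.py):

```python
# ceil(len/parallel) items per chunk, so every worker gets at most one chunk and
# chunk sizes differ by at most one batch.
import math

def split_chunks(items, parallel):
    part = math.ceil(len(items) / parallel)
    return [items[i:i + part] for i in range(0, len(items), part)]

print(split_chunks(['a', 'b', 'c', 'd', 'e'], 2))  # [['a', 'b', 'c'], ['d', 'e']]
```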
@@ -264,8 +383,9 @@ def run_in_subprocess(args):
    with tempfile.TemporaryDirectory() as temp_result_dir:
        for env in set(test_suite_env_map.values()):
-            run_case_in_env(env, run_config['envs'][env], test_suite_env_map,
-                            isolated_cases, temp_result_dir)
+            parallel_run_case_in_env(env, run_config['envs'][env],
+                                     test_suite_env_map, isolated_cases,
+                                     temp_result_dir, args.parallel)

        result_dfs = []
        result_path = Path(temp_result_dir)
@@ -312,6 +432,10 @@ class TimeCostTextTestResult(TextTestResult):
        self.stream.writeln(
            'Test case: %s stop at: %s, cost time: %s(seconds)' %
            (test.test_full_name, test.stop_time, test.time_cost))
+        if torch.cuda.is_available(
+        ) and test.time_cost > 5.0:  # print nvidia-smi
+            cmd = ['nvidia-smi']
+            run_command_with_popen(cmd)
        super(TimeCostTextTestResult, self).stopTest(test)

    def addSuccess(self, test):
@@ -383,6 +507,8 @@ def main(args):
        os.path.abspath(args.test_dir), args.pattern, args.list_tests)
    if not args.list_tests:
        result = runner.run(test_suite)
+        logger.info('Running case completed, pid: %s, suites: %s' %
+                    (os.getpid(), args.suites))
        result = collect_test_results(result)
        df = test_cases_result_to_df(result)
        if args.result_dir is not None:
@@ -417,6 +543,12 @@ if __name__ == '__main__':
        '--result_dir',
        default=None,
        help='Save result to directory, internal use only')
+    parser.add_argument(
+        '--parallel',
+        default=1,
+        type=int,
+        help='Number of parallel case workers; defaults to a single process, set to the GPU count.'
+    )
    parser.add_argument(
        '--suites',
        nargs='*',
@@ -1,5 +1,5 @@
 # isolate cases in env, we can install different dependencies in each env.
-isolated: # test cases that may require an excessive amount of GPU memory, which will be executed in a dedicated process.
+isolated: # test cases that may require an excessive amount of GPU memory or run for a long time, which will be executed in a dedicated process.
 - test_text_to_speech.py
 - test_multi_modal_embedding.py
 - test_ofa_tasks.py
@@ -13,6 +13,33 @@ isolated: # test cases that may require an excessive amount of GPU memory, which
 - test_movie_scene_segmentation.py
 - test_image_inpainting.py
 - test_mglm_text_summarization.py
+- test_team_transfer_trainer.py
+- test_image_denoise_trainer.py
+- test_dialog_intent_trainer.py
+- test_finetune_mplug.py
+- test_image_instance_segmentation_trainer.py
+- test_image_portrait_enhancement_trainer.py
+- test_translation_trainer.py
+- test_unifold.py
+- test_automatic_post_editing.py
+- test_mplug_tasks.py
+- test_movie_scene_segmentation.py
+- test_body_3d_keypoints.py
+- test_finetune_text_generation.py
+- test_clip_trainer.py
+- test_ofa_trainer.py
+- test_fill_mask.py
+- test_hand_2d_keypoints.py
+- test_referring_video_object_segmentation.py
+- test_easycv_trainer_hand_2d_keypoints.py
+- test_card_detection_scrfd_trainer.py
+- test_referring_video_object_segmentation_trainer.py
+- test_person_image_cartoon.py
+- test_image_style_transfer.py
+- test_ocr_detection.py
+- test_automatic_speech_recognition.py
+- test_image_matting.py
+- test_skin_retouching.py

 envs:
   default: # default env, case not in other env will in default, pytorch.
@@ -94,7 +94,7 @@ class TestDialogIntentTrainer(unittest.TestCase):
            cfg.Model.update(config['Model'])
            if self.debugging:
                cfg.Trainer.save_checkpoint = False
-                cfg.Trainer.num_epochs = 5
+                cfg.Trainer.num_epochs = 1
                cfg.Trainer.batch_size_label = 64
            return cfg