@@ -1,6 +1,3 @@
echo "Testing envs"
printenv
echo "ENV END"
if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
  pip install -r requirements/tests.txt
  git config --global --add safe.directory /Maas-lib
@@ -23,7 +20,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
  awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
  awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
  awk -F: '/^[^#]/ { print $1 }' requirements/science.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
  pip install -r requirements/tests.txt
  # test with install
  python setup.py install
else
@@ -3,30 +3,32 @@ MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
CODE_DIR=$PWD
CODE_DIR_IN_CONTAINER=/Maas-lib
echo "$USER"
gpus='7 6 5 4 3 2 1 0'
cpu_sets='0-7 8-15 16-23 24-30 31-37 38-44 45-51 52-58'
gpus='0,1 2,3 4,5 6,7'
cpu_sets='45-58 31-44 16-30 0-15'
cpu_sets_arr=($cpu_sets)
is_get_file_lock=false
# export RUN_CASE_COMMAND='python tests/run.py --run_config tests/run_config.yaml'
CI_COMMAND=${CI_COMMAND:-bash .dev_scripts/ci_container_test.sh $RUN_CASE_BASE_COMMAND}
CI_COMMAND='bash .dev_scripts/ci_container_test.sh python tests/run.py --parallel 2 --run_config tests/run_config.yaml'
echo "ci command: $CI_COMMAND"
idx=0
for gpu in $gpus
do
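  # Claim one GPU group per iteration: exec opens a file descriptor on a
  # per-group lock file (the shell stores the fd number in $lock_fd), and
  # `flock -n` tries to take the lock without blocking, so a group that
  # another CI run already holds is skipped instead of waited on.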
  exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
  flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; continue; }
  flock -n "$lock_fd" || { echo "WARN: gpu $gpu is in use!" >&2; idx=$((idx+1)); continue; }
  echo "get gpu lock $gpu"
  CONTAINER_NAME="modelscope-ci-$gpu"
  CONTAINER_NAME="modelscope-ci-$idx"
  is_get_file_lock=true
  # pull the image if there are updates
  docker pull ${IMAGE_NAME}:${IMAGE_VERSION}
  if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
    echo 'debugging'
    docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
      --cpuset-cpus=${cpu_sets_arr[$gpu]} \
      --gpus="device=$gpu" \
      --cpuset-cpus=${cpu_sets_arr[$idx]} \
      --gpus='"'"device=$gpu"'"' \
      -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
      -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
      -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
      -v $MODELSCOPE_HOME_CACHE/$idx:/root \
      -v /home/admin/pre-commit:/home/admin/pre-commit \
      -e CI_TEST=True \
      -e TEST_LEVEL=$TEST_LEVEL \
@@ -41,16 +43,15 @@ do
      -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
      -e MODEL_TAG_URL=$MODEL_TAG_URL \
      --workdir=$CODE_DIR_IN_CONTAINER \
      --net host \
      ${IMAGE_NAME}:${IMAGE_VERSION} \
      $CI_COMMAND
  else
    docker run --rm --name $CONTAINER_NAME --shm-size=16gb \
      --cpuset-cpus=${cpu_sets_arr[$gpu]} \
      --gpus="device=$gpu" \
      --cpuset-cpus=${cpu_sets_arr[$idx]} \
      --gpus='"'"device=$gpu"'"' \
      -v $CODE_DIR:$CODE_DIR_IN_CONTAINER \
      -v $MODELSCOPE_CACHE:$MODELSCOPE_CACHE_DIR_IN_CONTAINER \
      -v $MODELSCOPE_HOME_CACHE/$gpu:/root \
      -v $MODELSCOPE_HOME_CACHE/$idx:/root \
      -v /home/admin/pre-commit:/home/admin/pre-commit \
      -e CI_TEST=True \
      -e TEST_LEVEL=$TEST_LEVEL \
@@ -64,7 +65,6 @@ do
      -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \
      -e MODEL_TAG_URL=$MODEL_TAG_URL \
      --workdir=$CODE_DIR_IN_CONTAINER \
      --net host \
      ${IMAGE_NAME}:${IMAGE_VERSION} \
      $CI_COMMAND
  fi
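  # Note on the --gpus value above: for a comma-separated device list, Docker
  # expects the quotes to reach it literally, e.g. --gpus '"device=0,1"'.
  # The '"'"device=$gpu"'"' spelling concatenates a literal double quote, the
  # expanded "device=$gpu" string, and a closing literal double quote to
  # produce exactly that argument.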
@@ -20,7 +20,6 @@ class MogFaceDetector(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -21,7 +21,6 @@ class MtcnnFaceDetector(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -18,7 +18,6 @@ class RetinaFaceDetection(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.cfg = Config.from_file(
@@ -24,7 +24,6 @@ class UlfdFaceDetector(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -24,7 +24,6 @@ class FacialExpressionRecognition(TorchModel):
    def __init__(self, model_path, device='cuda'):
        super().__init__(model_path)
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -31,7 +31,6 @@ cfg_re50 = {
class RetinaFaceDetection(object):
    def __init__(self, model_path, device='cuda'):
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.model_path = model_path
        self.device = device
@@ -3,11 +3,13 @@
import argparse
import datetime
import math
import multiprocessing
import os
import subprocess
import sys
import tempfile
import time
import unittest
from fnmatch import fnmatch
from multiprocessing.managers import BaseManager
@@ -158,6 +160,21 @@ def run_command_with_popen(cmd):
        sys.stdout.write(line)


def async_run_command_with_popen(cmd, device_id):
    logger.info('Worker id: %s args: %s' % (device_id, cmd))
    env = os.environ.copy()
    env['CUDA_VISIBLE_DEVICES'] = '%s' % device_id
    sub_process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        universal_newlines=True,
        env=env,
        encoding='utf8')
    return sub_process
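

# Usage sketch (hypothetical command, illustration only): each worker is
# pinned to one device via CUDA_VISIBLE_DEVICES, and callers switch the
# worker's stdout to non-blocking (see os.set_blocking below) so the
# scheduler can poll many workers without stalling on any one of them:
#   proc = async_run_command_with_popen(['python', '-c', 'print("ok")'], 0)
#   os.set_blocking(proc.stdout.fileno(), False)
#   # readline() now returns '' when no output is ready instead of blocking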

def save_test_result(df, args):
    if args.result_dir is not None:
        file_name = str(int(datetime.datetime.now().timestamp() * 1000))
@@ -199,6 +216,108 @@ def install_requirements(requirements):
        run_command(cmd)


def wait_for_free_worker(workers):
    while True:
        for idx, worker in enumerate(workers):
            if worker is None:
                logger.info('return free worker: %s' % (idx))
                return idx
            if worker.poll() is None:  # running, get output
                for line in iter(worker.stdout.readline, ''):
                    if line != '':
                        sys.stdout.write(line)
                    else:
                        break
            else:  # worker process completed.
                logger.info('Process %s ended.' % idx)
                workers[idx] = None
                return idx
        time.sleep(0.001)


def wait_for_workers(workers):
    while True:
        for idx, worker in enumerate(workers):
            if worker is None:
                continue
            # check whether the worker has completed.
            if worker.poll() is None:
                for line in iter(worker.stdout.readline, ''):
                    if line != '':
                        sys.stdout.write(line)
                    else:
                        break
            else:
                logger.info('Process idx: %s ended.' % (idx))
                workers[idx] = None
        is_all_completed = True
        for idx, worker in enumerate(workers):
            if worker is not None:
                is_all_completed = False
                break
        if is_all_completed:
            logger.info('All subprocesses completed.')
            break
        time.sleep(0.001)


def parallel_run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
                             result_dir, parallel):
    logger.info('Running case in env: %s' % env_name)
    # install requirements and dependencies from run_config['envs'][env]
    if 'requirements' in env:
        install_requirements(env['requirements'])
    if 'dependencies' in env:
        install_packages(env['dependencies'])
    # case worker processes
    worker_processes = [None] * parallel
    for test_suite_file in isolated_cases:  # run case in subprocess
        if test_suite_file in test_suite_env_map and test_suite_env_map[
                test_suite_file] == env_name:
            cmd = [
                'python',
                'tests/run.py',
                '--pattern',
                test_suite_file,
                '--result_dir',
                result_dir,
            ]
            worker_idx = wait_for_free_worker(worker_processes)
            worker_process = async_run_command_with_popen(cmd, worker_idx)
            os.set_blocking(worker_process.stdout.fileno(), False)
            worker_processes[worker_idx] = worker_process
        else:
            pass  # case not in run list.
    # run the remaining cases across the worker processes.
    remain_suite_files = []
    for k, v in test_suite_env_map.items():
        if k not in isolated_cases and v == env_name:
            remain_suite_files.append(k)
    if len(remain_suite_files) == 0:
        return
    # roughly split the cases into at most `parallel` chunks
    part_count = math.ceil(len(remain_suite_files) / parallel)
    suites_chunks = [
        remain_suite_files[x:x + part_count]
        for x in range(0, len(remain_suite_files), part_count)
    ]
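    # Worked example: 7 remaining suites with parallel=2 give
    # part_count = ceil(7 / 2) = 4, i.e. chunks of 4 and 3 suites,
    # each handed to the next free worker slot below.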
    for suites_chunk in suites_chunks:
        worker_idx = wait_for_free_worker(worker_processes)
        cmd = [
            'python', 'tests/run.py', '--result_dir', result_dir, '--suites'
        ]
        for suite in suites_chunk:
            cmd.append(suite)
        worker_process = async_run_command_with_popen(cmd, worker_idx)
        os.set_blocking(worker_process.stdout.fileno(), False)
        worker_processes[worker_idx] = worker_process

    wait_for_workers(worker_processes)


def run_case_in_env(env_name, env, test_suite_env_map, isolated_cases,
                    result_dir):
    # install requirements and dependencies from run_config['envs'][env]
@@ -264,8 +383,9 @@ def run_in_subprocess(args):
    with tempfile.TemporaryDirectory() as temp_result_dir:
        for env in set(test_suite_env_map.values()):
            run_case_in_env(env, run_config['envs'][env], test_suite_env_map,
                            isolated_cases, temp_result_dir)
            parallel_run_case_in_env(env, run_config['envs'][env],
                                     test_suite_env_map, isolated_cases,
                                     temp_result_dir, args.parallel)

        result_dfs = []
        result_path = Path(temp_result_dir)
@@ -312,6 +432,10 @@ class TimeCostTextTestResult(TextTestResult):
        self.stream.writeln(
            'Test case: %s stop at: %s, cost time: %s(seconds)' %
            (test.test_full_name, test.stop_time, test.time_cost))
        if torch.cuda.is_available(
        ) and test.time_cost > 5.0:  # print nvidia-smi
            cmd = ['nvidia-smi']
            run_command_with_popen(cmd)
        super(TimeCostTextTestResult, self).stopTest(test)

    def addSuccess(self, test):
@@ -383,6 +507,8 @@ def main(args):
        os.path.abspath(args.test_dir), args.pattern, args.list_tests)
    if not args.list_tests:
        result = runner.run(test_suite)
        logger.info('Running case completed, pid: %s, suites: %s' %
                    (os.getpid(), args.suites))
        result = collect_test_results(result)
        df = test_cases_result_to_df(result)
        if args.result_dir is not None:
@@ -417,6 +543,12 @@ if __name__ == '__main__':
        '--result_dir',
        default=None,
        help='Save result to directory, internal use only')
    parser.add_argument(
        '--parallel',
        default=1,
        type=int,
        help='Number of parallel test processes; defaults to 1 (a single '
        'process). Typically set to the number of available GPUs.')
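    # Example invocation (the command the CI script above runs):
    #   python tests/run.py --parallel 2 --run_config tests/run_config.yaml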
    parser.add_argument(
        '--suites',
        nargs='*',
@@ -1,5 +1,5 @@
# isolate cases per env; we can install different dependencies in each env.
isolated: # test cases that may require an excessive amount of GPU memory, which will be executed in a dedicated process.
isolated: # test cases that may require an excessive amount of GPU memory or run for a long time, which will be executed in a dedicated process.
  - test_text_to_speech.py
  - test_multi_modal_embedding.py
  - test_ofa_tasks.py
@@ -12,6 +12,33 @@ isolated: # test cases that may require an excessive amount of GPU memory, which
  - test_segmentation_pipeline.py
  - test_image_inpainting.py
  - test_mglm_text_summarization.py
  - test_team_transfer_trainer.py
  - test_image_denoise_trainer.py
  - test_dialog_intent_trainer.py
  - test_finetune_mplug.py
  - test_image_instance_segmentation_trainer.py
  - test_image_portrait_enhancement_trainer.py
  - test_translation_trainer.py
  - test_unifold.py
  - test_automatic_post_editing.py
  - test_mplug_tasks.py
  - test_movie_scene_segmentation.py
  - test_body_3d_keypoints.py
  - test_finetune_text_generation.py
  - test_clip_trainer.py
  - test_ofa_trainer.py
  - test_fill_mask.py
  - test_hand_2d_keypoints.py
  - test_referring_video_object_segmentation.py
  - test_easycv_trainer_hand_2d_keypoints.py
  - test_card_detection_scrfd_trainer.py
  - test_referring_video_object_segmentation_trainer.py
  - test_person_image_cartoon.py
  - test_image_style_transfer.py
  - test_ocr_detection.py
  - test_automatic_speech_recognition.py
  - test_image_matting.py
  - test_skin_retouching.py
envs:
  default: # default env; cases not assigned to another env run in default (pytorch).
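  # A hypothetical second env, sketching the keys tests/run.py reads
  # ('requirements' and 'dependencies'); the names here are illustrative only:
  #   some_env:
  #     requirements:
  #       - requirements/tests.txt
  #     dependencies:
  #       - numpy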
@@ -94,7 +94,7 @@ class TestDialogIntentTrainer(unittest.TestCase):
        cfg.Model.update(config['Model'])
        if self.debugging:
            cfg.Trainer.save_checkpoint = False
            cfg.Trainer.num_epochs = 5
            cfg.Trainer.num_epochs = 1
            cfg.Trainer.batch_size_label = 64
        return cfg