From 01bb751425018f90a55a42a7f6beb2e1d3b16997 Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Mon, 22 Aug 2022 16:01:42 +0800
Subject: [PATCH] [to #43653669]feat: auto build docker images
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

auto build docker images
aone 任务: https://test.aone.alibaba-inc.com/jobs/1824567?buildId=143470479 修改任务分支
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9566518

    * [to #43653669]feat: auto build docker images
---
 .dev_scripts/build_image.sh             | 184 ++++++++++++++++++++++++
 .dev_scripts/ci_container_test.sh       |   4 +-
 .dev_scripts/dockerci.sh                |   5 +-
 .dockerignore                           |  11 ++
 docker/Dockerfile.ubuntu                |  84 ++++++++++++
 docker/rcfiles/conda.tuna               |  15 +++
 docker/rcfiles/ubuntu20.04_sources.tuna |  13 ++
 requirements/runtime.txt                |   2 +
 8 files changed, 315 insertions(+), 3 deletions(-)
 create mode 100644 .dev_scripts/build_image.sh
 create mode 100644 .dockerignore
 create mode 100644 docker/Dockerfile.ubuntu
 create mode 100644 docker/rcfiles/conda.tuna
 create mode 100644 docker/rcfiles/ubuntu20.04_sources.tuna

diff --git a/.dev_scripts/build_image.sh b/.dev_scripts/build_image.sh
new file mode 100644
index 00000000..e6403aed
--- /dev/null
+++ b/.dev_scripts/build_image.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# Build a ModelScope docker image from docker/Dockerfile.ubuntu, optionally run
+# the CI test suite against it (--test) and push it to the registry (--push).
+# All paths are relative, so run this script from the repository root.
+# --modelscope=<version> is mandatory; see usage() / --help for the rest.
+
+# default values.
+BASE_CPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04
+BASE_GPU_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.3.0-cudnn8-devel
+MODELSCOPE_REPO_ADDRESS=reg.docker.alibaba-inc.com/modelscope/modelscope
+python_version=3.7.13
+torch_version=1.11.0
+cudatoolkit_version=11.3
+tensorflow_version=1.15.5
+modelscope_version=None
+is_ci_test=False
+is_dsw=False
+is_cpu=False
+run_ci_test=False
+# Initialise explicitly so an is_push value inherited from the caller's
+# environment can never trigger an unintended push.
+is_push=False
+
+# Print the supported options and their current default values to stdout.
+function usage(){
+    echo "usage: build.sh "
+    echo " --python=python_version set python version, default: $python_version"
+    echo " --torch=torch_version set pytorch version, default: $torch_version"
+    echo " --cudatoolkit=cudatoolkit_version set cudatoolkit version used for pytorch, default: $cudatoolkit_version"
+    echo " --tensorflow=tensorflow_version set tensorflow version, default: $tensorflow_version"
+    echo " --modelscope=modelscope_version set modelscope version, default: $modelscope_version"
+    echo " --test option for run test before push image, only push on ci test pass"
+    echo " --cpu option for build cpu version"
+    echo " --dsw option for build dsw version"
+    echo " --ci option for build ci version"
+    echo " --push option for push image to remote repo"
+}
+
+# Parse "--name=value" / "--flag" options; positional arguments are ignored.
+for i in "$@"; do
+    case $i in
+        --python=*)
+            python_version="${i#*=}"
+            shift
+            ;;
+        --torch=*)
+            torch_version="${i#*=}"
+            shift # pytorch version
+            ;;
+        --tensorflow=*)
+            tensorflow_version="${i#*=}"
+            shift # tensorflow version
+            ;;
+        --cudatoolkit=*)
+            cudatoolkit_version="${i#*=}"
+            shift # cudatoolkit for pytorch
+            ;;
+        --modelscope=*)
+            modelscope_version="${i#*=}"
+            shift # modelscope version
+            ;;
+        --test)
+            run_ci_test=True
+            shift # will run ci test
+            ;;
+        --cpu)
+            is_cpu=True
+            shift # is cpu image
+            ;;
+        --ci)
+            is_ci_test=True
+            shift # is ci, will not install modelscope
+            ;;
+        --dsw)
+            is_dsw=True
+            shift # is dsw, will set dsw cache location
+            ;;
+        --push)
+            is_push=True
+            shift # will push the image to the remote repo
+            ;;
+        --help)
+            usage
+            exit 0
+            ;;
+        -*) # any other option, "--*" included
+            echo "Unknown option $i"
+            usage
+            exit 1
+            ;;
+        *)
+            ;;
+    esac
+done
+
+if [ "$modelscope_version" == "None" ]; then
+    echo "ModelScope version must specify!"
+    exit 1
+fi
+if [ "$is_cpu" == "True" ]; then
+    export BASE_IMAGE=$BASE_CPU_IMAGE
+    base_tag=ubuntu20.04
+    export USE_GPU=False
+else
+    export BASE_IMAGE=$BASE_GPU_IMAGE
+    base_tag=ubuntu20.04-cuda11.3.0
+    export USE_GPU=True
+fi
+# Map the requested python version onto the image tag suffix.
+if [[ $python_version == 3.7* ]]; then
+    base_tag=$base_tag-py37
+elif [[ $python_version == 3.8* ]]; then # was "z*", which never matched
+    base_tag=$base_tag-py38
+elif [[ $python_version == 3.9* ]]; then # was "z*", which never matched
+    base_tag=$base_tag-py39
+else
+    echo "Unsupport python version: $python_version"
+    exit 1
+fi
+
+target_image_tag=$base_tag-torch$torch_version-tf$tensorflow_version
+if [ "$is_ci_test" == "True" ]; then
+    target_image_tag=$target_image_tag-$modelscope_version-ci
+else
+    target_image_tag=$target_image_tag-$modelscope_version-test
+fi
+export IMAGE_TO_BUILD=$MODELSCOPE_REPO_ADDRESS:$target_image_tag
+export PYTHON_VERSION=$python_version
+export TORCH_VERSION=$torch_version
+export CUDATOOLKIT_VERSION=$cudatoolkit_version
+export TENSORFLOW_VERSION=$tensorflow_version
+echo -e "Building image with:\npython$python_version\npytorch$torch_version\ntensorflow:$tensorflow_version\ncudatoolkit:$cudatoolkit_version\ncpu:$is_cpu\nis_ci:$is_ci_test\nis_dsw:$is_dsw\n"
+docker_file_content=$(cat docker/Dockerfile.ubuntu) || exit 1 # stop early if the template is missing
+if [ "$is_ci_test" != "True" ]; then
+    echo "Building ModelScope lib, will install ModelScope lib to image"
+    docker_file_content="${docker_file_content} \nRUN pip install --no-cache-dir modelscope==$modelscope_version -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html"
+fi
+echo "$is_dsw"
+if [ "$is_dsw" == "False" ]; then
+    echo "Not DSW image"
+else
+    echo "Building dsw image well need set ModelScope lib cache location."
+    docker_file_content="${docker_file_content} \nENV MODELSCOPE_CACHE=/mnt/workspace/.cache/modelscope"
+fi
+# %b expands the "\n" escapes appended above; using the Dockerfile text as the
+# printf format itself would misinterpret any "%" it contains.
+printf '%b' "$docker_file_content" > Dockerfile
+docker build -t "$IMAGE_TO_BUILD" \
+             --build-arg USE_GPU \
+             --build-arg BASE_IMAGE \
+             --build-arg PYTHON_VERSION \
+             --build-arg TORCH_VERSION \
+             --build-arg CUDATOOLKIT_VERSION \
+             --build-arg TENSORFLOW_VERSION \
+             -f Dockerfile .
+
+if [ $? -ne 0 ]; then
+    echo "Running docker build command error, please check the log!"
+    exit -1
+fi
+if [ "$run_ci_test" == "True" ]; then
+    echo "Running ci case."
+    export MODELSCOPE_CACHE=/home/mulin.lyh/model_scope_cache # NOTE(review): hard-coded to one user's home directory
+    export MODELSCOPE_HOME_CACHE=/home/mulin.lyh/ci_case_home # for credential
+    export IMAGE_NAME=$MODELSCOPE_REPO_ADDRESS
+    export IMAGE_VERSION=$target_image_tag
+    export MODELSCOPE_DOMAIN=www.modelscope.cn
+    export HUB_DATASET_ENDPOINT=http://www.modelscope.cn
+    export CI_TEST=True
+    export TEST_LEVEL=1
+    if [ "$is_ci_test" != "True" ]; then
+        echo "Testing for dsw image or MaaS-lib image"
+        export CI_COMMAND="python tests/run.py"
+    fi
+    bash .dev_scripts/dockerci.sh
+    if [ $? -ne 0 ]; then
+        echo "Running unittest failed, please check the log!"
+        exit -1
+    fi
+fi
+if [ "$is_push" == "True" ]; then
+    echo "Pushing image: $IMAGE_TO_BUILD"
+    docker push "$IMAGE_TO_BUILD"
+fi
diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh
index 2f68f416..98e9f88d 100644
--- a/.dev_scripts/ci_container_test.sh
+++ b/.dev_scripts/ci_container_test.sh
@@ -16,5 +16,7 @@ if [ $? -ne 0 ]; then
     echo "linter test failed, please run 'pre-commit run --all-files' to check"
     exit -1
 fi
+# test with install
+python setup.py install
 
-PYTHONPATH=. python tests/run.py
+python tests/run.py
diff --git a/.dev_scripts/dockerci.sh b/.dev_scripts/dockerci.sh
index d5ea3c41..383eb909 100644
--- a/.dev_scripts/dockerci.sh
+++ b/.dev_scripts/dockerci.sh
@@ -1,5 +1,4 @@
 #!/bin/bash
-IMAGE_NAME=reg.docker.alibaba-inc.com/dinger/modelscope
 MODELSCOPE_CACHE_DIR_IN_CONTAINER=/modelscope_cache
 CODE_DIR=$PWD
 CODE_DIR_IN_CONTAINER=/Maas-lib
@@ -8,6 +7,7 @@ gpus='7 6 5 4 3 2 1 0'
 cpu_sets='0-7 8-15 16-23 24-30 31-37 38-44 45-51 52-58'
 cpu_sets_arr=($cpu_sets)
 is_get_file_lock=false
+CI_COMMAND=${CI_COMMAND:-'bash .dev_scripts/ci_container_test.sh'}
 for gpu in $gpus
 do
     exec {lock_fd}>"/tmp/gpu$gpu" || exit 1
@@ -31,10 +31,11 @@ do
                 -e HUB_DATASET_ENDPOINT=$HUB_DATASET_ENDPOINT \
                 -e TEST_ACCESS_TOKEN_CITEST=$TEST_ACCESS_TOKEN_CITEST \
                 -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \
+                -e TEST_LEVEL=$TEST_LEVEL \
                 --workdir=$CODE_DIR_IN_CONTAINER \
                 --net host \
                 ${IMAGE_NAME}:${IMAGE_VERSION} \
-                bash .dev_scripts/ci_container_test.sh
+                $CI_COMMAND
     if [ $? -ne 0 ]; then
         echo "Running test case failed, please check the log!"
exit -1 diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..4198ecc0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,11 @@ +.gitignore +tests +data +.dev_scripts +.dockerignore +.git +.gitattributes +.pre-commit-config.yaml +.pre-commit-config_local.yaml +.readthedocs.yaml +Dockfile diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu new file mode 100644 index 00000000..97881007 --- /dev/null +++ b/docker/Dockerfile.ubuntu @@ -0,0 +1,84 @@ +ARG BASE_IMAGE=reg.docker.alibaba-inc.com/modelscope/ubuntu:20.04-cuda11.3.0-cudnn8-devel +FROM $BASE_IMAGE +ARG DEBIAN_FRONTEND=noninteractive +ENV TZ=Asia/Shanghai +ENV CONDA_DIR /opt/conda +ENV PATH="${CONDA_DIR}/bin:${PATH}" +ENV arch=x86_64 +SHELL ["/bin/bash", "-c"] +COPY docker/rcfiles /tmp/resources +RUN apt-get update && apt-get install -y --reinstall ca-certificates && \ + cp /tmp/resources/ubuntu20.04_sources.tuna /etc/apt/sources.list && \ + apt-get update && \ + apt-get install -y locales wget git vim ffmpeg libsm6 tzdata language-pack-zh-hans ttf-wqy-microhei ttf-wqy-zenhei xfonts-wqy libxext6 build-essential ninja-build && \ + wget https://packagecloud.io/github/git-lfs/packages/debian/bullseye/git-lfs_3.2.0_amd64.deb/download -O ./git-lfs_3.2.0_amd64.deb && \ + dpkg -i ./git-lfs_3.2.0_amd64.deb && \ + rm -f ./git-lfs_3.2.0_amd64.deb && \ + locale-gen zh_CN && \ + locale-gen zh_CN.utf8 && \ + update-locale LANG=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 && \ + ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \ + dpkg-reconfigure --frontend noninteractive tzdata && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8 + +#install and config python +ARG PYTHON_VERSION=3.7.13 +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \ + /bin/bash miniconda.sh -b -p /opt/conda && \ + rm -f miniconda.sh && \ + ln -s /opt/conda/etc/profile.d/conda.sh 
/etc/profile.d/conda.sh && \ + echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + cp /tmp/resources/conda.tuna ~/.condarc && \ + source /root/.bashrc && \ + conda install --yes python==${PYTHON_VERSION} && \ + pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple + +ARG USE_GPU=True + +# install pytorch +ARG TORCH_VERSION=1.12.0 +ARG CUDATOOLKIT_VERSION=11.3 +RUN if [ "$USE_GPU" = "True" ] ; then \ + conda install --yes pytorch==$TORCH_VERSION torchvision torchaudio cudatoolkit=$CUDATOOLKIT_VERSION -c pytorch && conda clean --yes --all; \ + else \ + conda install pytorch==$TORCH_VERSION torchvision torchaudio cpuonly -c pytorch; \ + fi + +# install tensorflow +ARG TENSORFLOW_VERSION=1.15.5 +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir --use-deprecated=legacy-resolver tensorflow==$TENSORFLOW_VERSION -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html; \ + else \ + pip install --no-cache-dir tensorflow==$TENSORFLOW_VERSION; \ + fi + +RUN if [ "$USE_GPU" = "True" ] ; then \ + CUDA_HOME=/usr/local/cuda TORCH_CUDA_ARCH_LIST="5.0 5.2 6.0 6.1 7.0 7.5 8.0 8.6" MMCV_WITH_OPS=1 MAX_JOBS=8 FORCE_CUDA=1 pip install --no-cache-dir mmcv-full && pip cache purge; \ + else \ + MMCV_WITH_OPS=1 MAX_JOBS=8 pip install --no-cache-dir mmcv-full && pip cache purge; \ + fi + +# install modelscope +COPY requirements /var/modelscope +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /var/modelscope/runtime.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ + pip install --no-cache-dir -r /var/modelscope/audio.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ + pip install --no-cache-dir -r /var/modelscope/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ + pip install --no-cache-dir -r /var/modelscope/multi-modal.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ + pip install 
--no-cache-dir -r /var/modelscope/nlp.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \ + pip cache purge + +# default shell bash +ENV SHELL=/bin/bash + +# install special package +RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 numpy==1.18.5 datasets==2.1.0 + +RUN if [ "$USE_GPU" = "True" ] ; then \ + pip install --no-cache-dir dgl-cu113 dglgo -f https://data.dgl.ai/wheels/repo.html; \ + else \ + pip install --no-cache-dir dgl dglgo -f https://data.dgl.ai/wheels/repo.html; \ + fi diff --git a/docker/rcfiles/conda.tuna b/docker/rcfiles/conda.tuna new file mode 100644 index 00000000..ce8a2908 --- /dev/null +++ b/docker/rcfiles/conda.tuna @@ -0,0 +1,15 @@ +channels: + - defaults +show_channel_urls: true +default_channels: + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2 +custom_channels: + conda-forge: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud + msys2: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud + bioconda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud + menpo: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud + pytorch: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud + pytorch-lts: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud + simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud diff --git a/docker/rcfiles/ubuntu20.04_sources.tuna b/docker/rcfiles/ubuntu20.04_sources.tuna new file mode 100644 index 00000000..a247bbfa --- /dev/null +++ b/docker/rcfiles/ubuntu20.04_sources.tuna @@ -0,0 +1,13 @@ +# 默认注释了源码镜像以提高 apt update 速度,如有需要可自行取消注释 +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse +# deb-src 
https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse + +# 预发布软件源,不建议启用 +# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse diff --git a/requirements/runtime.txt b/requirements/runtime.txt index e2b78f06..c059b4ba 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -8,6 +8,8 @@ numpy opencv-python oss2 Pillow>=6.2.0 +# for pyarrow 9.0.0 event_loop core dump +pyarrow>=6.0.0,!=9.0.0 pyyaml requests scipy