diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh
index 4fd2778f..81c28513 100644
--- a/.dev_scripts/ci_container_test.sh
+++ b/.dev_scripts/ci_container_test.sh
@@ -2,13 +2,7 @@
 echo "Testing envs"
 printenv
 echo "ENV END"
 if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
-    awk -F: '/^[^#]/ { print $1 }' requirements/framework.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
-    awk -F: '/^[^#]/ { print $1 }' requirements/audio.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
-    awk -F: '/^[^#]/ { print $1 }' requirements/cv.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
-    awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
-    awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
     pip install -r requirements/tests.txt
-    git config --global --add safe.directory /Maas-lib
     git config --global user.email tmp
     git config --global user.name tmp.com
@@ -19,9 +13,22 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
         pre-commit run -c .pre-commit-config_local.yaml --all-files
         if [ $? -ne 0 ]; then
             echo "linter test failed, please run 'pre-commit run --all-files' to check"
+            echo "From the repository folder:"
+            echo "Run 'pip install -r requirements/tests.txt' to install the test dependencies."
+            echo "Run 'pre-commit install' to install the pre-commit hooks."
+            echo "Finally, run the linter with 'pre-commit run --all-files' to check."
+            echo "Ensure there are no failures."
             exit -1
         fi
     fi
+
+    awk -F: '/^[^#]/ { print $1 }' requirements/framework.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    awk -F: '/^[^#]/ { print $1 }' requirements/audio.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    awk -F: '/^[^#]/ { print $1 }' requirements/cv.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    awk -F: '/^[^#]/ { print $1 }' requirements/science.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
+    pip install -r requirements/tests.txt
     # test with install
     python setup.py install
 else
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index 6dafbc3e..160e2604 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -7,6 +7,7 @@ ENV PATH="${CONDA_DIR}/bin:${PATH}"
 ENV arch=x86_64
 SHELL ["/bin/bash", "-c"]
 COPY docker/rcfiles /tmp/resources
+COPY docker/jupyter_plugins /tmp/resources/jupyter_plugins
 RUN apt-get update && apt-get install -y --reinstall ca-certificates && \
     cp /tmp/resources/ubuntu20.04_sources.tuna /etc/apt/sources.list && \
     apt-get update && \
@@ -26,7 +27,7 @@ ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8

 #install and config python
 ARG PYTHON_VERSION=3.7.13
-RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \
+RUN wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \
     /bin/bash miniconda.sh -b -p /opt/conda && \
     rm -f miniconda.sh && \
     ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
@@ -34,8 +35,8 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${a
     cp /tmp/resources/conda.tuna ~/.condarc && \
     source /root/.bashrc && \
     conda install --yes python==${PYTHON_VERSION} && \
-    pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
-    pip config set install.trusted-host pypi.tuna.tsinghua.edu.cn
+    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
+    pip config set install.trusted-host mirrors.aliyun.com

 ARG USE_GPU=True

@@ -70,16 +71,38 @@ RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir -r /var/modelscope/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
     pip install --no-cache-dir -r /var/modelscope/multi-modal.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
     pip install --no-cache-dir -r /var/modelscope/nlp.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
+    pip install --no-cache-dir -r /var/modelscope/science.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
     pip cache purge

 # default shell bash
 ENV SHELL=/bin/bash

 # install special package
-RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 datasets==2.1.0 numpy==1.18.5 ipykernel fairseq fasttext https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/xtcocotools-1.12-cp37-cp37m-linux_x86_64.whl
-
 RUN if [ "$USE_GPU" = "True" ] ; then \
         pip install --no-cache-dir dgl-cu113 dglgo -f https://data.dgl.ai/wheels/repo.html; \
     else \
         pip install --no-cache-dir dgl dglgo -f https://data.dgl.ai/wheels/repo.html; \
     fi
+
+# install jupyter plugin
+RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \
+    cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/
+
+COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh
+RUN pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/xtcocotools-1.12-cp37-cp37m-linux_x86_64.whl --force
+
+# for unifold
+COPY docker/scripts/install_unifold.sh /tmp/install_unifold.sh
+RUN if [ "$USE_GPU" = "True" ] ; then \
+        bash /tmp/install_unifold.sh; \
+    else \
+        echo 'unifold is not supported on CPU'; \
+    fi
+
+RUN pip install --no-cache-dir 'mmcls>=0.21.0' 'mmdet>=2.25.0' 'decord>=0.6.0' datasets==2.1.0 numpy==1.18.5 ipykernel fairseq fasttext deepspeed
+COPY docker/scripts/install_apex.sh /tmp/install_apex.sh
+RUN if [ "$USE_GPU" = "True" ] ; then \
+        bash /tmp/install_apex.sh; \
+    else \
+        echo 'apex is not installed for CPU'; \
+    fi
diff --git a/docker/jupyter_plugins/jupyterlab_active_log/package.json b/docker/jupyter_plugins/jupyterlab_active_log/package.json
new file mode 100644
index 00000000..d2e0d0db
--- /dev/null
+++ b/docker/jupyter_plugins/jupyterlab_active_log/package.json
@@ -0,0 +1,99 @@
+{
+  "name": "jupyterlab_active_log",
+  "version": "0.1.0",
+  "description": "A JupyterLab extension.",
+  "keywords": [
+    "jupyter",
+    "jupyterlab",
+    "jupyterlab-extension"
+  ],
+  "homepage": "https://github.com/github_username/jupyterlab_active_log",
+  "bugs": {
+    "url": "https://github.com/github_username/jupyterlab_active_log/issues"
+  },
+  "license": "BSD-3-Clause",
+  "files": [
"lib/**/*.{d.ts,eot,gif,html,jpg,js,js.map,json,png,svg,woff2,ttf}", + "style/**/*.{css,js,eot,gif,html,jpg,json,png,svg,woff2,ttf}" + ], + "main": "lib/index.js", + "types": "lib/index.d.ts", + "style": "style/index.css", + "repository": { + "type": "git", + "url": "https://github.com/github_username/jupyterlab_active_log.git" + }, + "scripts": { + "build": "jlpm build:lib && jlpm build:labextension:dev", + "build:prod": "jlpm clean && jlpm build:lib && jlpm build:labextension", + "build:labextension": "jupyter labextension build .", + "build:labextension:dev": "jupyter labextension build --development True .", + "build:lib": "tsc", + "clean": "jlpm clean:lib", + "clean:lib": "rimraf lib tsconfig.tsbuildinfo", + "clean:lintcache": "rimraf .eslintcache .stylelintcache", + "clean:labextension": "rimraf jupyterlab_active_log/labextension", + "clean:all": "jlpm clean:lib && jlpm clean:labextension && jlpm clean:lintcache", + "eslint": "jlpm eslint:check --fix", + "eslint:check": "eslint . --cache --ext .ts,.tsx", + "install:extension": "jlpm build", + "lint": "jlpm stylelint && jlpm prettier && jlpm eslint", + "lint:check": "jlpm stylelint:check && jlpm prettier:check && jlpm eslint:check", + "prettier": "jlpm prettier:base --write --list-different", + "prettier:base": "prettier \"**/*{.ts,.tsx,.js,.jsx,.css,.json,.md}\"", + "prettier:check": "jlpm prettier:base --check", + "stylelint": "jlpm stylelint:check --fix", + "stylelint:check": "stylelint --cache \"style/**/*.css\"", + "watch": "run-p watch:src watch:labextension", + "watch:src": "tsc -w", + "watch:labextension": "jupyter labextension watch ." + }, + "dependencies": { + "@jupyterlab/application": "^3.1.0" + }, + "devDependencies": { + "@jupyterlab/builder": "^3.1.0", + "@typescript-eslint/eslint-plugin": "^4.8.1", + "@typescript-eslint/parser": "^4.8.1", + "eslint": "^7.14.0", + "eslint-config-prettier": "^6.15.0", + "eslint-plugin-prettier": "^3.1.4", + "npm-run-all": "^4.1.5", + "prettier": "^2.1.1", + "rimraf": "^3.0.2", + "stylelint": "^14.3.0", + "stylelint-config-prettier": "^9.0.3", + "stylelint-config-recommended": "^6.0.0", + "stylelint-config-standard": "~24.0.0", + "stylelint-prettier": "^2.0.0", + "typescript": "~4.1.3" + }, + "sideEffects": [ + "style/*.css", + "style/index.js" + ], + "styleModule": "style/index.js", + "publishConfig": { + "access": "public" + }, + "jupyterlab": { + "extension": true, + "outputDir": "jupyterlab_active_log/labextension", + "_build": { + "load": "static/remoteEntry.eb3177c3791d7658cc12.js", + "extension": "./extension", + "style": "./style" + } + }, + "jupyter-releaser": { + "hooks": { + "before-build-npm": [ + "python -m pip install jupyterlab~=3.1", + "jlpm" + ], + "before-build-python": [ + "jlpm clean:all" + ] + } + } +} diff --git a/docker/jupyter_plugins/jupyterlab_active_log/static/568.a92ae44b87625ab09aed.js b/docker/jupyter_plugins/jupyterlab_active_log/static/568.a92ae44b87625ab09aed.js new file mode 100644 index 00000000..b70adee6 --- /dev/null +++ b/docker/jupyter_plugins/jupyterlab_active_log/static/568.a92ae44b87625ab09aed.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkjupyterlab_active_log=self.webpackChunkjupyterlab_active_log||[]).push([[568],{568:(t,e,a)=>{a.r(e),a.d(e,{default:()=>i});const i={id:"jupyterlab_active_log:plugin",autoStart:!0,activate:t=>{console.log("JupyterLab extension jupyterlab_active_log is 
activated!"),window.consts=Object.assign(Object.assign({},window.consts),{recordUrl:"https://modelscope.cn/api/v1/notebooks/activelog",timerDuration:1e4,timerParams:function(){const t=location.pathname.split("/");let e;return t.length>=2&&(e=t[1]),{site:"dsw",id:e,ext:{pathname:location.pathname}}}});const e=document.body,a=e.insertBefore(document.createElement("script"),e.firstChild);a.setAttribute("id","timer-sdk"),a.setAttribute("src","https://g.alicdn.com/alifanyi/translate-js-sdk/timer.js ")}}}}]); diff --git a/docker/jupyter_plugins/jupyterlab_active_log/static/747.63b4c3d22bfe458b352b.js b/docker/jupyter_plugins/jupyterlab_active_log/static/747.63b4c3d22bfe458b352b.js new file mode 100644 index 00000000..2129fc3d --- /dev/null +++ b/docker/jupyter_plugins/jupyterlab_active_log/static/747.63b4c3d22bfe458b352b.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkjupyterlab_active_log=self.webpackChunkjupyterlab_active_log||[]).push([[747],{150:(e,n,t)=>{t.d(n,{Z:()=>a});var r=t(645),o=t.n(r)()((function(e){return e[1]}));o.push([e.id,"/*\n See the JupyterLab Developer Guide for useful CSS Patterns:\n\n https://jupyterlab.readthedocs.io/en/stable/developer/css.html\n*/\n",""]);const a=o},645:e=>{e.exports=function(e){var n=[];return n.toString=function(){return this.map((function(n){var t=e(n);return n[2]?"@media ".concat(n[2]," {").concat(t,"}"):t})).join("")},n.i=function(e,t,r){"string"==typeof e&&(e=[[null,e,""]]);var o={};if(r)for(var a=0;a{var r,o=function(){var e={};return function(n){if(void 0===e[n]){var t=document.querySelector(n);if(window.HTMLIFrameElement&&t instanceof window.HTMLIFrameElement)try{t=t.contentDocument.head}catch(e){t=null}e[n]=t}return e[n]}}(),a=[];function i(e){for(var n=-1,t=0;t{t.r(n);var r=t(379),o=t.n(r),a=t(150);o()(a.Z,{insert:"head",singleton:!1}),a.Z.locals}}]); diff --git a/docker/jupyter_plugins/jupyterlab_active_log/static/remoteEntry.eb3177c3791d7658cc12.js b/docker/jupyter_plugins/jupyterlab_active_log/static/remoteEntry.eb3177c3791d7658cc12.js new file mode 100644 index 00000000..ec49e973 --- /dev/null +++ b/docker/jupyter_plugins/jupyterlab_active_log/static/remoteEntry.eb3177c3791d7658cc12.js @@ -0,0 +1 @@ +var _JUPYTERLAB;(()=>{"use strict";var e,r,t={293:(e,r,t)=>{var o={"./index":()=>t.e(568).then((()=>()=>t(568))),"./extension":()=>t.e(568).then((()=>()=>t(568))),"./style":()=>t.e(747).then((()=>()=>t(747)))},a=(e,r)=>(t.R=r,r=t.o(o,e)?o[e]():Promise.resolve().then((()=>{throw new Error('Module "'+e+'" does not exist in container.')})),t.R=void 0,r),n=(e,r)=>{if(t.S){var o="default",a=t.S[o];if(a&&a!==e)throw new Error("Container initialization failed as it has already been initialized with a different share scope");return t.S[o]=e,t.I(o,r)}};t.d(r,{get:()=>a,init:()=>n})}},o={};function a(e){var r=o[e];if(void 0!==r)return r.exports;var n=o[e]={id:e,exports:{}};return t[e](n,n.exports,a),n.exports}a.m=t,a.c=o,a.n=e=>{var r=e&&e.__esModule?()=>e.default:()=>e;return a.d(r,{a:r}),r},a.d=(e,r)=>{for(var t in r)a.o(r,t)&&!a.o(e,t)&&Object.defineProperty(e,t,{enumerable:!0,get:r[t]})},a.f={},a.e=e=>Promise.all(Object.keys(a.f).reduce(((r,t)=>(a.f[t](e,r),r)),[])),a.u=e=>e+"."+{568:"a92ae44b87625ab09aed",747:"63b4c3d22bfe458b352b"}[e]+".js?v="+{568:"a92ae44b87625ab09aed",747:"63b4c3d22bfe458b352b"}[e],a.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return 
window}}(),a.o=(e,r)=>Object.prototype.hasOwnProperty.call(e,r),e={},r="jupyterlab_active_log:",a.l=(t,o,n,i)=>{if(e[t])e[t].push(o);else{var l,u;if(void 0!==n)for(var c=document.getElementsByTagName("script"),d=0;d{l.onerror=l.onload=null,clearTimeout(f);var a=e[t];if(delete e[t],l.parentNode&&l.parentNode.removeChild(l),a&&a.forEach((e=>e(o))),r)return r(o)},f=setTimeout(p.bind(null,void 0,{type:"timeout",target:l}),12e4);l.onerror=p.bind(null,l.onerror),l.onload=p.bind(null,l.onload),u&&document.head.appendChild(l)}},a.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{a.S={};var e={},r={};a.I=(t,o)=>{o||(o=[]);var n=r[t];if(n||(n=r[t]={}),!(o.indexOf(n)>=0)){if(o.push(n),e[t])return e[t];a.o(a.S,t)||(a.S[t]={});var i=a.S[t],l="jupyterlab_active_log",u=[];return"default"===t&&((e,r,t,o)=>{var n=i[e]=i[e]||{},u=n[r];(!u||!u.loaded&&(1!=!u.eager?o:l>u.from))&&(n[r]={get:()=>a.e(568).then((()=>()=>a(568))),from:l,eager:!1})})("jupyterlab_active_log","0.1.0"),e[t]=u.length?Promise.all(u).then((()=>e[t]=1)):1}}})(),(()=>{var e;a.g.importScripts&&(e=a.g.location+"");var r=a.g.document;if(!e&&r&&(r.currentScript&&(e=r.currentScript.src),!e)){var t=r.getElementsByTagName("script");t.length&&(e=t[t.length-1].src)}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),a.p=e})(),(()=>{var e={346:0};a.f.j=(r,t)=>{var o=a.o(e,r)?e[r]:void 0;if(0!==o)if(o)t.push(o[2]);else{var n=new Promise(((t,a)=>o=e[r]=[t,a]));t.push(o[2]=n);var i=a.p+a.u(r),l=new Error;a.l(i,(t=>{if(a.o(e,r)&&(0!==(o=e[r])&&(e[r]=void 0),o)){var n=t&&("load"===t.type?"missing":t.type),i=t&&t.target&&t.target.src;l.message="Loading chunk "+r+" failed.\n("+n+": "+i+")",l.name="ChunkLoadError",l.type=n,l.request=i,o[1](l)}}),"chunk-"+r,r)}};var r=(r,t)=>{var o,n,[i,l,u]=t,c=0;if(i.some((r=>0!==e[r]))){for(o in l)a.o(l,o)&&(a.m[o]=l[o]);u&&u(a)}for(r&&r(t);c/dev/null 2>&1 || { echo 'git not installed' ; exit 0; } + +if [ -z "$MODELSCOPE_USERNAME" ] || [ -z "$MODELSCOPE_GITLAB_ACCESS_TOKEN" ]; then + : +else + git config --global credential.helper store + echo "http://${MODELSCOPE_USERNAME}:${MODELSCOPE_GITLAB_ACCESS_TOKEN}@www.modelscope.cn">~/.git-credentials + echo "https://${MODELSCOPE_USERNAME}:${MODELSCOPE_GITLAB_ACCESS_TOKEN}@www.modelscope.cn">>~/.git-credentials + chmod go-rwx ~/.git-credentials +fi +if [ -z "$MODELSCOPE_USERNAME" ] || [ -z "$MODELSCOPE_USEREMAIL" ]; then + : +else + git config --system user.name ${MODELSCOPE_USERNAME} + git config --system user.email ${MODELSCOPE_USEREMAIL} +fi +if [ -z "$MODELSCOPE_ENVIRONMENT" ]; then + : +else + git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_Environment: $MODELSCOPE_ENVIRONMENT" + git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_Environment: $MODELSCOPE_ENVIRONMENT" +fi + +if [ -z "$MODELSCOPE_USERNAME" ]; then + : +else + git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_User: $MODELSCOPE_USERNAME" + git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_User: $MODELSCOPE_USERNAME" +fi + +if [ -z "$MODELSCOPE_USERID" ]; then + : +else + git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_Userid: $MODELSCOPE_USERID" + git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_Userid: 
$MODELSCOPE_USERID" +fi + +if [ -z "$MODELSCOPE_HAVANAID" ]; then + : +else + git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_Havanaid: $MODELSCOPE_HAVANAID" + git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_Havanaid: $MODELSCOPE_HAVANAID" +fi diff --git a/docs/source/conf.py b/docs/source/conf.py index 39e0d881..4371c927 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -25,7 +25,7 @@ version_file = '../../modelscope/version.py' def get_version(): - with open(version_file, 'r') as f: + with open(version_file, 'r', encoding='utf-8') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index f2ff822d..17c21d44 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -739,7 +739,7 @@ class ModelScopeConfig: with open( os.path.join(ModelScopeConfig.path_credential, ModelScopeConfig.USER_INFO_FILE_NAME), - 'r') as f: + 'r', encoding='utf-8') as f: info = f.read() return info.split(':')[0], info.split(':')[1] except FileNotFoundError: @@ -760,7 +760,7 @@ class ModelScopeConfig: with open( os.path.join(ModelScopeConfig.path_credential, ModelScopeConfig.GIT_TOKEN_FILE_NAME), - 'r') as f: + 'r', encoding='utf-8') as f: token = f.read() except FileNotFoundError: pass diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index c7c3e729..ccd36349 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -32,6 +32,7 @@ class Models(object): image_reid_person = 'passvitb' image_inpainting = 'FFTInpainting' video_summarization = 'pgl-video-summarization' + language_guided_video_summarization = 'clip-it-language-guided-video-summarization' swinL_semantic_segmentation = 'swinL-semantic-segmentation' vitadapter_semantic_segmentation = 'vitadapter-semantic-segmentation' text_driven_segmentation = 'text-driven-segmentation' @@ -200,6 +201,7 @@ class Pipelines(object): video_single_object_tracking = 'ostrack-vitb-video-single-object-tracking' image_panoptic_segmentation = 'image-panoptic-segmentation' video_summarization = 'googlenet_pgl_video_summarization' + language_guided_video_summarization = 'clip-it-video-summarization' image_semantic_segmentation = 'image-semantic-segmentation' image_reid_person = 'passvitb-image-reid-person' image_inpainting = 'fft-inpainting' diff --git a/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py b/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py index cc47d0c4..9378c32a 100644 --- a/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py +++ b/modelscope/models/audio/tts/models/datasets/kantts_data4fs.py @@ -21,7 +21,7 @@ class KanTtsText2MelDataset(Dataset): self.cache = cache - with open(config_filename) as f: + with open(config_filename, encoding='utf-8') as f: self._config = json.loads(f.read()) # Load metadata: diff --git a/modelscope/models/audio/tts/sambert_hifi.py b/modelscope/models/audio/tts/sambert_hifi.py index a9b55795..9a14219e 100644 --- a/modelscope/models/audio/tts/sambert_hifi.py +++ b/modelscope/models/audio/tts/sambert_hifi.py @@ -60,7 +60,7 @@ class SambertHifigan(Model): with zipfile.ZipFile(zip_file, 'r') as zip_ref: zip_ref.extractall(model_dir) voice_cfg_path = os.path.join(self.__voice_path, 'voices.json') - with open(voice_cfg_path, 'r') as f: + with open(voice_cfg_path, 'r', encoding='utf-8') as f: voice_cfg = json.load(f) if 'voices' not in voice_cfg: raise TtsModelConfigurationException( diff --git 
a/modelscope/models/cv/__init__.py b/modelscope/models/cv/__init__.py index 64039863..de972032 100644 --- a/modelscope/models/cv/__init__.py +++ b/modelscope/models/cv/__init__.py @@ -10,10 +10,10 @@ from . import (action_recognition, animal_recognition, body_2d_keypoints, image_panoptic_segmentation, image_portrait_enhancement, image_reid_person, image_semantic_segmentation, image_to_image_generation, image_to_image_translation, - movie_scene_segmentation, object_detection, - product_retrieval_embedding, realtime_object_detection, - referring_video_object_segmentation, salient_detection, - shop_segmentation, super_resolution, + language_guided_video_summarization, movie_scene_segmentation, + object_detection, product_retrieval_embedding, + realtime_object_detection, referring_video_object_segmentation, + salient_detection, shop_segmentation, super_resolution, video_single_object_tracking, video_summarization, virual_tryon) # yapf: enable diff --git a/modelscope/models/cv/language_guided_video_summarization/__init__.py b/modelscope/models/cv/language_guided_video_summarization/__init__.py new file mode 100755 index 00000000..73f7bd03 --- /dev/null +++ b/modelscope/models/cv/language_guided_video_summarization/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .summarizer import ( + ClipItVideoSummarization, ) + +else: + _import_structure = { + 'summarizer': [ + 'ClipItVideoSummarization', + ] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/language_guided_video_summarization/summarizer.py b/modelscope/models/cv/language_guided_video_summarization/summarizer.py new file mode 100755 index 00000000..654dc3ea --- /dev/null +++ b/modelscope/models/cv/language_guided_video_summarization/summarizer.py @@ -0,0 +1,194 @@ +# Part of the implementation is borrowed and modified from BMT and video_features, +# publicly available at https://github.com/v-iashin/BMT +# and https://github.com/v-iashin/video_features + +import argparse +import os +import os.path as osp +from copy import deepcopy +from typing import Dict, Union + +import numpy as np +import torch +import torch.nn as nn +from bmt_clipit.sample.single_video_prediction import (caption_proposals, + generate_proposals, + load_cap_model, + load_prop_model) +from bmt_clipit.utilities.proposal_utils import non_max_suppresion +from torch.nn.parallel import DataParallel, DistributedDataParallel +from videofeatures_clipit.models.i3d.extract_i3d import ExtractI3D +from videofeatures_clipit.models.vggish.extract_vggish import ExtractVGGish +from videofeatures_clipit.utils.utils import (fix_tensorflow_gpu_allocation, + form_list_from_user_input) + +from modelscope.metainfo import Models +from modelscope.models.base import Tensor, TorchModel +from modelscope.models.builder import MODELS +from modelscope.models.cv.language_guided_video_summarization.transformer import \ + Transformer +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +def extract_text(args): + # Loading models and other essential stuff + cap_cfg, cap_model, train_dataset = load_cap_model( + args.pretrained_cap_model_path, args.device_id) + prop_cfg, prop_model = load_prop_model(args.device_id, + 
args.prop_generator_model_path, + args.pretrained_cap_model_path, + args.max_prop_per_vid) + # Proposal + proposals = generate_proposals(prop_model, args.features, + train_dataset.pad_idx, prop_cfg, + args.device_id, args.duration_in_secs) + # NMS if specified + if args.nms_tiou_thresh is not None: + proposals = non_max_suppresion(proposals.squeeze(), + args.nms_tiou_thresh) + proposals = proposals.unsqueeze(0) + # Captions for each proposal + captions = caption_proposals(cap_model, args.features, train_dataset, + cap_cfg, args.device_id, proposals, + args.duration_in_secs) + return captions + + +def extract_video_features(video_path, tmp_path, feature_type, i3d_flow_path, + i3d_rgb_path, kinetics_class_labels, pwc_path, + vggish_model_path, vggish_pca_path, extraction_fps, + device): + default_args = dict( + device=device, + extraction_fps=extraction_fps, + feature_type=feature_type, + file_with_video_paths=None, + i3d_flow_path=i3d_flow_path, + i3d_rgb_path=i3d_rgb_path, + keep_frames=False, + kinetics_class_labels=kinetics_class_labels, + min_side_size=256, + pwc_path=pwc_path, + show_kinetics_pred=False, + stack_size=64, + step_size=64, + tmp_path=tmp_path, + vggish_model_path=vggish_model_path, + vggish_pca_path=vggish_pca_path, + ) + args = argparse.Namespace(**default_args) + + if args.feature_type == 'i3d': + extractor = ExtractI3D(args) + elif args.feature_type == 'vggish': + extractor = ExtractVGGish(args) + + feats = extractor(video_path) + return feats + + +def video_features_to_txt(duration_in_secs, pretrained_cap_model_path, + prop_generator_model_path, features, device_id): + default_args = dict( + device_id=device_id, + duration_in_secs=duration_in_secs, + features=features, + pretrained_cap_model_path=pretrained_cap_model_path, + prop_generator_model_path=prop_generator_model_path, + max_prop_per_vid=100, + nms_tiou_thresh=0.4, + ) + args = argparse.Namespace(**default_args) + txt = extract_text(args) + return txt + + +@MODELS.register_module( + Tasks.language_guided_video_summarization, + module_name=Models.language_guided_video_summarization) +class ClipItVideoSummarization(TorchModel): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the video summarization model from the `model_dir` path. + + Args: + model_dir (str): the model path. + """ + super().__init__(model_dir, *args, **kwargs) + + model_path = osp.join(model_dir, ModelFile.TORCH_MODEL_FILE) + + self.loss = nn.MSELoss() + self.model = Transformer() + if torch.cuda.is_available(): + self._device = torch.device('cuda') + else: + self._device = torch.device('cpu') + self.model = self.model.to(self._device) + + self.model = self.load_pretrained(self.model, model_path) + + if self.training: + self.model.train() + else: + self.model.eval() + + def load_pretrained(self, net, load_path, strict=True, param_key='params'): + if isinstance(net, (DataParallel, DistributedDataParallel)): + net = net.module + load_net = torch.load( + load_path, map_location=lambda storage, loc: storage) + if param_key is not None: + if param_key not in load_net and 'params' in load_net: + param_key = 'params' + logger.info( + f'Loading: {param_key} does not exist, use params.') + if param_key in load_net: + load_net = load_net[param_key] + logger.info( + f'Loading {net.__class__.__name__} model from {load_path}, with param key: [{param_key}].' + ) + # remove unnecessary 'module.' 
+ for k, v in deepcopy(load_net).items(): + if k.startswith('module.'): + load_net[k[7:]] = v + load_net.pop(k) + net.load_state_dict(load_net, strict=strict) + logger.info('load model done.') + return net + + def _train_forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: + frame_features = input['frame_features'] + txt_features = input['txt_features'] + gtscore = input['gtscore'] + preds, attn_weights = self.model(frame_features, txt_features, + frame_features) + return {'loss': self.loss(preds, gtscore)} + + def _inference_forward(self, input: Dict[str, + Tensor]) -> Dict[str, Tensor]: + frame_features = input['frame_features'] + txt_features = input['txt_features'] + y, dec_output = self.model(frame_features, txt_features, + frame_features) + return {'scores': y} + + def forward(self, input: Dict[str, + Tensor]) -> Dict[str, Union[list, Tensor]]: + """return the result by the model + + Args: + input (Dict[str, Tensor]): the preprocessed data + + Returns: + Dict[str, Union[list, Tensor]]: results + """ + for key, value in input.items(): + input[key] = input[key].to(self._device) + if self.training: + return self._train_forward(input) + else: + return self._inference_forward(input) diff --git a/modelscope/models/cv/language_guided_video_summarization/transformer/__init__.py b/modelscope/models/cv/language_guided_video_summarization/transformer/__init__.py new file mode 100755 index 00000000..68dccccf --- /dev/null +++ b/modelscope/models/cv/language_guided_video_summarization/transformer/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .models import ( + Transformer, ) + +else: + _import_structure = { + 'models': [ + 'Transformer', + ] + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/language_guided_video_summarization/transformer/layers.py b/modelscope/models/cv/language_guided_video_summarization/transformer/layers.py new file mode 100755 index 00000000..6782c209 --- /dev/null +++ b/modelscope/models/cv/language_guided_video_summarization/transformer/layers.py @@ -0,0 +1,48 @@ +# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch, +# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch +import torch +import torch.nn as nn + +from .sub_layers import MultiHeadAttention, PositionwiseFeedForward + + +class EncoderLayer(nn.Module): + """Compose with two layers""" + + def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): + super(EncoderLayer, self).__init__() + self.slf_attn = MultiHeadAttention( + n_head, d_model, d_k, d_v, dropout=dropout) + self.pos_ffn = PositionwiseFeedForward( + d_model, d_inner, dropout=dropout) + + def forward(self, enc_input, slf_attn_mask=None): + enc_output, enc_slf_attn = self.slf_attn( + enc_input, enc_input, enc_input, mask=slf_attn_mask) + enc_output = self.pos_ffn(enc_output) + return enc_output, enc_slf_attn + + +class DecoderLayer(nn.Module): + """Compose with three layers""" + + def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): + super(DecoderLayer, self).__init__() + self.slf_attn = MultiHeadAttention( + n_head, d_model, d_k, d_v, dropout=dropout) + self.enc_attn = MultiHeadAttention( + n_head, d_model, d_k, d_v, dropout=dropout) + 
self.pos_ffn = PositionwiseFeedForward( + d_model, d_inner, dropout=dropout) + + def forward(self, + dec_input, + enc_output, + slf_attn_mask=None, + dec_enc_attn_mask=None): + dec_output, dec_slf_attn = self.slf_attn( + dec_input, dec_input, dec_input, mask=slf_attn_mask) + dec_output, dec_enc_attn = self.enc_attn( + dec_output, enc_output, enc_output, mask=dec_enc_attn_mask) + dec_output = self.pos_ffn(dec_output) + return dec_output, dec_slf_attn, dec_enc_attn diff --git a/modelscope/models/cv/language_guided_video_summarization/transformer/models.py b/modelscope/models/cv/language_guided_video_summarization/transformer/models.py new file mode 100755 index 00000000..f4ae34ee --- /dev/null +++ b/modelscope/models/cv/language_guided_video_summarization/transformer/models.py @@ -0,0 +1,229 @@ +# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch, +# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch + +import numpy as np +import torch +import torch.nn as nn + +from .layers import DecoderLayer, EncoderLayer +from .sub_layers import MultiHeadAttention + + +class PositionalEncoding(nn.Module): + + def __init__(self, d_hid, n_position=200): + super(PositionalEncoding, self).__init__() + + # Not a parameter + self.register_buffer( + 'pos_table', self._get_sinusoid_encoding_table(n_position, d_hid)) + + def _get_sinusoid_encoding_table(self, n_position, d_hid): + """Sinusoid position encoding table""" + + # TODO: make it with torch instead of numpy + + def get_position_angle_vec(position): + return [ + position / np.power(10000, 2 * (hid_j // 2) / d_hid) + for hid_j in range(d_hid) + ] + + sinusoid_table = np.array( + [get_position_angle_vec(pos_i) for pos_i in range(n_position)]) + sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i + sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 + + return torch.FloatTensor(sinusoid_table).unsqueeze(0) + + def forward(self, x): + return x + self.pos_table[:, :x.size(1)].clone().detach() + + +class Encoder(nn.Module): + """A encoder model with self attention mechanism.""" + + def __init__(self, + d_word_vec=1024, + n_layers=6, + n_head=8, + d_k=64, + d_v=64, + d_model=512, + d_inner=2048, + dropout=0.1, + n_position=200): + + super().__init__() + + self.position_enc = PositionalEncoding( + d_word_vec, n_position=n_position) + self.dropout = nn.Dropout(p=dropout) + self.layer_stack = nn.ModuleList([ + EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout) + for _ in range(n_layers) + ]) + self.layer_norm = nn.LayerNorm(d_model, eps=1e-6) + self.d_model = d_model + + def forward(self, enc_output, return_attns=False): + + enc_slf_attn_list = [] + # -- Forward + enc_output = self.dropout(self.position_enc(enc_output)) + enc_output = self.layer_norm(enc_output) + + for enc_layer in self.layer_stack: + enc_output, enc_slf_attn = enc_layer(enc_output) + enc_slf_attn_list += [enc_slf_attn] if return_attns else [] + + if return_attns: + return enc_output, enc_slf_attn_list + return enc_output, + + +class Decoder(nn.Module): + """A decoder model with self attention mechanism.""" + + def __init__(self, + d_word_vec=1024, + n_layers=6, + n_head=8, + d_k=64, + d_v=64, + d_model=512, + d_inner=2048, + n_position=200, + dropout=0.1): + + super().__init__() + + self.position_enc = PositionalEncoding( + d_word_vec, n_position=n_position) + self.dropout = nn.Dropout(p=dropout) + self.layer_stack = nn.ModuleList([ + DecoderLayer(d_model, d_inner, n_head, 
d_k, d_v, dropout=dropout) + for _ in range(n_layers) + ]) + self.layer_norm = nn.LayerNorm(d_model, eps=1e-6) + self.d_model = d_model + + def forward(self, + dec_output, + enc_output, + src_mask=None, + trg_mask=None, + return_attns=False): + + dec_slf_attn_list, dec_enc_attn_list = [], [] + + # -- Forward + dec_output = self.dropout(self.position_enc(dec_output)) + dec_output = self.layer_norm(dec_output) + + for dec_layer in self.layer_stack: + dec_output, dec_slf_attn, dec_enc_attn = dec_layer( + dec_output, + enc_output, + slf_attn_mask=trg_mask, + dec_enc_attn_mask=src_mask) + dec_slf_attn_list += [dec_slf_attn] if return_attns else [] + dec_enc_attn_list += [dec_enc_attn] if return_attns else [] + + if return_attns: + return dec_output, dec_slf_attn_list, dec_enc_attn_list + return dec_output, + + +class Transformer(nn.Module): + """A sequence to sequence model with attention mechanism.""" + + def __init__(self, + num_sentence=7, + txt_atten_head=4, + d_frame_vec=512, + d_model=512, + d_inner=2048, + n_layers=6, + n_head=8, + d_k=256, + d_v=256, + dropout=0.1, + n_position=4000): + + super().__init__() + + self.d_model = d_model + + self.layer_norm_img_src = nn.LayerNorm(d_frame_vec, eps=1e-6) + self.layer_norm_img_trg = nn.LayerNorm(d_frame_vec, eps=1e-6) + self.layer_norm_txt = nn.LayerNorm( + num_sentence * d_frame_vec, eps=1e-6) + + self.linear_txt = nn.Linear( + in_features=num_sentence * d_frame_vec, out_features=d_model) + self.lg_attention = MultiHeadAttention( + n_head=txt_atten_head, d_model=d_model, d_k=d_k, d_v=d_v) + + self.encoder = Encoder( + n_position=n_position, + d_word_vec=d_frame_vec, + d_model=d_model, + d_inner=d_inner, + n_layers=n_layers, + n_head=n_head, + d_k=d_k, + d_v=d_v, + dropout=dropout) + + self.decoder = Decoder( + n_position=n_position, + d_word_vec=d_frame_vec, + d_model=d_model, + d_inner=d_inner, + n_layers=n_layers, + n_head=n_head, + d_k=d_k, + d_v=d_v, + dropout=dropout) + + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + + assert d_model == d_frame_vec, 'the dimensions of all module outputs shall be the same.' 
+ + self.linear_1 = nn.Linear(in_features=d_model, out_features=d_model) + self.linear_2 = nn.Linear( + in_features=self.linear_1.out_features, out_features=1) + + self.drop = nn.Dropout(p=0.5) + self.norm_y = nn.LayerNorm(normalized_shape=d_model, eps=1e-6) + self.norm_linear = nn.LayerNorm( + normalized_shape=self.linear_1.out_features, eps=1e-6) + self.relu = nn.ReLU() + self.sigmoid = nn.Sigmoid() + + def forward(self, src_seq, src_txt, trg_seq): + + features_txt = self.linear_txt(src_txt) + atten_seq, txt_attn = self.lg_attention(src_seq, features_txt, + features_txt) + + enc_output, *_ = self.encoder(atten_seq) + dec_output, *_ = self.decoder(trg_seq, enc_output) + + y = self.drop(enc_output) + y = self.norm_y(y) + + # 2-layer NN (Regressor Network) + y = self.linear_1(y) + y = self.relu(y) + y = self.drop(y) + y = self.norm_linear(y) + + y = self.linear_2(y) + y = self.sigmoid(y) + y = y.view(1, -1) + + return y, dec_output diff --git a/modelscope/models/cv/language_guided_video_summarization/transformer/modules.py b/modelscope/models/cv/language_guided_video_summarization/transformer/modules.py new file mode 100755 index 00000000..03ef8eaf --- /dev/null +++ b/modelscope/models/cv/language_guided_video_summarization/transformer/modules.py @@ -0,0 +1,27 @@ +# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch, +# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ScaledDotProductAttention(nn.Module): + """Scaled Dot-Product Attention""" + + def __init__(self, temperature, attn_dropout=0.1): + super().__init__() + self.temperature = temperature + self.dropout = nn.Dropout(attn_dropout) + + def forward(self, q, k, v, mask=None): + + attn = torch.matmul(q / self.temperature, k.transpose(2, 3)) + + if mask is not None: + attn = attn.masked_fill(mask == 0, -1e9) + + attn = self.dropout(F.softmax(attn, dim=-1)) + output = torch.matmul(attn, v) + + return output, attn diff --git a/modelscope/models/cv/language_guided_video_summarization/transformer/sub_layers.py b/modelscope/models/cv/language_guided_video_summarization/transformer/sub_layers.py new file mode 100755 index 00000000..42e10abb --- /dev/null +++ b/modelscope/models/cv/language_guided_video_summarization/transformer/sub_layers.py @@ -0,0 +1,83 @@ +# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch, +# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch + +import numpy as np +import torch.nn as nn +import torch.nn.functional as F + +from .modules import ScaledDotProductAttention + + +class MultiHeadAttention(nn.Module): + """Multi-Head Attention module""" + + def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1): + super().__init__() + + self.n_head = n_head + self.d_k = d_k + self.d_v = d_v + + self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False) + self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False) + self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False) + self.fc = nn.Linear(n_head * d_v, d_model, bias=False) + + self.attention = ScaledDotProductAttention(temperature=d_k**0.5) + + self.dropout = nn.Dropout(dropout) + self.layer_norm = nn.LayerNorm(d_model, eps=1e-6) + + def forward(self, q, k, v, mask=None): + + d_k, d_v, n_head = self.d_k, self.d_v, self.n_head + sz_b, len_q, len_k, len_v = q.size(0), q.size(1), k.size(1), v.size(1) + + residual = q + + # Pass through the 
pre-attention projection: b x lq x (n*dv) + # Separate different heads: b x lq x n x dv + q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) + k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) + v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) + + # Transpose for attention dot product: b x n x lq x dv + q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2) + + if mask is not None: + mask = mask.unsqueeze(1) # For head axis broadcasting. + + q, attn = self.attention(q, k, v, mask=mask) + + # Transpose to move the head dimension back: b x lq x n x dv + # Combine the last two dimensions to concatenate all the heads together: b x lq x (n*dv) + q = q.transpose(1, 2).contiguous().view(sz_b, len_q, -1) + q = self.dropout(self.fc(q)) + q += residual + + q = self.layer_norm(q) + + return q, attn + + +class PositionwiseFeedForward(nn.Module): + """A two-feed-forward-layer module""" + + def __init__(self, d_in, d_hid, dropout=0.1): + super().__init__() + self.w_1 = nn.Linear(d_in, d_hid) # position-wise + self.w_2 = nn.Linear(d_hid, d_in) # position-wise + self.layer_norm = nn.LayerNorm(d_in, eps=1e-6) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + + residual = x + + x = self.w_2(F.relu(self.w_1(x))) + x = self.dropout(x) + x += residual + + x = self.layer_norm(x) + + return x diff --git a/modelscope/models/cv/tinynas_classfication/plain_net_utils.py b/modelscope/models/cv/tinynas_classfication/plain_net_utils.py index 844535ed..1f5c8852 100644 --- a/modelscope/models/cv/tinynas_classfication/plain_net_utils.py +++ b/modelscope/models/cv/tinynas_classfication/plain_net_utils.py @@ -39,7 +39,7 @@ class PlainNet(nn.Module): plainnet_struct_txt = self.module_opt.plainnet_struct_txt if plainnet_struct_txt is not None: - with open(plainnet_struct_txt, 'r') as fid: + with open(plainnet_struct_txt, 'r', encoding='utf-8') as fid: the_line = fid.readlines()[0].strip() self.plainnet_struct = the_line pass diff --git a/modelscope/models/multi_modal/clip/bert_tokenizer.py b/modelscope/models/multi_modal/clip/bert_tokenizer.py index 8d356f42..1ee715c9 100644 --- a/modelscope/models/multi_modal/clip/bert_tokenizer.py +++ b/modelscope/models/multi_modal/clip/bert_tokenizer.py @@ -120,7 +120,7 @@ def load_vocab(vocab_file): """Loads a vocabulary file into a dictionary.""" vocab = collections.OrderedDict() index = 0 - with open(vocab_file, 'r') as reader: + with open(vocab_file, 'r', encoding='utf-8') as reader: while True: token = convert_to_unicode(reader.readline()) if not token: diff --git a/modelscope/models/multi_modal/clip/model.py b/modelscope/models/multi_modal/clip/model.py index 9b82e4a1..c2d82dca 100644 --- a/modelscope/models/multi_modal/clip/model.py +++ b/modelscope/models/multi_modal/clip/model.py @@ -523,8 +523,10 @@ class CLIPForMultiModalEmbedding(TorchModel): logger.info(f'Loading text model config from {text_model_config_file}') assert os.path.exists(text_model_config_file) - with open(vision_model_config_file, - 'r') as fv, open(text_model_config_file, 'r') as ft: + with open( + vision_model_config_file, 'r', + encoding='utf-8') as fv,\ + open(text_model_config_file, 'r', encoding='utf-8') as ft: self.model_info = json.load(fv) for k, v in json.load(ft).items(): self.model_info[k] = v diff --git a/modelscope/models/multi_modal/diffusion/model.py b/modelscope/models/multi_modal/diffusion/model.py index 4229391f..5150a0c3 100644 --- a/modelscope/models/multi_modal/diffusion/model.py +++ b/modelscope/models/multi_modal/diffusion/model.py @@ -76,7 +76,7 @@ class 
DiffusionModel(nn.Module): super(DiffusionModel, self).__init__() # including text and generator config model_config = json.load( - open('{}/model_config.json'.format(model_dir))) + open('{}/model_config.json'.format(model_dir), encoding='utf-8')) # text encoder text_config = model_config['text_config'] @@ -142,7 +142,9 @@ class DiffusionForTextToImageSynthesis(Model): # diffusion process diffusion_params = json.load( - open('{}/diffusion_config.json'.format(model_dir))) + open( + '{}/diffusion_config.json'.format(model_dir), + encoding='utf-8')) self.diffusion_generator = make_diffusion( **diffusion_params['generator_config']) self.diffusion_upsampler_256 = make_diffusion( diff --git a/modelscope/models/multi_modal/diffusion/structbert.py b/modelscope/models/multi_modal/diffusion/structbert.py index d5d678ed..16c1407f 100644 --- a/modelscope/models/multi_modal/diffusion/structbert.py +++ b/modelscope/models/multi_modal/diffusion/structbert.py @@ -130,7 +130,7 @@ class BertConfig(object): @classmethod def from_json_file(cls, json_file): """Constructs a `BertConfig` from a json file of parameters.""" - with open(json_file, 'r') as reader: + with open(json_file, 'r', encoding='utf-8') as reader: text = reader.read() return cls.from_dict(json.loads(text)) diff --git a/modelscope/models/multi_modal/diffusion/tokenizer.py b/modelscope/models/multi_modal/diffusion/tokenizer.py index 82c09661..e2c951b1 100644 --- a/modelscope/models/multi_modal/diffusion/tokenizer.py +++ b/modelscope/models/multi_modal/diffusion/tokenizer.py @@ -67,7 +67,7 @@ def load_vocab(vocab_file): """Loads a vocabulary file into a dictionary.""" vocab = collections.OrderedDict() index = 0 - with open(vocab_file, 'r') as reader: + with open(vocab_file, 'r', encoding='utf-8') as reader: while True: token = convert_to_unicode(reader.readline()) if not token: diff --git a/modelscope/models/multi_modal/gemm/gemm_base.py b/modelscope/models/multi_modal/gemm/gemm_base.py index 806c469c..c77a682a 100644 --- a/modelscope/models/multi_modal/gemm/gemm_base.py +++ b/modelscope/models/multi_modal/gemm/gemm_base.py @@ -522,7 +522,9 @@ class GEMMModel(nn.Module): def __init__(self, model_dir): super().__init__() - with open('{}/encoder_config.json'.format(model_dir), 'r') as f: + with open( + '{}/encoder_config.json'.format(model_dir), 'r', + encoding='utf-8') as f: model_config = json.loads(f.read()) model_name = list(model_config.keys())[0] config_args = model_config[model_name] diff --git a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py index 0cc040c6..813f750e 100644 --- a/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py +++ b/modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py @@ -35,7 +35,9 @@ class VideoCLIPForMultiModalEmbedding(TorchModel): def __init__(self, model_dir, **kwargs): super().__init__(model_dir=model_dir, **kwargs) # model config parameters - with open(f'{model_dir}/{ModelFile.CONFIGURATION}', 'r') as json_file: + with open( + f'{model_dir}/{ModelFile.CONFIGURATION}', 'r', + encoding='utf-8') as json_file: model_config = json.load(json_file) model_config = model_config['paras'] model_config['model_dir'] = model_dir diff --git a/modelscope/models/multi_modal/mplug/configuration_mplug.py b/modelscope/models/multi_modal/mplug/configuration_mplug.py index 914678c5..946ebb82 100644 --- a/modelscope/models/multi_modal/mplug/configuration_mplug.py +++ 
b/modelscope/models/multi_modal/mplug/configuration_mplug.py @@ -111,6 +111,6 @@ class MPlugConfig(PretrainedConfig): @classmethod def from_yaml_file(cls, yaml_file: Union[str, os.PathLike]) -> Dict[str, Any]: - with open(yaml_file, 'r') as reader: + with open(yaml_file, 'r', encoding='utf-8') as reader: config_dict = yaml.load(reader, Loader=yaml.Loader) return cls(**config_dict) diff --git a/modelscope/models/multi_modal/multi_stage_diffusion/model.py b/modelscope/models/multi_modal/multi_stage_diffusion/model.py index 59bd837d..58fd6698 100644 --- a/modelscope/models/multi_modal/multi_stage_diffusion/model.py +++ b/modelscope/models/multi_modal/multi_stage_diffusion/model.py @@ -50,7 +50,8 @@ class UnCLIP(nn.Module): def __init__(self, model_dir): super(UnCLIP, self).__init__() self.model_dir = model_dir - self.config = json.load(open(f'{model_dir}/{ModelFile.CONFIGURATION}')) + self.config = json.load( + open(f'{model_dir}/{ModelFile.CONFIGURATION}', encoding='utf-8')) # modules self.clip = CLIP(**self.config['clip']).fp16() diff --git a/modelscope/models/multi_modal/ofa_for_all_tasks.py b/modelscope/models/multi_modal/ofa_for_all_tasks.py index fc578b25..77dff54a 100644 --- a/modelscope/models/multi_modal/ofa_for_all_tasks.py +++ b/modelscope/models/multi_modal/ofa_for_all_tasks.py @@ -312,7 +312,7 @@ class OfaForAllTasks(TorchModel): if self.cfg.model.get('answer2label', None): ans2label_file = osp.join(self.model_dir, self.cfg.model.answer2label) - with open(ans2label_file, 'r') as reader: + with open(ans2label_file, 'r', encoding='utf-8') as reader: self.ans2label_dict = json.load(reader) def save_pretrained(self, diff --git a/modelscope/models/nlp/mglm/arguments.py b/modelscope/models/nlp/mglm/arguments.py index 13b3aeab..4fa33c65 100755 --- a/modelscope/models/nlp/mglm/arguments.py +++ b/modelscope/models/nlp/mglm/arguments.py @@ -743,7 +743,7 @@ def get_args(): if hasattr(args, 'deepspeed' ) and args.deepspeed and args.deepspeed_config is not None: - with open(args.deepspeed_config) as file: + with open(args.deepspeed_config, encoding='utf-8') as file: deepspeed_config = json.load(file) if 'train_micro_batch_size_per_gpu' in deepspeed_config: args.batch_size = deepspeed_config[ diff --git a/modelscope/models/nlp/mglm/data_utils/corpora.py b/modelscope/models/nlp/mglm/data_utils/corpora.py index 7c6f58f8..cf756c0a 100755 --- a/modelscope/models/nlp/mglm/data_utils/corpora.py +++ b/modelscope/models/nlp/mglm/data_utils/corpora.py @@ -156,7 +156,7 @@ class DataReader: def read_input_to_queue(): for path in paths: print_rank_0(f'Start reading {path}') - with open(path) as file: + with open(path, encoding='utf-8') as file: items = json.load(file) for item in items: task_queue.put(item) diff --git a/modelscope/models/nlp/mglm/data_utils/datasets.py b/modelscope/models/nlp/mglm/data_utils/datasets.py index 777b7d43..39ffaea3 100644 --- a/modelscope/models/nlp/mglm/data_utils/datasets.py +++ b/modelscope/models/nlp/mglm/data_utils/datasets.py @@ -511,12 +511,12 @@ class json_dataset(data.Dataset): def load_json_stream(self, load_path): if not self.loose_json: - jsons = json.load(open(load_path, 'r')) + jsons = json.load(open(load_path, 'r', encoding='utf-8')) generator = iter(jsons) else: def gen_helper(): - with open(load_path, 'r') as f: + with open(load_path, 'r', encoding='utf-8') as f: for row in f: yield json.loads(row) diff --git a/modelscope/models/nlp/mglm/data_utils/extraction.py b/modelscope/models/nlp/mglm/data_utils/extraction.py index 53027e4f..da062f34 100644 --- 
a/modelscope/models/nlp/mglm/data_utils/extraction.py +++ b/modelscope/models/nlp/mglm/data_utils/extraction.py @@ -29,7 +29,9 @@ with open(output_path, 'w') as output: print(filename) article_lines = [] article_open = False - with open(filename, mode='r', newline='\n') as file: + with open( + filename, mode='r', newline='\n', + encoding='utf-8') as file: for line in file: line = line.rstrip() if ' List[InputExample]: examples = [] - with open(path) as f: + with open(path, encoding='utf-8') as f: reader = csv.reader(f, delimiter=',') for idx, row in enumerate(reader): label, headline, body = row @@ -1209,7 +1209,7 @@ class YelpPolarityProcessor(DataProcessor): def _create_examples(path: str, set_type: str) -> List[InputExample]: examples = [] - with open(path) as f: + with open(path, encoding='utf-8') as f: reader = csv.reader(f, delimiter=',') for idx, row in enumerate(reader): label, body = row @@ -1419,7 +1419,7 @@ class SquadProcessor(DataProcessor): @staticmethod def _create_examples(path: str, set_type: str) -> List[InputExample]: examples = [] - with open(path) as f: + with open(path, encoding='utf-8') as f: data = json.load(f)['data'] for idx, passage in enumerate(data): diff --git a/modelscope/models/nlp/mglm/tasks/superglue/pvp.py b/modelscope/models/nlp/mglm/tasks/superglue/pvp.py index ff394172..e149f503 100644 --- a/modelscope/models/nlp/mglm/tasks/superglue/pvp.py +++ b/modelscope/models/nlp/mglm/tasks/superglue/pvp.py @@ -538,7 +538,7 @@ class PVP(ABC): dict) # type: Dict[int, Dict[str, List[str]]] current_pattern_id = None - with open(path, 'r') as fh: + with open(path, 'r', encoding='utf-8') as fh: for line in fh.read().splitlines(): if line.isdigit(): current_pattern_id = int(line) diff --git a/modelscope/models/nlp/mglm/utils.py b/modelscope/models/nlp/mglm/utils.py index 2bfcf8c0..0e781189 100644 --- a/modelscope/models/nlp/mglm/utils.py +++ b/modelscope/models/nlp/mglm/utils.py @@ -77,7 +77,7 @@ def print_and_save_args(args, verbose=True, log_dir=None): with open(json_file, 'w') as output: json.dump(vars(args), output, sort_keys=True) if args.deepspeed and args.deepspeed_config is not None: - with open(args.deepspeed_config) as file: + with open(args.deepspeed_config, encoding='utf-8') as file: deepspeed_config = json.load(file) deepspeed_json_file = os.path.join(log_dir, 'config_gpt_large.json') @@ -324,7 +324,7 @@ def get_checkpoint_iteration(load_path): print_rank_0(' will not load any checkpoints and will start from ' 'random') return load_path, 0, False, False - with open(tracker_filename, 'r') as f: + with open(tracker_filename, 'r', encoding='utf-8') as f: metastring = f.read().strip() release = metastring == 'release' # try: diff --git a/modelscope/models/science/unifold/data/residue_constants.py b/modelscope/models/science/unifold/data/residue_constants.py index beebfe89..2701ee38 100644 --- a/modelscope/models/science/unifold/data/residue_constants.py +++ b/modelscope/models/science/unifold/data/residue_constants.py @@ -443,7 +443,7 @@ def load_stereo_chemical_props(): stereo_chemical_props_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), 'stereo_chemical_props.txt') - with open(stereo_chemical_props_path, 'rt') as f: + with open(stereo_chemical_props_path, 'rt', encoding='utf-8') as f: stereo_chemical_props = f.read() lines_iter = iter(stereo_chemical_props.splitlines()) # Load bond lengths. 
diff --git a/modelscope/models/science/unifold/dataset.py b/modelscope/models/science/unifold/dataset.py index 29e1a8b0..f14c2ef7 100644 --- a/modelscope/models/science/unifold/dataset.py +++ b/modelscope/models/science/unifold/dataset.py @@ -250,7 +250,7 @@ class UnifoldDataset(UnicoreDataset): self.path = data_path def load_json(filename): - return json.load(open(filename, 'r')) + return json.load(open(filename, 'r', encoding='utf-8')) sample_weight = load_json( os.path.join(self.path, @@ -400,7 +400,8 @@ class UnifoldMultimerDataset(UnifoldDataset): self.pdb_assembly = json.load( open( os.path.join(self.data_path, - json_prefix + 'pdb_assembly.json'))) + json_prefix + 'pdb_assembly.json'), + encoding='utf-8')) self.pdb_chains = self.get_chains(self.inverse_multi_label) self.monomer_feature_path = os.path.join(self.data_path, 'pdb_features') diff --git a/modelscope/models/science/unifold/msa/pipeline.py b/modelscope/models/science/unifold/msa/pipeline.py index b7889bff..8037e50e 100644 --- a/modelscope/models/science/unifold/msa/pipeline.py +++ b/modelscope/models/science/unifold/msa/pipeline.py @@ -99,7 +99,7 @@ def run_msa_tool( f.write(result[msa_format]) else: logging.warning('Reading MSA from file %s', msa_out_path) - with open(msa_out_path, 'r') as f: + with open(msa_out_path, 'r', encoding='utf-8') as f: result = {msa_format: f.read()} return result @@ -153,7 +153,7 @@ class DataPipeline: def process(self, input_fasta_path: str, msa_output_dir: str) -> FeatureDict: """Runs alignment tools on the input sequence and creates features.""" - with open(input_fasta_path) as f: + with open(input_fasta_path, encoding='utf-8') as f: input_fasta_str = f.read() input_seqs, input_descs = parsers.parse_fasta(input_fasta_str) if len(input_seqs) != 1: diff --git a/modelscope/models/science/unifold/msa/templates.py b/modelscope/models/science/unifold/msa/templates.py index fe3bcef9..d1ff8cf1 100644 --- a/modelscope/models/science/unifold/msa/templates.py +++ b/modelscope/models/science/unifold/msa/templates.py @@ -155,7 +155,7 @@ def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]: """Parses release dates file, returns a mapping from PDBs to release dates.""" if path.endswith('txt'): release_dates = {} - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: for line in f: pdb_id, date = line.split(':') date = date.strip() diff --git a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py index 68cbf918..49991b11 100644 --- a/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py +++ b/modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py @@ -106,14 +106,14 @@ class MovieSceneSegmentationDataset(TorchTaskDataset): self.tmpl = '{}/shot_{}_img_{}.jpg' # video_id, shot_id, shot_num if not self.test_mode: - with open(self.ann_file) as f: + with open(self.ann_file, encoding='utf-8') as f: self.anno_data = json.load(f) self.vidsid2label = { f"{it['video_id']}_{it['shot_id']}": it['boundary_label'] for it in self.anno_data } else: - with open(self.ann_file) as f: + with open(self.ann_file, encoding='utf-8') as f: self.anno_data = json.load(f) def init_sampler(self, cfg): diff --git a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py 
b/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py index c90351e9..8b6d22a4 100644 --- a/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py +++ b/modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py @@ -146,7 +146,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset): saved_annotations_file_path = osp.join( root_path, f'sentences_single_frame_{subset}_annotations.json') if osp.exists(saved_annotations_file_path): - with open(saved_annotations_file_path, 'r') as f: + with open(saved_annotations_file_path, 'r', encoding='utf-8') as f: text_annotations_by_frame = [tuple(a) for a in json.load(f)] return text_annotations_by_frame elif (distributed and dist.get_rank() == 0) or not distributed: @@ -203,7 +203,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset): json.dump(text_annotations_by_frame, f) if distributed: dist.barrier() - with open(saved_annotations_file_path, 'r') as f: + with open(saved_annotations_file_path, 'r', encoding='utf-8') as f: text_annotations_by_frame = [tuple(a) for a in json.load(f)] return text_annotations_by_frame @@ -267,8 +267,10 @@ def get_text_annotations_gt(root_path, subset): osp.join(root_path, 'Release/videoset.csv'), header=None) # 'vid', 'label', 'start_time', 'end_time', 'height', 'width', 'total_frames', 'annotated_frames', 'subset' a2d_data_info.columns = ['vid', '', '', '', '', '', '', '', 'subset'] - with open(osp.join(root_path, 'text_annotations/missed_videos.txt'), - 'r') as f: + with open( + osp.join(root_path, 'text_annotations/missed_videos.txt'), + 'r', + encoding='utf-8') as f: unused_videos = f.read().splitlines() subsets = {'train': 0, 'test': 1} # filter unused videos and videos which do not belong to our train/test subset: diff --git a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py b/modelscope/msdatasets/task_datasets/video_summarization_dataset.py index 34eb0450..02639be8 100644 --- a/modelscope/msdatasets/task_datasets/video_summarization_dataset.py +++ b/modelscope/msdatasets/task_datasets/video_summarization_dataset.py @@ -26,7 +26,7 @@ class VideoSummarizationDataset(TorchTaskDataset): self.list_n_frames = [] self.list_positions = [] - with open(self.split_filename) as f: + with open(self.split_filename, encoding='utf-8') as f: data = json.loads(f.read()) for i, split in enumerate(data): if i == self.split_index: diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py index 6a4864bf..da339083 100644 --- a/modelscope/pipelines/audio/asr_inference_pipeline.py +++ b/modelscope/pipelines/audio/asr_inference_pipeline.py @@ -116,7 +116,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): } if self.framework == Frameworks.torch: - config_file = open(inputs['asr_model_config']) + config_file = open(inputs['asr_model_config'], encoding='utf-8') root = yaml.full_load(config_file) config_file.close() frontend_conf = None diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py index 97cd8761..5e9220bd 100644 --- a/modelscope/pipelines/cv/__init__.py +++ b/modelscope/pipelines/cv/__init__.py @@ -59,6 +59,7 @@ if TYPE_CHECKING: from .mtcnn_face_detection_pipeline import MtcnnFaceDetectionPipelin from .hand_static_pipeline import HandStaticPipeline from .referring_video_object_segmentation_pipeline 
import ReferringVideoObjectSegmentationPipeline + from .language_guided_video_summarization_pipeline import LanguageGuidedVideoSummarizationPipeline else: _import_structure = { @@ -132,6 +133,9 @@ else: 'referring_video_object_segmentation_pipeline': [ 'ReferringVideoObjectSegmentationPipeline' ], + 'language_guided_video_summarization_pipeline': [ + 'LanguageGuidedVideoSummarizationPipeline' + ] } import sys diff --git a/modelscope/pipelines/cv/animal_recognition_pipeline.py b/modelscope/pipelines/cv/animal_recognition_pipeline.py index 671a5b4c..6d395a46 100644 --- a/modelscope/pipelines/cv/animal_recognition_pipeline.py +++ b/modelscope/pipelines/cv/animal_recognition_pipeline.py @@ -109,7 +109,7 @@ class AnimalRecognitionPipeline(Pipeline): def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: label_mapping_path = osp.join(self.local_path, 'label_mapping.txt') - with open(label_mapping_path, 'r') as f: + with open(label_mapping_path, 'r', encoding='utf-8') as f: label_mapping = f.readlines() score = torch.max(inputs['outputs']) inputs = { diff --git a/modelscope/pipelines/cv/general_recognition_pipeline.py b/modelscope/pipelines/cv/general_recognition_pipeline.py index 80f6f88a..c1136882 100644 --- a/modelscope/pipelines/cv/general_recognition_pipeline.py +++ b/modelscope/pipelines/cv/general_recognition_pipeline.py @@ -110,7 +110,7 @@ class GeneralRecognitionPipeline(Pipeline): def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: label_mapping_path = osp.join(self.local_path, 'meta_info.txt') - with open(label_mapping_path, 'r') as f: + with open(label_mapping_path, 'r', encoding='utf-8') as f: label_mapping = f.readlines() score = torch.max(inputs['outputs']) inputs = { diff --git a/modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py b/modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py new file mode 100755 index 00000000..059dadb7 --- /dev/null +++ b/modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py @@ -0,0 +1,250 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +import os.path as osp +import random +import shutil +import tempfile +from typing import Any, Dict + +import clip +import cv2 +import numpy as np +import torch +from PIL import Image + +from modelscope.metainfo import Pipelines +from modelscope.models.cv.language_guided_video_summarization import \ + ClipItVideoSummarization +from modelscope.models.cv.language_guided_video_summarization.summarizer import ( + extract_video_features, video_features_to_txt) +from modelscope.models.cv.video_summarization import summary_format +from modelscope.models.cv.video_summarization.summarizer import ( + generate_summary, get_change_points) +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Input, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.utils.config import Config +from modelscope.utils.constant import ModelFile, Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.language_guided_video_summarization, + module_name=Pipelines.language_guided_video_summarization) +class LanguageGuidedVideoSummarizationPipeline(Pipeline): + + def __init__(self, model: str, **kwargs): + """ + use `model` to create a language guided video summarization pipeline for prediction + Args: + model: model id on modelscope hub. 
+ """ + super().__init__(model=model, auto_collate=False, **kwargs) + logger.info(f'loading model from {model}') + self.model_dir = model + + self.tmp_dir = kwargs.get('tmp_dir', None) + if self.tmp_dir is None: + self.tmp_dir = tempfile.TemporaryDirectory().name + + config_path = osp.join(model, ModelFile.CONFIGURATION) + logger.info(f'loading config from {config_path}') + self.cfg = Config.from_file(config_path) + + self.clip_model, self.clip_preprocess = clip.load( + 'ViT-B/32', + device=self.device, + download_root=os.path.join(self.model_dir, 'clip')) + + self.clipit_model = ClipItVideoSummarization(model) + self.clipit_model = self.clipit_model.to(self.device).eval() + + logger.info('load model done') + + def preprocess(self, input: Input) -> Dict[str, Any]: + if not isinstance(input, tuple): + raise TypeError(f'input should be a str,' + f' but got {type(input)}') + + video_path, sentences = input + + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) + + frames = [] + picks = [] + cap = cv2.VideoCapture(video_path) + self.fps = cap.get(cv2.CAP_PROP_FPS) + self.frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) + frame_idx = 0 + # extract 1 frame every 15 frames in the video and save the frame index + while (cap.isOpened()): + ret, frame = cap.read() + if not ret: + break + if frame_idx % 15 == 0: + frames.append(frame) + picks.append(frame_idx) + frame_idx += 1 + n_frame = frame_idx + + if sentences is None: + logger.info('input sentences is none, using sentences from video!') + + tmp_path = os.path.join(self.tmp_dir, 'tmp') + i3d_flow_path = os.path.join(self.model_dir, 'i3d/i3d_flow.pt') + i3d_rgb_path = os.path.join(self.model_dir, 'i3d/i3d_rgb.pt') + kinetics_class_labels = os.path.join(self.model_dir, + 'i3d/label_map.txt') + pwc_path = os.path.join(self.model_dir, 'i3d/pwc_net.pt') + vggish_model_path = os.path.join(self.model_dir, + 'vggish/vggish_model.ckpt') + vggish_pca_path = os.path.join(self.model_dir, + 'vggish/vggish_pca_params.npz') + + device = torch.device( + 'cuda' if torch.cuda.is_available() else 'cpu') + i3d_feats = extract_video_features( + video_path=video_path, + feature_type='i3d', + tmp_path=tmp_path, + i3d_flow_path=i3d_flow_path, + i3d_rgb_path=i3d_rgb_path, + kinetics_class_labels=kinetics_class_labels, + pwc_path=pwc_path, + vggish_model_path=vggish_model_path, + vggish_pca_path=vggish_pca_path, + extraction_fps=2, + device=device) + rgb = i3d_feats['rgb'] + flow = i3d_feats['flow'] + + device = '/gpu:0' if torch.cuda.is_available() else '/cpu:0' + vggish = extract_video_features( + video_path=video_path, + feature_type='vggish', + tmp_path=tmp_path, + i3d_flow_path=i3d_flow_path, + i3d_rgb_path=i3d_rgb_path, + kinetics_class_labels=kinetics_class_labels, + pwc_path=pwc_path, + vggish_model_path=vggish_model_path, + vggish_pca_path=vggish_pca_path, + extraction_fps=2, + device=device) + audio = vggish['audio'] + + duration_in_secs = float(self.frame_count) / self.fps + + txt = video_features_to_txt( + duration_in_secs=duration_in_secs, + pretrained_cap_model_path=os.path.join( + self.model_dir, 'bmt/sample/best_cap_model.pt'), + prop_generator_model_path=os.path.join( + self.model_dir, 'bmt/sample/best_prop_model.pt'), + features={ + 'rgb': rgb, + 'flow': flow, + 'audio': audio + }, + device_id=0) + sentences = [item['sentence'] for item in txt] + + clip_image_features = [] + for frame in frames: + x = self.clip_preprocess( + Image.fromarray(cv2.cvtColor( + frame, cv2.COLOR_BGR2RGB))).unsqueeze(0).to(self.device) + with torch.no_grad(): + f 
= self.clip_model.encode_image(x).squeeze(0).cpu().numpy() + clip_image_features.append(f) + + clip_txt_features = [] + for sentence in sentences: + text_input = clip.tokenize(sentence).to(self.device) + with torch.no_grad(): + text_feature = self.clip_model.encode_text(text_input).squeeze( + 0).cpu().numpy() + clip_txt_features.append(text_feature) + clip_txt_features = self.sample_txt_feateures(clip_txt_features) + clip_txt_features = np.array(clip_txt_features).reshape((1, -1)) + + result = { + 'video_name': video_path, + 'clip_image_features': np.array(clip_image_features), + 'clip_txt_features': np.array(clip_txt_features), + 'n_frame': n_frame, + 'picks': np.array(picks) + } + return result + + def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + clip_image_features = input['clip_image_features'] + clip_txt_features = input['clip_txt_features'] + clip_image_features = self.norm_feature(clip_image_features) + clip_txt_features = self.norm_feature(clip_txt_features) + + change_points, n_frame_per_seg = get_change_points( + clip_image_features, input['n_frame']) + + summary = self.inference(clip_image_features, clip_txt_features, + input['n_frame'], input['picks'], + change_points) + + output = summary_format(summary, self.fps) + + return {OutputKeys.OUTPUT: output} + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + if os.path.exists(self.tmp_dir): + shutil.rmtree(self.tmp_dir) + return inputs + + def inference(self, clip_image_features, clip_txt_features, n_frames, + picks, change_points): + clip_image_features = torch.from_numpy( + np.array(clip_image_features, np.float32)).unsqueeze(0) + clip_txt_features = torch.from_numpy( + np.array(clip_txt_features, np.float32)).unsqueeze(0) + picks = np.array(picks, np.int32) + + with torch.no_grad(): + results = self.clipit_model( + dict( + frame_features=clip_image_features, + txt_features=clip_txt_features)) + scores = results['scores'] + if not scores.device.type == 'cpu': + scores = scores.cpu() + scores = scores.squeeze(0).numpy().tolist() + summary = generate_summary([change_points], [scores], [n_frames], + [picks])[0] + + return summary.tolist() + + def sample_txt_feateures(self, feat, num=7): + while len(feat) < num: + feat.append(feat[-1]) + idxes = list(np.arange(0, len(feat))) + samples_idx = [] + for ii in range(num): + idx = random.choice(idxes) + while idx in samples_idx: + idx = random.choice(idxes) + samples_idx.append(idx) + samples_idx.sort() + + samples = [] + for idx in samples_idx: + samples.append(feat[idx]) + return samples + + def norm_feature(self, frames_feat): + for ii in range(len(frames_feat)): + frame_feat = frames_feat[ii] + frames_feat[ii] = frame_feat / np.linalg.norm(frame_feat) + frames_feat = frames_feat.reshape((frames_feat.shape[0], -1)) + return frames_feat diff --git a/modelscope/pipelines/cv/ocr_recognition_pipeline.py b/modelscope/pipelines/cv/ocr_recognition_pipeline.py index e81467a1..d90f8db6 100644 --- a/modelscope/pipelines/cv/ocr_recognition_pipeline.py +++ b/modelscope/pipelines/cv/ocr_recognition_pipeline.py @@ -49,7 +49,7 @@ class OCRRecognitionPipeline(Pipeline): self.infer_model.load_state_dict( torch.load(model_path, map_location=self.device)) self.labelMapping = dict() - with open(label_path, 'r') as f: + with open(label_path, 'r', encoding='utf-8') as f: lines = f.readlines() cnt = 2 for line in lines: diff --git a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py 
index cfbf2607..f0a717a5 100644 --- a/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py @@ -138,6 +138,19 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline): video_np = rearrange(self.video, 't c h w -> t h w c').numpy() / 255.0 + # set font for text query in output video + if self.model.cfg.pipeline.output_font: + try: + font = ImageFont.truetype( + font=self.model.cfg.pipeline.output_font, + size=self.model.cfg.pipeline.output_font_size) + except OSError: + logger.error('can\'t open resource %s, load default font' + % self.model.cfg.pipeline.output_font) + font = ImageFont.load_default() + else: + font = ImageFont.load_default() + # del video pred_masks_per_frame = rearrange( torch.stack(inputs), 'q t 1 h w -> t q h w').numpy() @@ -158,12 +171,6 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline): W, H = vid_frame.size draw = ImageDraw.Draw(vid_frame) - if self.model.cfg.pipeline.output_font: - font = ImageFont.truetype( - font=self.model.cfg.pipeline.output_font, - size=self.model.cfg.pipeline.output_font_size) - else: - font = ImageFont.load_default() for i, (text_query, color) in enumerate( zip(self.text_queries, colors), start=1): w, h = draw.textsize(text_query, font=font) @@ -173,9 +180,6 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline): fill=tuple(color) + (255, ), font=font) masked_video.append(np.array(vid_frame)) - print(type(vid_frame)) - print(type(masked_video[0])) - print(masked_video[0].shape) # generate and save the output clip: assert self.model.cfg.pipeline.output_path diff --git a/modelscope/pipelines/cv/tinynas_classification_pipeline.py b/modelscope/pipelines/cv/tinynas_classification_pipeline.py index a470e58b..4dfd5c51 100644 --- a/modelscope/pipelines/cv/tinynas_classification_pipeline.py +++ b/modelscope/pipelines/cv/tinynas_classification_pipeline.py @@ -82,7 +82,7 @@ class TinynasClassificationPipeline(Pipeline): def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: label_mapping_path = osp.join(self.path, 'label_map.txt') - f = open(label_mapping_path) + f = open(label_mapping_path, encoding='utf-8') content = f.read() f.close() label_dict = eval(content) diff --git a/modelscope/pipelines/cv/video_category_pipeline.py b/modelscope/pipelines/cv/video_category_pipeline.py index e4c73649..4c52205e 100644 --- a/modelscope/pipelines/cv/video_category_pipeline.py +++ b/modelscope/pipelines/cv/video_category_pipeline.py @@ -36,7 +36,7 @@ class VideoCategoryPipeline(Pipeline): super().__init__(model=model, **kwargs) config_path = osp.join(self.model, ModelFile.CONFIGURATION) logger.info(f'loading configuration from {config_path}') - with open(config_path, 'r') as f: + with open(config_path, 'r', encoding='utf-8') as f: config = json.load(f) self.frame_num = config['frame_num'] self.level_1_num = config['level_1_num'] diff --git a/modelscope/pipelines/nlp/table_question_answering_pipeline.py b/modelscope/pipelines/nlp/table_question_answering_pipeline.py index b75a8153..bde78196 100644 --- a/modelscope/pipelines/nlp/table_question_answering_pipeline.py +++ b/modelscope/pipelines/nlp/table_question_answering_pipeline.py @@ -231,19 +231,6 @@ class TableQuestionAnsweringPipeline(Pipeline): header_ids = table['header_id'] + ['null'] sql = result['sql'] - str_sel_list, sql_sel_list = [], [] - for idx, sel in enumerate(sql['sel']): - header_name = header_names[sel] - header_id = '`%s`.`%s`' % (table['table_id'], header_ids[sel]) - if 
sql['agg'][idx] == 0: - str_sel_list.append(header_name) - sql_sel_list.append(header_id) - else: - str_sel_list.append(self.agg_ops[sql['agg'][idx]] + '(' - + header_name + ')') - sql_sel_list.append(self.agg_ops[sql['agg'][idx]] + '(' - + header_id + ')') - str_cond_list, sql_cond_list = [], [] where_conds, orderby_conds = [], [] for cond in sql['conds']: @@ -285,9 +272,34 @@ class TableQuestionAnsweringPipeline(Pipeline): if is_in: str_orderby += ' LIMIT %d' % (limit_num) sql_orderby += ' LIMIT %d' % (limit_num) + # post process null column + for idx, sel in enumerate(sql['sel']): + if sel == len(header_ids) - 1: + primary_sel = 0 + for index, attrib in enumerate(table['header_attribute']): + if attrib == 'PRIMARY': + primary_sel = index + break + if primary_sel not in sql['sel']: + sql['sel'][idx] = primary_sel + else: + del sql['sel'][idx] else: str_orderby = '' + str_sel_list, sql_sel_list = [], [] + for idx, sel in enumerate(sql['sel']): + header_name = header_names[sel] + header_id = '`%s`.`%s`' % (table['table_id'], header_ids[sel]) + if sql['agg'][idx] == 0: + str_sel_list.append(header_name) + sql_sel_list.append(header_id) + else: + str_sel_list.append(self.agg_ops[sql['agg'][idx]] + '(' + + header_name + ')') + sql_sel_list.append(self.agg_ops[sql['agg'][idx]] + '(' + + header_id + ')') + if len(str_cond_list) != 0 and len(str_orderby) != 0: final_str = 'SELECT %s FROM %s WHERE %s ORDER BY %s' % ( ', '.join(str_sel_list), table['table_name'], str_where_conds, diff --git a/modelscope/pipelines/science/protein_structure_pipeline.py b/modelscope/pipelines/science/protein_structure_pipeline.py index 3dc51c72..1ef9aa29 100644 --- a/modelscope/pipelines/science/protein_structure_pipeline.py +++ b/modelscope/pipelines/science/protein_structure_pipeline.py @@ -59,8 +59,9 @@ def load_feature_for_one_target( else: uniprot_msa_dir = data_folder - sequence_ids = open(os.path.join(data_folder, - 'chains.txt')).readline().split() + sequence_ids = open( + os.path.join(data_folder, 'chains.txt'), + encoding='utf-8').readline().split() if symmetry_group is None: batch, _ = load_and_process( diff --git a/modelscope/preprocessors/audio.py b/modelscope/preprocessors/audio.py index 1e659218..f02381ad 100644 --- a/modelscope/preprocessors/audio.py +++ b/modelscope/preprocessors/audio.py @@ -15,7 +15,7 @@ from modelscope.utils.constant import Fields def load_kaldi_feature_transform(filename): - fp = open(filename, 'r') + fp = open(filename, 'r', encoding='utf-8') all_str = fp.read() pos1 = all_str.find('AddShift') pos2 = all_str.find('[', pos1) diff --git a/modelscope/preprocessors/kws.py b/modelscope/preprocessors/kws.py index 6f09d545..33847702 100644 --- a/modelscope/preprocessors/kws.py +++ b/modelscope/preprocessors/kws.py @@ -78,7 +78,7 @@ class WavToLists(Preprocessor): assert os.path.exists( inputs['config_path']), 'model config yaml file does not exist' - config_file = open(inputs['config_path']) + config_file = open(inputs['config_path'], encoding='utf-8') root = yaml.full_load(config_file) config_file.close() diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py index 3a3ae820..52cde61c 100644 --- a/modelscope/preprocessors/multi_modal.py +++ b/modelscope/preprocessors/multi_modal.py @@ -145,8 +145,9 @@ class CLIPPreprocessor(Preprocessor): self.image_resolution = kwargs['resolution'] else: self.image_resolution = json.load( - open('{}/vision_model_config.json'.format( - model_dir)))['image_resolution'] + open( + 
'{}/vision_model_config.json'.format(model_dir), + encoding='utf-8'))['image_resolution'] self.img_preprocess = self._build_image_transform() # key mapping # specify the input keys, compatible with training and inference whose key names may be different diff --git a/modelscope/preprocessors/nlp/nlp_base.py b/modelscope/preprocessors/nlp/nlp_base.py index 45efc6e7..7fe28eb5 100644 --- a/modelscope/preprocessors/nlp/nlp_base.py +++ b/modelscope/preprocessors/nlp/nlp_base.py @@ -59,8 +59,10 @@ class NLPBasePreprocessor(Preprocessor, ABC): self.use_fast = False elif self.use_fast is None and os.path.isfile( os.path.join(model_dir, 'tokenizer_config.json')): - with open(os.path.join(model_dir, 'tokenizer_config.json'), - 'r') as f: + with open( + os.path.join(model_dir, 'tokenizer_config.json'), + 'r', + encoding='utf-8') as f: json_config = json.load(f) self.use_fast = json_config.get('use_fast') self.use_fast = False if self.use_fast is None else self.use_fast diff --git a/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py b/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py index 2923157e..5aa662fc 100644 --- a/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py +++ b/modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py @@ -35,7 +35,10 @@ class DialogIntentPredictionPreprocessor(Preprocessor): self.model_dir, config=self.config) self.categories = None - with open(os.path.join(self.model_dir, 'categories.json'), 'r') as f: + with open( + os.path.join(self.model_dir, 'categories.json'), + 'r', + encoding='utf-8') as f: self.categories = json.load(f) assert len(self.categories) == 77 diff --git a/modelscope/preprocessors/nlp/space/dst_processors.py b/modelscope/preprocessors/nlp/space/dst_processors.py index 1f9920a9..1b6159b5 100644 --- a/modelscope/preprocessors/nlp/space/dst_processors.py +++ b/modelscope/preprocessors/nlp/space/dst_processors.py @@ -184,7 +184,7 @@ class multiwoz22Processor(DSTProcessor): # Loads the dialogue_acts.json and returns a list # of slot-value pairs. 
def load_acts(self, input_file): - with open(input_file) as f: + with open(input_file, encoding='utf-8') as f: acts = json.load(f) s_dict = {} for d in acts: diff --git a/modelscope/preprocessors/nlp/space/fields/gen_field.py b/modelscope/preprocessors/nlp/space/fields/gen_field.py index 1d1879fe..20b2c48a 100644 --- a/modelscope/preprocessors/nlp/space/fields/gen_field.py +++ b/modelscope/preprocessors/nlp/space/fields/gen_field.py @@ -359,12 +359,14 @@ class MultiWOZBPETextField(BPETextField): test_list = [ line.strip().lower() for line in open( os.path.join(kwargs['data_dir'], 'testListFile.json'), - 'r').readlines() + 'r', + encoding='utf-8').readlines() ] dev_list = [ line.strip().lower() for line in open( os.path.join(kwargs['data_dir'], 'valListFile.json'), - 'r').readlines() + 'r', + encoding='utf-8').readlines() ] self.dev_files, self.test_files = {}, {} diff --git a/modelscope/preprocessors/nlp/space/tokenizer.py b/modelscope/preprocessors/nlp/space/tokenizer.py index 1bd0ce11..798ce3b7 100644 --- a/modelscope/preprocessors/nlp/space/tokenizer.py +++ b/modelscope/preprocessors/nlp/space/tokenizer.py @@ -531,7 +531,7 @@ class GPT2Tokenizer(object): special_tokens=None, max_len=None): self.max_len = max_len if max_len is not None else int(1e12) - self.encoder = json.load(open(vocab_file)) + self.encoder = json.load(open(vocab_file, encoding='utf-8')) self.decoder = {v: k for k, v in self.encoder.items()} self.errors = errors # how to handle errors in decoding self.byte_encoder = bytes_to_unicode() diff --git a/modelscope/preprocessors/nlp/space_T_cn/fields/database.py b/modelscope/preprocessors/nlp/space_T_cn/fields/database.py index 2fef8d7e..1300cc95 100644 --- a/modelscope/preprocessors/nlp/space_T_cn/fields/database.py +++ b/modelscope/preprocessors/nlp/space_T_cn/fields/database.py @@ -20,9 +20,9 @@ class Database: self.connection_obj = sqlite3.connect( ':memory:', check_same_thread=False) self.type_dict = {'text': 'TEXT', 'number': 'INT', 'date': 'TEXT'} - self.tables = self.init_tables(table_file_path=table_file_path) self.syn_dict = self.init_syn_dict( syn_dict_file_path=syn_dict_file_path) + self.tables = self.init_tables(table_file_path=table_file_path) def __del__(self): if self.is_use_sqlite: @@ -32,12 +32,12 @@ class Database: tables = {} lines = [] if type(table_file_path) == str: - with open(table_file_path, 'r') as fo: + with open(table_file_path, 'r', encoding='utf-8') as fo: for line in fo: lines.append(line) elif type(table_file_path) == list: for path in table_file_path: - with open(path, 'r') as fo: + with open(path, 'r', encoding='utf-8') as fo: for line in fo: lines.append(line) else: @@ -75,6 +75,10 @@ class Database: continue word = str(cell).strip().lower() trie_set[ii].insert(word, word) + if word in self.syn_dict.keys(): + for term in self.syn_dict[word]: + if term.strip() != '': + trie_set[ii].insert(term, word) table['value_trie'] = trie_set diff --git a/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py b/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py index 00c7bcd7..0ebd857e 100644 --- a/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py +++ b/modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py @@ -45,7 +45,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor): and torch.cuda.is_available() else 'cpu' self.processor = None self.table_path = os.path.join(self.model_dir, 'tables.json') - self.tables = 
json.load(open(self.table_path, 'r')) + self.tables = json.load(open(self.table_path, 'r', encoding='utf-8')) self.output_tables = None self.path_cache = [] self.graph_processor = GraphProcessor() @@ -89,7 +89,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor): 'local_db_path'] not in self.path_cache: self.path_cache.append(data['local_db_path']) path = os.path.join(data['local_db_path'], 'tables.json') - self.tables = json.load(open(path, 'r')) + self.tables = json.load(open(path, 'r', encoding='utf-8')) self.processor.db_dir = os.path.join(data['local_db_path'], 'db') self.output_tables = process_tables(self.processor, self.tables) Example.configuration( diff --git a/modelscope/preprocessors/ofa/base.py b/modelscope/preprocessors/ofa/base.py index 55b3895d..e5c30ff8 100644 --- a/modelscope/preprocessors/ofa/base.py +++ b/modelscope/preprocessors/ofa/base.py @@ -76,7 +76,7 @@ class OfaBasePreprocessor: self.constraint_trie = None if self.cfg.model.get('answer2label', None): ans2label_file = osp.join(model_dir, self.cfg.model.answer2label) - with open(ans2label_file, 'r') as reader: + with open(ans2label_file, 'r', encoding='utf-8') as reader: ans2label_dict = json.load(reader) self.ans2label = ans2label_dict self.label2ans = {v: k for k, v in self.ans2label.items()} diff --git a/modelscope/preprocessors/science/uni_fold.py b/modelscope/preprocessors/science/uni_fold.py index 2a44c885..ae72433c 100644 --- a/modelscope/preprocessors/science/uni_fold.py +++ b/modelscope/preprocessors/science/uni_fold.py @@ -201,7 +201,7 @@ def run_mmseqs2( a3m_lines = {} for a3m_file in a3m_files: update_M, M = True, None - with open(a3m_file, 'r') as f: + with open(a3m_file, 'r', encoding='utf-8') as f: lines = f.readlines() for line in lines: if len(line) > 0: diff --git a/modelscope/trainers/nlp/csanmt_translation_trainer.py b/modelscope/trainers/nlp/csanmt_translation_trainer.py index c93599c7..08a3a351 100644 --- a/modelscope/trainers/nlp/csanmt_translation_trainer.py +++ b/modelscope/trainers/nlp/csanmt_translation_trainer.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import os.path as osp +import time from typing import Dict, Optional import tensorflow as tf @@ -122,8 +123,7 @@ class CsanmtTranslationTrainer(BaseTrainer): self.params['scale_l1'] = self.cfg['train']['scale_l1'] self.params['scale_l2'] = self.cfg['train']['scale_l2'] self.params['train_max_len'] = self.cfg['train']['train_max_len'] - self.params['max_training_steps'] = self.cfg['train'][ - 'max_training_steps'] + self.params['num_of_epochs'] = self.cfg['train']['num_of_epochs'] self.params['save_checkpoints_steps'] = self.cfg['train'][ 'save_checkpoints_steps'] self.params['num_of_samples'] = self.cfg['train']['num_of_samples'] @@ -144,14 +144,15 @@ class CsanmtTranslationTrainer(BaseTrainer): vocab_src = osp.join(self.model_dir, self.params['vocab_src']) vocab_trg = osp.join(self.model_dir, self.params['vocab_trg']) + epoch = 0 iteration = 0 with self._session.as_default() as tf_session: while True: - iteration += 1 - if iteration >= self.params['max_training_steps']: + epoch += 1 + if epoch >= self.params['num_of_epochs']: break - + tf.logging.info('%s: Epoch %i' % (__name__, epoch)) train_input_fn = input_fn( train_src, train_trg, @@ -160,36 +161,44 @@ class CsanmtTranslationTrainer(BaseTrainer): batch_size_words=self.params['train_batch_size_words'], max_len=self.params['train_max_len'], num_gpus=self.params['num_gpus'] - if self.params['num_gpus'] > 0 else 1, + if self.params['num_gpus'] > 1 else 1, is_train=True, session=tf_session, - iteration=iteration) + epoch=epoch) features, labels = train_input_fn - features_batch, labels_batch = tf_session.run( - [features, labels]) - - feed_dict = { - self.source_wids: features_batch, - self.target_wids: labels_batch - } - sess_outputs = self._session.run( - self.output, feed_dict=feed_dict) - loss_step = sess_outputs['loss'] - logger.info('Iteration: {}, step loss: {:.6f}'.format( - iteration, loss_step)) - - if iteration % self.params['save_checkpoints_steps'] == 0: - tf.logging.info('%s: Saving model on step: %d.' % - (__name__, iteration)) - ck_path = self.model_dir + 'model.ckpt' - self.model_saver.save( - tf_session, - ck_path, - global_step=tf.train.get_global_step()) - - tf.logging.info('%s: NMT training completed at time: %s.') + try: + while True: + features_batch, labels_batch = tf_session.run( + [features, labels]) + iteration += 1 + feed_dict = { + self.source_wids: features_batch, + self.target_wids: labels_batch + } + sess_outputs = self._session.run( + self.output, feed_dict=feed_dict) + loss_step = sess_outputs['loss'] + logger.info('Iteration: {}, step loss: {:.6f}'.format( + iteration, loss_step)) + + if iteration % self.params[ + 'save_checkpoints_steps'] == 0: + tf.logging.info('%s: Saving model on step: %d.' % + (__name__, iteration)) + ck_path = self.model_dir + 'model.ckpt' + self.model_saver.save( + tf_session, + ck_path, + global_step=tf.train.get_global_step()) + + except tf.errors.OutOfRangeError: + tf.logging.info('epoch %d end!' % (epoch)) + + tf.logging.info( + '%s: NMT training completed at time: %s.' 
% + (__name__, time.asctime(time.localtime(time.time())))) def evaluate(self, checkpoint_path: Optional[str] = None, @@ -222,7 +231,7 @@ def input_fn(src_file, num_gpus=1, is_train=True, session=None, - iteration=None): + epoch=None): src_vocab = tf.lookup.StaticVocabularyTable( tf.lookup.TextFileInitializer( src_vocab_file, @@ -291,7 +300,7 @@ def input_fn(src_file, if is_train: session.run(iterator.initializer) - if iteration == 1: + if epoch == 1: session.run(tf.tables_initializer()) return features, labels diff --git a/modelscope/trainers/nlp/space/eval.py b/modelscope/trainers/nlp/space/eval.py index f315ff07..2db40cae 100644 --- a/modelscope/trainers/nlp/space/eval.py +++ b/modelscope/trainers/nlp/space/eval.py @@ -771,7 +771,8 @@ class CamRestEvaluator(GenericEvaluator): def get_entities(self, entity_path): entities_flat = [] entitiy_to_slot_dict = {} - raw_entities = json.loads(open(entity_path).read().lower()) + raw_entities = json.loads( + open(entity_path, encoding='utf-8').read().lower()) for s in raw_entities['informable']: entities_flat.extend(raw_entities['informable'][s]) for v in raw_entities['informable'][s]: diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py index 32e2fa54..1ae5c8d2 100644 --- a/modelscope/utils/audio/audio_utils.py +++ b/modelscope/utils/audio/audio_utils.py @@ -47,7 +47,7 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]): else: return None - with open(origin_config_file) as f: + with open(origin_config_file, encoding='utf-8') as f: lines = f.readlines() with open(new_config_file, 'w') as f: for line in lines: diff --git a/modelscope/utils/config.py b/modelscope/utils/config.py index e46da7df..b3512251 100644 --- a/modelscope/utils/config.py +++ b/modelscope/utils/config.py @@ -178,7 +178,7 @@ class Config: if cfg_text: text = cfg_text elif filename: - with open(filename, 'r') as f: + with open(filename, 'r', encoding='utf-8') as f: text = f.read() else: text = '' diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index f0a97dbd..b1bccc4c 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -80,6 +80,7 @@ class CVTasks(object): video_embedding = 'video-embedding' virtual_try_on = 'virtual-try-on' movie_scene_segmentation = 'movie-scene-segmentation' + language_guided_video_summarization = 'language-guided-video-summarization' # video segmentation referring_video_object_segmentation = 'referring-video-object-segmentation' diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py index 105b3ffa..93cc20e2 100644 --- a/modelscope/utils/hub.py +++ b/modelscope/utils/hub.py @@ -124,7 +124,7 @@ def parse_label_mapping(model_dir): label2id = None label_path = os.path.join(model_dir, ModelFile.LABEL_MAPPING) if os.path.exists(label_path): - with open(label_path) as f: + with open(label_path, encoding='utf-8') as f: label_mapping = json.load(f) label2id = {name: idx for name, idx in label_mapping.items()} diff --git a/modelscope/utils/nlp/space/clean_dataset.py b/modelscope/utils/nlp/space/clean_dataset.py index 2c971b10..cbd0ebde 100644 --- a/modelscope/utils/nlp/space/clean_dataset.py +++ b/modelscope/utils/nlp/space/clean_dataset.py @@ -59,7 +59,9 @@ def clean_text(data_dir, text): text) # 'abc.xyz' -> 'abc . xyz' text = re.sub(r'(\w+)\.\.? ', r'\1 . ', text) # if 'abc. ' -> 'abc . 
' - with open(os.path.join(data_dir, 'mapping.pair'), 'r') as fin: + with open( + os.path.join(data_dir, 'mapping.pair'), 'r', + encoding='utf-8') as fin: for line in fin.readlines(): fromx, tox = line.replace('\n', '').split('\t') text = ' ' + text + ' ' diff --git a/modelscope/utils/nlp/space/db_ops.py b/modelscope/utils/nlp/space/db_ops.py index d1d14ef9..27198b23 100644 --- a/modelscope/utils/nlp/space/db_ops.py +++ b/modelscope/utils/nlp/space/db_ops.py @@ -15,7 +15,9 @@ class MultiWozDB(object): self.dbs = {} self.sql_dbs = {} for domain in all_domains: - with open(os.path.join(db_dir, db_paths[domain]), 'r') as f: + with open( + os.path.join(db_dir, db_paths[domain]), 'r', + encoding='utf-8') as f: self.dbs[domain] = json.loads(f.read().lower()) def oneHotVector(self, domain, num): diff --git a/modelscope/utils/nlp/space/utils.py b/modelscope/utils/nlp/space/utils.py index 56e67671..70cb03a0 100644 --- a/modelscope/utils/nlp/space/utils.py +++ b/modelscope/utils/nlp/space/utils.py @@ -146,9 +146,9 @@ class MultiWOZVocab(object): def load_vocab(self, vocab_path): self._freq_dict = json.loads( - open(vocab_path + '.freq.json', 'r').read()) + open(vocab_path + '.freq.json', 'r', encoding='utf-8').read()) self._word2idx = json.loads( - open(vocab_path + '.word2idx.json', 'r').read()) + open(vocab_path + '.word2idx.json', 'r', encoding='utf-8').read()) self._idx2word = {} for w, idx in self._word2idx.items(): self._idx2word[idx] = w diff --git a/requirements/cv.txt b/requirements/cv.txt index f29b296b..43eba7f9 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -1,5 +1,7 @@ albumentations>=1.0.3 av>=9.2.0 +bmt_clipit>=1.0 +clip>=1.0 easydict fairscale>=0.4.1 fastai>=1.0.51 @@ -19,6 +21,7 @@ moviepy>=1.0.3 networkx>=2.5 numba onnxruntime>=1.10 +opencv-python pai-easycv>=0.6.3.9 pandas psutil @@ -32,3 +35,4 @@ tf_slim timm>=0.4.9 torchmetrics>=0.6.2 torchvision +videofeatures_clipit>=1.0 diff --git a/requirements/framework.txt b/requirements/framework.txt index a86c0cc5..52601579 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,6 +1,6 @@ addict attrs -# version beyond 2.5.2 introduces compatbility issue and is being resolved +# version beyond 2.5.2 introduces compatibility issue and is being resolved datasets<=2.5.2 easydict einops @@ -8,7 +8,6 @@ filelock>=3.3.0 gast>=0.2.2 jsonplus numpy -opencv-python oss2 Pillow>=6.2.0 # for pyarrow 9.0.0 event_loop core dump diff --git a/setup.py b/setup.py index eff2f8ba..d709dadc 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ def get_hash(): def get_version(): - with open(version_file, 'r') as f: + with open(version_file, 'r', encoding='utf-8') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] @@ -109,7 +109,7 @@ def parse_requirements(fname='requirements.txt', with_version=True): yield info def parse_require_file(fpath): - with open(fpath, 'r') as f: + with open(fpath, 'r', encoding='utf-8') as f: for line in f.readlines(): line = line.strip() if line.startswith('http'): diff --git a/tests/pipelines/test_language_guided_video_summarization.py b/tests/pipelines/test_language_guided_video_summarization.py new file mode 100755 index 00000000..0f06d4f2 --- /dev/null +++ b/tests/pipelines/test_language_guided_video_summarization.py @@ -0,0 +1,49 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.test_utils import test_level + + +class LanguageGuidedVideoSummarizationTest(unittest.TestCase, + DemoCompatibilityCheck): + + def setUp(self) -> None: + self.task = Tasks.language_guided_video_summarization + self.model_id = 'damo/cv_clip-it_video-summarization_language-guided_en' + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_modelhub(self): + video_path = 'data/test/videos/video_category_test_video.mp4' + # input can be sentences such as sentences=['phone', 'hand'], or sentences=None + sentences = None + summarization_pipeline = pipeline( + Tasks.language_guided_video_summarization, model=self.model_id) + result = summarization_pipeline((video_path, sentences)) + + print(f'video summarization output: \n{result}.') + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_modelhub_default_model(self): + video_path = 'data/test/videos/video_category_test_video.mp4' + summarization_pipeline = pipeline( + Tasks.language_guided_video_summarization) + result = summarization_pipeline(video_path) + + print(f'video summarization output:\n {result}.') + + @unittest.skip('demo compatibility test is only enabled on a needed-basis') + def test_demo_compatibility(self): + self.compatibility_check() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/pipelines/test_referring_video_object_segmentation.py b/tests/pipelines/test_referring_video_object_segmentation.py index 4d8206b3..3e81d9c3 100644 --- a/tests/pipelines/test_referring_video_object_segmentation.py +++ b/tests/pipelines/test_referring_video_object_segmentation.py @@ -14,7 +14,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase, self.task = Tasks.referring_video_object_segmentation self.model_id = 'damo/cv_swin-t_referring_video-object-segmentation' - @unittest.skip('skip since the model is set to private for now') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_referring_video_object_segmentation(self): input_location = 'data/test/videos/referring_video_object_segmentation_test_video.mp4' text_queries = [ @@ -31,7 +31,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase, else: raise ValueError('process error') - @unittest.skip('skip since the model is set to private for now') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_referring_video_object_segmentation_with_default_task(self): input_location = 'data/test/videos/referring_video_object_segmentation_test_video.mp4' text_queries = [ diff --git a/tests/pipelines/test_table_question_answering.py b/tests/pipelines/test_table_question_answering.py index 825d8f23..9faed993 100644 --- a/tests/pipelines/test_table_question_answering.py +++ b/tests/pipelines/test_table_question_answering.py @@ -24,13 +24,10 @@ def tableqa_tracking_and_print_results_with_history( 'utterance': [ '有哪些风险类型?', '风险类型有多少种?', - '珠江流域的小(2)型水库的库容总量是多少?', + '珠江流域的小型水库的库容总量是多少?', '那平均值是多少?', '那水库的名称呢?', '换成中型的呢?', - '枣庄营业厅的电话', - '那地址呢?', - '枣庄营业厅的电话和地址', ] } for p in pipelines: @@ -55,9 +52,7 @@ def tableqa_tracking_and_print_results_without_history( 'utterance': [ '有哪些风险类型?', '风险类型有多少种?', - '珠江流域的小(2)型水库的库容总量是多少?', - '枣庄营业厅的电话', - '枣庄营业厅的电话和地址', + '珠江流域的小型水库的库容总量是多少?', ] } 
for p in pipelines: @@ -77,13 +72,10 @@ def tableqa_tracking_and_print_results_with_tableid( 'utterance': [ ['有哪些风险类型?', 'fund'], ['风险类型有多少种?', 'reservoir'], - ['珠江流域的小(2)型水库的库容总量是多少?', 'reservoir'], + ['珠江流域的小型水库的库容总量是多少?', 'reservoir'], ['那平均值是多少?', 'reservoir'], ['那水库的名称呢?', 'reservoir'], ['换成中型的呢?', 'reservoir'], - ['枣庄营业厅的电话', 'business'], - ['那地址呢?', 'business'], - ['枣庄营业厅的电话和地址', 'business'], ], } for p in pipelines: @@ -157,7 +149,7 @@ class TableQuestionAnswering(unittest.TestCase): os.path.join(model.model_dir, 'databases')) ], syn_dict_file_path=os.path.join(model.model_dir, 'synonym.txt'), - is_use_sqlite=False) + is_use_sqlite=True) preprocessor = TableQuestionAnsweringPreprocessor( model_dir=model.model_dir, db=db) pipelines = [ diff --git a/tests/run.py b/tests/run.py index b286ecb5..0759379f 100644 --- a/tests/run.py +++ b/tests/run.py @@ -247,7 +247,7 @@ def run_in_subprocess(args): test_suite_env_map[test_suite_file] = 'default' if args.run_config is not None and Path(args.run_config).exists(): - with open(args.run_config) as f: + with open(args.run_config, encoding='utf-8') as f: run_config = yaml.load(f, Loader=yaml.FullLoader) if 'isolated' in run_config: isolated_cases = run_config['isolated'] diff --git a/tests/run_config.yaml b/tests/run_config.yaml index d51e2606..faee2869 100644 --- a/tests/run_config.yaml +++ b/tests/run_config.yaml @@ -12,6 +12,7 @@ isolated: # test cases that may require excessive anmount of GPU memory, which - test_segmentation_pipeline.py - test_movie_scene_segmentation.py - test_image_inpainting.py + - test_mglm_text_summarization.py envs: default: # default env, case not in other env will in default, pytorch. diff --git a/tests/trainers/easycv/test_easycv_trainer.py b/tests/trainers/easycv/test_easycv_trainer.py index 4bd63c55..5d714097 100644 --- a/tests/trainers/easycv/test_easycv_trainer.py +++ b/tests/trainers/easycv/test_easycv_trainer.py @@ -109,7 +109,7 @@ class EasyCVTrainerTestSingleGpu(unittest.TestCase): json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) self.assertEqual(len(json_files), 1) - with open(json_files[0], 'r') as f: + with open(json_files[0], 'r', encoding='utf-8') as f: lines = [i.strip() for i in f.readlines()] self.assertDictContainsSubset( @@ -185,7 +185,7 @@ class EasyCVTrainerTestMultiGpus(DistributedTestCase): json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) self.assertEqual(len(json_files), 1) - with open(json_files[0], 'r') as f: + with open(json_files[0], 'r', encoding='utf-8') as f: lines = [i.strip() for i in f.readlines()] self.assertDictContainsSubset( diff --git a/tests/trainers/test_image_denoise_trainer.py b/tests/trainers/test_image_denoise_trainer.py index b742dcae..3b5882bd 100644 --- a/tests/trainers/test_image_denoise_trainer.py +++ b/tests/trainers/test_image_denoise_trainer.py @@ -62,7 +62,7 @@ class ImageDenoiseTrainerTest(unittest.TestCase): trainer.train() results_files = os.listdir(self.tmp_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) - for i in range(2): + for i in range(1): self.assertIn(f'epoch_{i+1}.pth', results_files) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @@ -73,13 +73,13 @@ class ImageDenoiseTrainerTest(unittest.TestCase): model=model, train_dataset=self.dataset_train, eval_dataset=self.dataset_val, - max_epochs=2, + max_epochs=1, work_dir=self.tmp_dir) trainer = build_trainer(default_args=kwargs) trainer.train() results_files = os.listdir(self.tmp_dir) 
self.assertIn(f'{trainer.timestamp}.log.json', results_files) - for i in range(2): + for i in range(1): self.assertIn(f'epoch_{i+1}.pth', results_files) diff --git a/tests/trainers/test_referring_video_object_segmentation_trainer.py b/tests/trainers/test_referring_video_object_segmentation_trainer.py index 7b03eb4d..fb152954 100644 --- a/tests/trainers/test_referring_video_object_segmentation_trainer.py +++ b/tests/trainers/test_referring_video_object_segmentation_trainer.py @@ -7,8 +7,8 @@ import zipfile from modelscope.hub.snapshot_download import snapshot_download from modelscope.metainfo import Trainers -from modelscope.models.cv.movie_scene_segmentation import \ - MovieSceneSegmentationModel +from modelscope.models.cv.referring_video_object_segmentation import \ + ReferringVideoObjectSegmentation from modelscope.msdatasets import MsDataset from modelscope.trainers import build_trainer from modelscope.utils.config import Config, ConfigDict @@ -46,7 +46,6 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): dataset_name=train_data_cfg.name, split=train_data_cfg.split, cfg=train_data_cfg.cfg, - namespace='damo', test_mode=train_data_cfg.test_mode) assert next( iter(self.train_dataset.config_kwargs['split_config'].values())) @@ -55,14 +54,13 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): dataset_name=test_data_cfg.name, split=test_data_cfg.split, cfg=test_data_cfg.cfg, - namespace='damo', test_mode=test_data_cfg.test_mode) assert next( iter(self.test_dataset.config_kwargs['split_config'].values())) self.max_epochs = max_epochs - @unittest.skip('skip since the model is set to private for now') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_trainer(self): kwargs = dict( model=self.model_id, @@ -77,11 +75,11 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): results_files = os.listdir(trainer.work_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) - @unittest.skip('skip since the model is set to private for now') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_trainer_with_model_and_args(self): cache_path = snapshot_download(self.model_id) - model = MovieSceneSegmentationModel.from_pretrained(cache_path) + model = ReferringVideoObjectSegmentation.from_pretrained(cache_path) kwargs = dict( cfg_file=os.path.join(cache_path, ModelFile.CONFIGURATION), model=model, diff --git a/tests/trainers/test_trainer.py b/tests/trainers/test_trainer.py index c73a56a3..5d466ee0 100644 --- a/tests/trainers/test_trainer.py +++ b/tests/trainers/test_trainer.py @@ -248,7 +248,7 @@ class TrainerTest(unittest.TestCase): results_files = os.listdir(self.tmp_dir) json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json') - with open(json_file, 'r') as f: + with open(json_file, 'r', encoding='utf-8') as f: lines = [i.strip() for i in f.readlines()] self.assertDictContainsSubset( { @@ -367,7 +367,7 @@ class TrainerTest(unittest.TestCase): trainer.train() results_files = os.listdir(self.tmp_dir) json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json') - with open(json_file, 'r') as f: + with open(json_file, 'r', encoding='utf-8') as f: lines = [i.strip() for i in f.readlines()] self.assertDictContainsSubset( { diff --git a/tests/trainers/test_trainer_gpu.py b/tests/trainers/test_trainer_gpu.py index 0176704a..c003f3c9 100644 --- a/tests/trainers/test_trainer_gpu.py +++ b/tests/trainers/test_trainer_gpu.py @@ -142,7 +142,7 @@ class 
TrainerTestSingleGpu(unittest.TestCase): json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) self.assertEqual(len(json_files), 1) - with open(json_files[0], 'r') as f: + with open(json_files[0], 'r', encoding='utf-8') as f: lines = [i.strip() for i in f.readlines()] self.assertDictContainsSubset( { @@ -236,7 +236,7 @@ class TrainerTestMultiGpus(DistributedTestCase): json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) self.assertEqual(len(json_files), 1) - with open(json_files[0], 'r') as f: + with open(json_files[0], 'r', encoding='utf-8') as f: lines = [i.strip() for i in f.readlines()] self.assertDictContainsSubset( @@ -320,7 +320,7 @@ class TrainerTestMultiGpus(DistributedTestCase): json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json')) self.assertEqual(len(json_files), 1) - with open(json_files[0], 'r') as f: + with open(json_files[0], 'r', encoding='utf-8') as f: lines = [i.strip() for i in f.readlines()] print(results_files, lines) diff --git a/tests/trainers/test_translation_trainer.py b/tests/trainers/test_translation_trainer.py index 71bed241..7be23145 100644 --- a/tests/trainers/test_translation_trainer.py +++ b/tests/trainers/test_translation_trainer.py @@ -6,11 +6,17 @@ from modelscope.utils.test_utils import test_level class TranslationTest(unittest.TestCase): - model_id = 'damo/nlp_csanmt_translation_zh2en' @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run_with_model_name(self): - trainer = CsanmtTranslationTrainer(model=self.model_id) + def test_run_with_model_name_for_en2zh(self): + model_id = 'damo/nlp_csanmt_translation_en2zh' + trainer = CsanmtTranslationTrainer(model=model_id) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name_for_en2fr(self): + model_id = 'damo/nlp_csanmt_translation_en2fr' + trainer = CsanmtTranslationTrainer(model=model_id) trainer.train()
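For quick reference, the language-guided video summarization pipeline introduced by this patch can be exercised the same way the added test does. A minimal usage sketch, assuming the model id and sample video referenced in tests/pipelines/test_language_guided_video_summarization.py are available:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Model id and sample video path taken from the test added in this patch.
summarizer = pipeline(
    Tasks.language_guided_video_summarization,
    model='damo/cv_clip-it_video-summarization_language-guided_en')

# Input is a (video_path, sentences) tuple; sentences may be a list of guiding
# phrases such as ['phone', 'hand'], or None, in which case captions are
# generated from the video itself before summarization.
result = summarizer(('data/test/videos/video_category_test_video.mp4', None))
print(result)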