Merge pull request #29 from modelscope/merge_master_internal_1116

Merge master internal to master

Merged into master by Yingda Chen (GitHub), 2 years ago
Parent commit: 2e41326849
100 changed files with 1405 additions and 193 deletions
  1. .dev_scripts/ci_container_test.sh (+13, -6)
  2. .pre-commit-config.yaml (+1, -1)
  3. docker/Dockerfile.ubuntu (+28, -5)
  4. docker/jupyter_plugins/jupyterlab_active_log/package.json (+99, -0)
  5. docker/jupyter_plugins/jupyterlab_active_log/static/568.a92ae44b87625ab09aed.js (+1, -0)
  6. docker/jupyter_plugins/jupyterlab_active_log/static/747.63b4c3d22bfe458b352b.js (+1, -0)
  7. docker/jupyter_plugins/jupyterlab_active_log/static/remoteEntry.eb3177c3791d7658cc12.js (+1, -0)
  8. docker/jupyter_plugins/jupyterlab_active_log/static/style.js (+4, -0)
  9. docker/jupyter_plugins/jupyterlab_active_log/static/third-party-licenses.json (+16, -0)
  10. docker/scripts/install_apex.sh (+5, -0)
  11. docker/scripts/install_unifold.sh (+12, -0)
  12. docker/scripts/modelscope_env_init.sh (+47, -0)
  13. docs/source/conf.py (+1, -1)
  14. modelscope/hub/api.py (+2, -2)
  15. modelscope/metainfo.py (+2, -0)
  16. modelscope/models/audio/tts/models/datasets/kantts_data4fs.py (+1, -1)
  17. modelscope/models/audio/tts/sambert_hifi.py (+1, -1)
  18. modelscope/models/cv/__init__.py (+4, -4)
  19. modelscope/models/cv/language_guided_video_summarization/__init__.py (+25, -0)
  20. modelscope/models/cv/language_guided_video_summarization/summarizer.py (+194, -0)
  21. modelscope/models/cv/language_guided_video_summarization/transformer/__init__.py (+25, -0)
  22. modelscope/models/cv/language_guided_video_summarization/transformer/layers.py (+48, -0)
  23. modelscope/models/cv/language_guided_video_summarization/transformer/models.py (+229, -0)
  24. modelscope/models/cv/language_guided_video_summarization/transformer/modules.py (+27, -0)
  25. modelscope/models/cv/language_guided_video_summarization/transformer/sub_layers.py (+83, -0)
  26. modelscope/models/cv/tinynas_classfication/plain_net_utils.py (+1, -1)
  27. modelscope/models/multi_modal/clip/bert_tokenizer.py (+1, -1)
  28. modelscope/models/multi_modal/clip/model.py (+4, -2)
  29. modelscope/models/multi_modal/diffusion/model.py (+4, -2)
  30. modelscope/models/multi_modal/diffusion/structbert.py (+1, -1)
  31. modelscope/models/multi_modal/diffusion/tokenizer.py (+1, -1)
  32. modelscope/models/multi_modal/gemm/gemm_base.py (+3, -1)
  33. modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py (+3, -1)
  34. modelscope/models/multi_modal/mplug/configuration_mplug.py (+1, -1)
  35. modelscope/models/multi_modal/multi_stage_diffusion/model.py (+2, -1)
  36. modelscope/models/multi_modal/ofa_for_all_tasks.py (+1, -1)
  37. modelscope/models/nlp/mglm/arguments.py (+1, -1)
  38. modelscope/models/nlp/mglm/data_utils/corpora.py (+1, -1)
  39. modelscope/models/nlp/mglm/data_utils/datasets.py (+2, -2)
  40. modelscope/models/nlp/mglm/data_utils/extraction.py (+3, -1)
  41. modelscope/models/nlp/mglm/data_utils/tokenization_gpt2.py (+1, -1)
  42. modelscope/models/nlp/mglm/process_grid.py (+1, -1)
  43. modelscope/models/nlp/mglm/tasks/language_model/dataset.py (+1, -1)
  44. modelscope/models/nlp/mglm/tasks/seq2seq/dataset.py (+6, -4)
  45. modelscope/models/nlp/mglm/tasks/superglue/dataset.py (+4, -4)
  46. modelscope/models/nlp/mglm/tasks/superglue/pvp.py (+1, -1)
  47. modelscope/models/nlp/mglm/utils.py (+2, -2)
  48. modelscope/models/science/unifold/data/residue_constants.py (+1, -1)
  49. modelscope/models/science/unifold/dataset.py (+3, -2)
  50. modelscope/models/science/unifold/msa/pipeline.py (+2, -2)
  51. modelscope/models/science/unifold/msa/templates.py (+1, -1)
  52. modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py (+2, -2)
  53. modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py (+6, -4)
  54. modelscope/msdatasets/task_datasets/video_summarization_dataset.py (+1, -1)
  55. modelscope/pipelines/audio/asr_inference_pipeline.py (+1, -1)
  56. modelscope/pipelines/cv/__init__.py (+4, -0)
  57. modelscope/pipelines/cv/animal_recognition_pipeline.py (+1, -1)
  58. modelscope/pipelines/cv/general_recognition_pipeline.py (+1, -1)
  59. modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py (+250, -0)
  60. modelscope/pipelines/cv/ocr_recognition_pipeline.py (+1, -1)
  61. modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py (+13, -9)
  62. modelscope/pipelines/cv/tinynas_classification_pipeline.py (+1, -1)
  63. modelscope/pipelines/cv/video_category_pipeline.py (+1, -1)
  64. modelscope/pipelines/nlp/table_question_answering_pipeline.py (+25, -13)
  65. modelscope/pipelines/science/protein_structure_pipeline.py (+3, -2)
  66. modelscope/preprocessors/audio.py (+1, -1)
  67. modelscope/preprocessors/kws.py (+1, -1)
  68. modelscope/preprocessors/multi_modal.py (+3, -2)
  69. modelscope/preprocessors/nlp/nlp_base.py (+4, -2)
  70. modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py (+4, -1)
  71. modelscope/preprocessors/nlp/space/dst_processors.py (+1, -1)
  72. modelscope/preprocessors/nlp/space/fields/gen_field.py (+4, -2)
  73. modelscope/preprocessors/nlp/space/tokenizer.py (+1, -1)
  74. modelscope/preprocessors/nlp/space_T_cn/fields/database.py (+7, -3)
  75. modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py (+2, -2)
  76. modelscope/preprocessors/ofa/base.py (+1, -1)
  77. modelscope/preprocessors/science/uni_fold.py (+1, -1)
  78. modelscope/trainers/nlp/csanmt_translation_trainer.py (+41, -32)
  79. modelscope/trainers/nlp/space/eval.py (+2, -1)
  80. modelscope/utils/audio/audio_utils.py (+1, -1)
  81. modelscope/utils/config.py (+1, -1)
  82. modelscope/utils/constant.py (+1, -0)
  83. modelscope/utils/hub.py (+1, -1)
  84. modelscope/utils/nlp/space/clean_dataset.py (+3, -1)
  85. modelscope/utils/nlp/space/db_ops.py (+3, -1)
  86. modelscope/utils/nlp/space/utils.py (+2, -2)
  87. requirements/cv.txt (+4, -0)
  88. requirements/framework.txt (+1, -2)
  89. setup.py (+2, -2)
  90. tests/pipelines/test_language_guided_video_summarization.py (+49, -0)
  91. tests/pipelines/test_referring_video_object_segmentation.py (+2, -2)
  92. tests/pipelines/test_table_question_answering.py (+4, -12)
  93. tests/run.py (+1, -1)
  94. tests/run_config.yaml (+1, -0)
  95. tests/trainers/easycv/test_easycv_trainer.py (+2, -2)
  96. tests/trainers/test_image_denoise_trainer.py (+3, -3)
  97. tests/trainers/test_referring_video_object_segmentation_trainer.py (+5, -7)
  98. tests/trainers/test_trainer.py (+2, -2)
  99. tests/trainers/test_trainer_gpu.py (+3, -3)
  100. tests/trainers/test_translation_trainer.py (+9, -3)

.dev_scripts/ci_container_test.sh (+13, -6)

@@ -2,13 +2,7 @@ echo "Testing envs"
printenv
echo "ENV END"
if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
awk -F: '/^[^#]/ { print $1 }' requirements/framework.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/audio.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/cv.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/tests.txt

git config --global --add safe.directory /Maas-lib
git config --global user.email tmp
git config --global user.name tmp.com
@@ -19,9 +13,22 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then
pre-commit run -c .pre-commit-config_local.yaml --all-files
if [ $? -ne 0 ]; then
echo "linter test failed, please run 'pre-commit run --all-files' to check"
echo "From the repository folder"
echo "Run 'pip install -r requirements/tests.txt' install test dependencies."
echo "Run 'pre-commit install' install pre-commit hooks."
echo "Finally run linter with command: 'pre-commit run --all-files' to check."
echo "Ensure there is no failure!!!!!!!!"
exit -1
fi
fi

awk -F: '/^[^#]/ { print $1 }' requirements/framework.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/audio.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/cv.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/multi-modal.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/nlp.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
awk -F: '/^[^#]/ { print $1 }' requirements/science.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install -r requirements/tests.txt
# test with install
python setup.py install
else


.pre-commit-config.yaml (+1, -1)

@@ -1,5 +1,5 @@
repos:
- repo: https://gitlab.com/pycqa/flake8.git
- repo: https://github.com/PyCQA/flake8
rev: 4.0.0
hooks:
- id: flake8


docker/Dockerfile.ubuntu (+28, -5)

@@ -7,6 +7,7 @@ ENV PATH="${CONDA_DIR}/bin:${PATH}"
ENV arch=x86_64
SHELL ["/bin/bash", "-c"]
COPY docker/rcfiles /tmp/resources
COPY docker/jupyter_plugins /tmp/resources/jupyter_plugins
RUN apt-get update && apt-get install -y --reinstall ca-certificates && \
cp /tmp/resources/ubuntu20.04_sources.tuna /etc/apt/sources.list && \
apt-get update && \
@@ -26,7 +27,7 @@ ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN.UTF-8 LC_ALL=zh_CN.UTF-8

#install and config python
ARG PYTHON_VERSION=3.7.13
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \
RUN wget --quiet https://mirrors.aliyun.com/anaconda/miniconda/Miniconda3-latest-Linux-${arch}.sh -O ./miniconda.sh && \
/bin/bash miniconda.sh -b -p /opt/conda && \
rm -f miniconda.sh && \
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
@@ -34,8 +35,8 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${a
cp /tmp/resources/conda.tuna ~/.condarc && \
source /root/.bashrc && \
conda install --yes python==${PYTHON_VERSION} && \
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip config set install.trusted-host pypi.tuna.tsinghua.edu.cn
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
pip config set install.trusted-host mirrors.aliyun.com

ARG USE_GPU=True

@@ -70,16 +71,38 @@ RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /var/modelscope/cv.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
pip install --no-cache-dir -r /var/modelscope/multi-modal.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
pip install --no-cache-dir -r /var/modelscope/nlp.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
pip install --no-cache-dir -r /var/modelscope/science.txt -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html && \
pip cache purge

# default shell bash
ENV SHELL=/bin/bash

# install special package
RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 datasets==2.1.0 numpy==1.18.5 ipykernel fairseq fasttext https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/xtcocotools-1.12-cp37-cp37m-linux_x86_64.whl

RUN if [ "$USE_GPU" = "True" ] ; then \
pip install --no-cache-dir dgl-cu113 dglgo -f https://data.dgl.ai/wheels/repo.html; \
else \
pip install --no-cache-dir dgl dglgo -f https://data.dgl.ai/wheels/repo.html; \
fi

# install jupyter plugin
RUN mkdir -p /root/.local/share/jupyter/labextensions/ && \
cp -r /tmp/resources/jupyter_plugins/* /root/.local/share/jupyter/labextensions/

COPY docker/scripts/modelscope_env_init.sh /usr/local/bin/ms_env_init.sh
RUN pip install --no-cache-dir https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/xtcocotools-1.12-cp37-cp37m-linux_x86_64.whl --force

# for unifold
COPY docker/scripts/install_unifold.sh /tmp/install_unifold.sh
RUN if [ "$USE_GPU" = "True" ] ; then \
bash /tmp/install_unifold.sh; \
else \
echo 'unifold is not supported on cpu'; \
fi

RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 datasets==2.1.0 numpy==1.18.5 ipykernel fairseq fasttext deepspeed
COPY docker/scripts/install_apex.sh /tmp/install_apex.sh
RUN if [ "$USE_GPU" = "True" ] ; then \
bash /tmp/install_apex.sh; \
else \
echo 'apex is not supported on cpu'; \
fi

docker/jupyter_plugins/jupyterlab_active_log/package.json (+99, -0)

@@ -0,0 +1,99 @@
{
"name": "jupyterlab_active_log",
"version": "0.1.0",
"description": "A JupyterLab extension.",
"keywords": [
"jupyter",
"jupyterlab",
"jupyterlab-extension"
],
"homepage": "https://github.com/github_username/jupyterlab_active_log",
"bugs": {
"url": "https://github.com/github_username/jupyterlab_active_log/issues"
},
"license": "BSD-3-Clause",
"files": [
"lib/**/*.{d.ts,eot,gif,html,jpg,js,js.map,json,png,svg,woff2,ttf}",
"style/**/*.{css,js,eot,gif,html,jpg,json,png,svg,woff2,ttf}"
],
"main": "lib/index.js",
"types": "lib/index.d.ts",
"style": "style/index.css",
"repository": {
"type": "git",
"url": "https://github.com/github_username/jupyterlab_active_log.git"
},
"scripts": {
"build": "jlpm build:lib && jlpm build:labextension:dev",
"build:prod": "jlpm clean && jlpm build:lib && jlpm build:labextension",
"build:labextension": "jupyter labextension build .",
"build:labextension:dev": "jupyter labextension build --development True .",
"build:lib": "tsc",
"clean": "jlpm clean:lib",
"clean:lib": "rimraf lib tsconfig.tsbuildinfo",
"clean:lintcache": "rimraf .eslintcache .stylelintcache",
"clean:labextension": "rimraf jupyterlab_active_log/labextension",
"clean:all": "jlpm clean:lib && jlpm clean:labextension && jlpm clean:lintcache",
"eslint": "jlpm eslint:check --fix",
"eslint:check": "eslint . --cache --ext .ts,.tsx",
"install:extension": "jlpm build",
"lint": "jlpm stylelint && jlpm prettier && jlpm eslint",
"lint:check": "jlpm stylelint:check && jlpm prettier:check && jlpm eslint:check",
"prettier": "jlpm prettier:base --write --list-different",
"prettier:base": "prettier \"**/*{.ts,.tsx,.js,.jsx,.css,.json,.md}\"",
"prettier:check": "jlpm prettier:base --check",
"stylelint": "jlpm stylelint:check --fix",
"stylelint:check": "stylelint --cache \"style/**/*.css\"",
"watch": "run-p watch:src watch:labextension",
"watch:src": "tsc -w",
"watch:labextension": "jupyter labextension watch ."
},
"dependencies": {
"@jupyterlab/application": "^3.1.0"
},
"devDependencies": {
"@jupyterlab/builder": "^3.1.0",
"@typescript-eslint/eslint-plugin": "^4.8.1",
"@typescript-eslint/parser": "^4.8.1",
"eslint": "^7.14.0",
"eslint-config-prettier": "^6.15.0",
"eslint-plugin-prettier": "^3.1.4",
"npm-run-all": "^4.1.5",
"prettier": "^2.1.1",
"rimraf": "^3.0.2",
"stylelint": "^14.3.0",
"stylelint-config-prettier": "^9.0.3",
"stylelint-config-recommended": "^6.0.0",
"stylelint-config-standard": "~24.0.0",
"stylelint-prettier": "^2.0.0",
"typescript": "~4.1.3"
},
"sideEffects": [
"style/*.css",
"style/index.js"
],
"styleModule": "style/index.js",
"publishConfig": {
"access": "public"
},
"jupyterlab": {
"extension": true,
"outputDir": "jupyterlab_active_log/labextension",
"_build": {
"load": "static/remoteEntry.eb3177c3791d7658cc12.js",
"extension": "./extension",
"style": "./style"
}
},
"jupyter-releaser": {
"hooks": {
"before-build-npm": [
"python -m pip install jupyterlab~=3.1",
"jlpm"
],
"before-build-python": [
"jlpm clean:all"
]
}
}
}

docker/jupyter_plugins/jupyterlab_active_log/static/568.a92ae44b87625ab09aed.js (+1, -0)

@@ -0,0 +1 @@
"use strict";(self.webpackChunkjupyterlab_active_log=self.webpackChunkjupyterlab_active_log||[]).push([[568],{568:(t,e,a)=>{a.r(e),a.d(e,{default:()=>i});const i={id:"jupyterlab_active_log:plugin",autoStart:!0,activate:t=>{console.log("JupyterLab extension jupyterlab_active_log is activated!"),window.consts=Object.assign(Object.assign({},window.consts),{recordUrl:"https://modelscope.cn/api/v1/notebooks/activelog",timerDuration:1e4,timerParams:function(){const t=location.pathname.split("/");let e;return t.length>=2&&(e=t[1]),{site:"dsw",id:e,ext:{pathname:location.pathname}}}});const e=document.body,a=e.insertBefore(document.createElement("script"),e.firstChild);a.setAttribute("id","timer-sdk"),a.setAttribute("src","https://g.alicdn.com/alifanyi/translate-js-sdk/timer.js ")}}}}]);

docker/jupyter_plugins/jupyterlab_active_log/static/747.63b4c3d22bfe458b352b.js (+1, -0)

@@ -0,0 +1 @@
"use strict";(self.webpackChunkjupyterlab_active_log=self.webpackChunkjupyterlab_active_log||[]).push([[747],{150:(e,n,t)=>{t.d(n,{Z:()=>a});var r=t(645),o=t.n(r)()((function(e){return e[1]}));o.push([e.id,"/*\n See the JupyterLab Developer Guide for useful CSS Patterns:\n\n https://jupyterlab.readthedocs.io/en/stable/developer/css.html\n*/\n",""]);const a=o},645:e=>{e.exports=function(e){var n=[];return n.toString=function(){return this.map((function(n){var t=e(n);return n[2]?"@media ".concat(n[2]," {").concat(t,"}"):t})).join("")},n.i=function(e,t,r){"string"==typeof e&&(e=[[null,e,""]]);var o={};if(r)for(var a=0;a<this.length;a++){var i=this[a][0];null!=i&&(o[i]=!0)}for(var c=0;c<e.length;c++){var s=[].concat(e[c]);r&&o[s[0]]||(t&&(s[2]?s[2]="".concat(t," and ").concat(s[2]):s[2]=t),n.push(s))}},n}},379:(e,n,t)=>{var r,o=function(){var e={};return function(n){if(void 0===e[n]){var t=document.querySelector(n);if(window.HTMLIFrameElement&&t instanceof window.HTMLIFrameElement)try{t=t.contentDocument.head}catch(e){t=null}e[n]=t}return e[n]}}(),a=[];function i(e){for(var n=-1,t=0;t<a.length;t++)if(a[t].identifier===e){n=t;break}return n}function c(e,n){for(var t={},r=[],o=0;o<e.length;o++){var c=e[o],s=n.base?c[0]+n.base:c[0],u=t[s]||0,l="".concat(s," ").concat(u);t[s]=u+1;var f=i(l),d={css:c[1],media:c[2],sourceMap:c[3]};-1!==f?(a[f].references++,a[f].updater(d)):a.push({identifier:l,updater:v(d,n),references:1}),r.push(l)}return r}function s(e){var n=document.createElement("style"),r=e.attributes||{};if(void 0===r.nonce){var a=t.nc;a&&(r.nonce=a)}if(Object.keys(r).forEach((function(e){n.setAttribute(e,r[e])})),"function"==typeof e.insert)e.insert(n);else{var i=o(e.insert||"head");if(!i)throw new Error("Couldn't find a style target. This probably means that the value for the 'insert' parameter is invalid.");i.appendChild(n)}return n}var u,l=(u=[],function(e,n){return u[e]=n,u.filter(Boolean).join("\n")});function f(e,n,t,r){var o=t?"":r.media?"@media ".concat(r.media," {").concat(r.css,"}"):r.css;if(e.styleSheet)e.styleSheet.cssText=l(n,o);else{var a=document.createTextNode(o),i=e.childNodes;i[n]&&e.removeChild(i[n]),i.length?e.insertBefore(a,i[n]):e.appendChild(a)}}function d(e,n,t){var r=t.css,o=t.media,a=t.sourceMap;if(o?e.setAttribute("media",o):e.removeAttribute("media"),a&&"undefined"!=typeof btoa&&(r+="\n/*# sourceMappingURL=data:application/json;base64,".concat(btoa(unescape(encodeURIComponent(JSON.stringify(a))))," */")),e.styleSheet)e.styleSheet.cssText=r;else{for(;e.firstChild;)e.removeChild(e.firstChild);e.appendChild(document.createTextNode(r))}}var p=null,h=0;function v(e,n){var t,r,o;if(n.singleton){var a=h++;t=p||(p=s(n)),r=f.bind(null,t,a,!1),o=f.bind(null,t,a,!0)}else t=s(n),r=d.bind(null,t,n),o=function(){!function(e){if(null===e.parentNode)return!1;e.parentNode.removeChild(e)}(t)};return r(e),function(n){if(n){if(n.css===e.css&&n.media===e.media&&n.sourceMap===e.sourceMap)return;r(e=n)}else o()}}e.exports=function(e,n){(n=n||{}).singleton||"boolean"==typeof n.singleton||(n.singleton=(void 0===r&&(r=Boolean(window&&document&&document.all&&!window.atob)),r));var t=c(e=e||[],n);return function(e){if(e=e||[],"[object Array]"===Object.prototype.toString.call(e)){for(var r=0;r<t.length;r++){var o=i(t[r]);a[o].references--}for(var s=c(e,n),u=0;u<t.length;u++){var l=i(t[u]);0===a[l].references&&(a[l].updater(),a.splice(l,1))}t=s}}}},747:(e,n,t)=>{t.r(n);var r=t(379),o=t.n(r),a=t(150);o()(a.Z,{insert:"head",singleton:!1}),a.Z.locals}}]);

docker/jupyter_plugins/jupyterlab_active_log/static/remoteEntry.eb3177c3791d7658cc12.js (+1, -0)

@@ -0,0 +1 @@
var _JUPYTERLAB;(()=>{"use strict";var e,r,t={293:(e,r,t)=>{var o={"./index":()=>t.e(568).then((()=>()=>t(568))),"./extension":()=>t.e(568).then((()=>()=>t(568))),"./style":()=>t.e(747).then((()=>()=>t(747)))},a=(e,r)=>(t.R=r,r=t.o(o,e)?o[e]():Promise.resolve().then((()=>{throw new Error('Module "'+e+'" does not exist in container.')})),t.R=void 0,r),n=(e,r)=>{if(t.S){var o="default",a=t.S[o];if(a&&a!==e)throw new Error("Container initialization failed as it has already been initialized with a different share scope");return t.S[o]=e,t.I(o,r)}};t.d(r,{get:()=>a,init:()=>n})}},o={};function a(e){var r=o[e];if(void 0!==r)return r.exports;var n=o[e]={id:e,exports:{}};return t[e](n,n.exports,a),n.exports}a.m=t,a.c=o,a.n=e=>{var r=e&&e.__esModule?()=>e.default:()=>e;return a.d(r,{a:r}),r},a.d=(e,r)=>{for(var t in r)a.o(r,t)&&!a.o(e,t)&&Object.defineProperty(e,t,{enumerable:!0,get:r[t]})},a.f={},a.e=e=>Promise.all(Object.keys(a.f).reduce(((r,t)=>(a.f[t](e,r),r)),[])),a.u=e=>e+"."+{568:"a92ae44b87625ab09aed",747:"63b4c3d22bfe458b352b"}[e]+".js?v="+{568:"a92ae44b87625ab09aed",747:"63b4c3d22bfe458b352b"}[e],a.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),a.o=(e,r)=>Object.prototype.hasOwnProperty.call(e,r),e={},r="jupyterlab_active_log:",a.l=(t,o,n,i)=>{if(e[t])e[t].push(o);else{var l,u;if(void 0!==n)for(var c=document.getElementsByTagName("script"),d=0;d<c.length;d++){var s=c[d];if(s.getAttribute("src")==t||s.getAttribute("data-webpack")==r+n){l=s;break}}l||(u=!0,(l=document.createElement("script")).charset="utf-8",l.timeout=120,a.nc&&l.setAttribute("nonce",a.nc),l.setAttribute("data-webpack",r+n),l.src=t),e[t]=[o];var p=(r,o)=>{l.onerror=l.onload=null,clearTimeout(f);var a=e[t];if(delete e[t],l.parentNode&&l.parentNode.removeChild(l),a&&a.forEach((e=>e(o))),r)return r(o)},f=setTimeout(p.bind(null,void 0,{type:"timeout",target:l}),12e4);l.onerror=p.bind(null,l.onerror),l.onload=p.bind(null,l.onload),u&&document.head.appendChild(l)}},a.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{a.S={};var e={},r={};a.I=(t,o)=>{o||(o=[]);var n=r[t];if(n||(n=r[t]={}),!(o.indexOf(n)>=0)){if(o.push(n),e[t])return e[t];a.o(a.S,t)||(a.S[t]={});var i=a.S[t],l="jupyterlab_active_log",u=[];return"default"===t&&((e,r,t,o)=>{var n=i[e]=i[e]||{},u=n[r];(!u||!u.loaded&&(1!=!u.eager?o:l>u.from))&&(n[r]={get:()=>a.e(568).then((()=>()=>a(568))),from:l,eager:!1})})("jupyterlab_active_log","0.1.0"),e[t]=u.length?Promise.all(u).then((()=>e[t]=1)):1}}})(),(()=>{var e;a.g.importScripts&&(e=a.g.location+"");var r=a.g.document;if(!e&&r&&(r.currentScript&&(e=r.currentScript.src),!e)){var t=r.getElementsByTagName("script");t.length&&(e=t[t.length-1].src)}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),a.p=e})(),(()=>{var e={346:0};a.f.j=(r,t)=>{var o=a.o(e,r)?e[r]:void 0;if(0!==o)if(o)t.push(o[2]);else{var n=new Promise(((t,a)=>o=e[r]=[t,a]));t.push(o[2]=n);var i=a.p+a.u(r),l=new Error;a.l(i,(t=>{if(a.o(e,r)&&(0!==(o=e[r])&&(e[r]=void 0),o)){var n=t&&("load"===t.type?"missing":t.type),i=t&&t.target&&t.target.src;l.message="Loading chunk "+r+" failed.\n("+n+": "+i+")",l.name="ChunkLoadError",l.type=n,l.request=i,o[1](l)}}),"chunk-"+r,r)}};var r=(r,t)=>{var 
o,n,[i,l,u]=t,c=0;if(i.some((r=>0!==e[r]))){for(o in l)a.o(l,o)&&(a.m[o]=l[o]);u&&u(a)}for(r&&r(t);c<i.length;c++)n=i[c],a.o(e,n)&&e[n]&&e[n][0](),e[n]=0},t=self.webpackChunkjupyterlab_active_log=self.webpackChunkjupyterlab_active_log||[];t.forEach(r.bind(null,0)),t.push=r.bind(null,t.push.bind(t))})(),a.nc=void 0;var n=a(293);(_JUPYTERLAB=void 0===_JUPYTERLAB?{}:_JUPYTERLAB).jupyterlab_active_log=n})();

docker/jupyter_plugins/jupyterlab_active_log/static/style.js (+4, -0)

@@ -0,0 +1,4 @@
/* This is a generated file of CSS imports */
/* It was generated by @jupyterlab/builder in Build.ensureAssets() */

import 'jupyterlab_active_log/style/index.js';

docker/jupyter_plugins/jupyterlab_active_log/static/third-party-licenses.json (+16, -0)

@@ -0,0 +1,16 @@
{
"packages": [
{
"name": "css-loader",
"versionInfo": "5.2.7",
"licenseId": "MIT",
"extractedText": "Copyright JS Foundation and other contributors\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n'Software'), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\nIN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\nCLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\nTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n"
},
{
"name": "style-loader",
"versionInfo": "2.0.0",
"licenseId": "MIT",
"extractedText": "Copyright JS Foundation and other contributors\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n'Software'), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\nIN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\nCLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\nTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n"
}
]
}

docker/scripts/install_apex.sh (+5, -0)

@@ -0,0 +1,5 @@
git clone https://github.com/NVIDIA/apex
cd apex
TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.5;8.0;8.6" pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
cd ..
rm -rf apex

docker/scripts/install_unifold.sh (+12, -0)

@@ -0,0 +1,12 @@
apt-get update && apt-get install -y hmmer kalign curl cmake \
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
&& git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \
&& mkdir /tmp/hh-suite/build \
&& pushd /tmp/hh-suite/build \
&& cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \
&& make -j 4 && make install \
&& ln -s /opt/hhsuite/bin/* /usr/bin \
&& popd \
&& rm -rf /tmp/hh-suite \
&& pip install --no-cache-dir unicore -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html \
&& pip install --no-cache-dir biopython ipdb

docker/scripts/modelscope_env_init.sh (+47, -0)

@@ -0,0 +1,47 @@
#!/bin/bash
set -e
set -o pipefail
# check git is installed
git --version >/dev/null 2>&1 || { echo 'git not installed' ; exit 0; }

if [ -z "$MODELSCOPE_USERNAME" ] || [ -z "$MODELSCOPE_GITLAB_ACCESS_TOKEN" ]; then
:
else
git config --global credential.helper store
echo "http://${MODELSCOPE_USERNAME}:${MODELSCOPE_GITLAB_ACCESS_TOKEN}@www.modelscope.cn">~/.git-credentials
echo "https://${MODELSCOPE_USERNAME}:${MODELSCOPE_GITLAB_ACCESS_TOKEN}@www.modelscope.cn">>~/.git-credentials
chmod go-rwx ~/.git-credentials
fi
if [ -z "$MODELSCOPE_USERNAME" ] || [ -z "$MODELSCOPE_USEREMAIL" ]; then
:
else
git config --system user.name ${MODELSCOPE_USERNAME}
git config --system user.email ${MODELSCOPE_USEREMAIL}
fi
if [ -z "$MODELSCOPE_ENVIRONMENT" ]; then
:
else
git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_Environment: $MODELSCOPE_ENVIRONMENT"
git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_Environment: $MODELSCOPE_ENVIRONMENT"
fi

if [ -z "$MODELSCOPE_USERNAME" ]; then
:
else
git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_User: $MODELSCOPE_USERNAME"
git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_User: $MODELSCOPE_USERNAME"
fi

if [ -z "$MODELSCOPE_USERID" ]; then
:
else
git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_Userid: $MODELSCOPE_USERID"
git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_Userid: $MODELSCOPE_USERID"
fi

if [ -z "$MODELSCOPE_HAVANAID" ]; then
:
else
git config --system --add http.http://www.modelscope.cn.extraHeader "Modelscope_Havanaid: $MODELSCOPE_HAVANAID"
git config --system --add http.https://www.modelscope.cn.extraHeader "Modelscope_Havanaid: $MODELSCOPE_HAVANAID"
fi

docs/source/conf.py (+1, -1)

@@ -25,7 +25,7 @@ version_file = '../../modelscope/version.py'


def get_version():
with open(version_file, 'r') as f:
with open(version_file, 'r', encoding='utf-8') as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']
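Nearly every Python change in this merge follows the pattern shown above: adding an explicit encoding='utf-8' to open() calls. As a minimal illustration (not part of the diff), the explicit argument matters because open() otherwise falls back to the locale's preferred encoding, so the same UTF-8 file can decode differently, or fail, on hosts with a non-UTF-8 locale:

import json

def load_config(path: str) -> dict:
    # Explicit encoding keeps reads deterministic; without it the codec comes from
    # locale.getpreferredencoding() and varies by platform (e.g. cp936 vs. utf-8).
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)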



modelscope/hub/api.py (+2, -2)

@@ -739,7 +739,7 @@ class ModelScopeConfig:
with open(
os.path.join(ModelScopeConfig.path_credential,
ModelScopeConfig.USER_INFO_FILE_NAME),
'r') as f:
'r', encoding='utf-8') as f:
info = f.read()
return info.split(':')[0], info.split(':')[1]
except FileNotFoundError:
@@ -760,7 +760,7 @@ class ModelScopeConfig:
with open(
os.path.join(ModelScopeConfig.path_credential,
ModelScopeConfig.GIT_TOKEN_FILE_NAME),
'r') as f:
'r', encoding='utf-8') as f:
token = f.read()
except FileNotFoundError:
pass


modelscope/metainfo.py (+2, -0)

@@ -32,6 +32,7 @@ class Models(object):
image_reid_person = 'passvitb'
image_inpainting = 'FFTInpainting'
video_summarization = 'pgl-video-summarization'
language_guided_video_summarization = 'clip-it-language-guided-video-summarization'
swinL_semantic_segmentation = 'swinL-semantic-segmentation'
vitadapter_semantic_segmentation = 'vitadapter-semantic-segmentation'
text_driven_segmentation = 'text-driven-segmentation'
@@ -200,6 +201,7 @@ class Pipelines(object):
video_single_object_tracking = 'ostrack-vitb-video-single-object-tracking'
image_panoptic_segmentation = 'image-panoptic-segmentation'
video_summarization = 'googlenet_pgl_video_summarization'
language_guided_video_summarization = 'clip-it-video-summarization'
image_semantic_segmentation = 'image-semantic-segmentation'
image_reid_person = 'passvitb-image-reid-person'
image_inpainting = 'fft-inpainting'
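The two registry entries above wire the new CLIP-It model and pipeline into ModelScope's task system. Below is a hedged sketch of how the registered task is expected to be invoked through the standard pipeline API; the model id is hypothetical (it is not part of this merge), and the exact input format is defined by the new language_guided_video_summarization_pipeline.py, which is not reproduced on this page:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Hypothetical model id, for illustration only.
summarizer = pipeline(
    Tasks.language_guided_video_summarization,
    model='damo/cv_clip-it_language-guided-video-summarization')
# The pipeline's preprocessor turns a video (and guiding sentences) into the
# frame/text feature tensors consumed by ClipItVideoSummarization.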


modelscope/models/audio/tts/models/datasets/kantts_data4fs.py (+1, -1)

@@ -21,7 +21,7 @@ class KanTtsText2MelDataset(Dataset):

self.cache = cache

with open(config_filename) as f:
with open(config_filename, encoding='utf-8') as f:
self._config = json.loads(f.read())

# Load metadata:


modelscope/models/audio/tts/sambert_hifi.py (+1, -1)

@@ -60,7 +60,7 @@ class SambertHifigan(Model):
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
zip_ref.extractall(model_dir)
voice_cfg_path = os.path.join(self.__voice_path, 'voices.json')
with open(voice_cfg_path, 'r') as f:
with open(voice_cfg_path, 'r', encoding='utf-8') as f:
voice_cfg = json.load(f)
if 'voices' not in voice_cfg:
raise TtsModelConfigurationException(


modelscope/models/cv/__init__.py (+4, -4)

@@ -10,10 +10,10 @@ from . import (action_recognition, animal_recognition, body_2d_keypoints,
image_panoptic_segmentation, image_portrait_enhancement,
image_reid_person, image_semantic_segmentation,
image_to_image_generation, image_to_image_translation,
movie_scene_segmentation, object_detection,
product_retrieval_embedding, realtime_object_detection,
referring_video_object_segmentation, salient_detection,
shop_segmentation, super_resolution,
language_guided_video_summarization, movie_scene_segmentation,
object_detection, product_retrieval_embedding,
realtime_object_detection, referring_video_object_segmentation,
salient_detection, shop_segmentation, super_resolution,
video_single_object_tracking, video_summarization, virual_tryon)

# yapf: enable

modelscope/models/cv/language_guided_video_summarization/__init__.py (+25, -0)

@@ -0,0 +1,25 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .summarizer import (
ClipItVideoSummarization, )

else:
_import_structure = {
'summarizer': [
'ClipItVideoSummarization',
]
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

modelscope/models/cv/language_guided_video_summarization/summarizer.py (+194, -0)

@@ -0,0 +1,194 @@
# Part of the implementation is borrowed and modified from BMT and video_features,
# publicly available at https://github.com/v-iashin/BMT
# and https://github.com/v-iashin/video_features

import argparse
import os
import os.path as osp
from copy import deepcopy
from typing import Dict, Union

import numpy as np
import torch
import torch.nn as nn
from bmt_clipit.sample.single_video_prediction import (caption_proposals,
generate_proposals,
load_cap_model,
load_prop_model)
from bmt_clipit.utilities.proposal_utils import non_max_suppresion
from torch.nn.parallel import DataParallel, DistributedDataParallel
from videofeatures_clipit.models.i3d.extract_i3d import ExtractI3D
from videofeatures_clipit.models.vggish.extract_vggish import ExtractVGGish
from videofeatures_clipit.utils.utils import (fix_tensorflow_gpu_allocation,
form_list_from_user_input)

from modelscope.metainfo import Models
from modelscope.models.base import Tensor, TorchModel
from modelscope.models.builder import MODELS
from modelscope.models.cv.language_guided_video_summarization.transformer import \
Transformer
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()


def extract_text(args):
# Loading models and other essential stuff
cap_cfg, cap_model, train_dataset = load_cap_model(
args.pretrained_cap_model_path, args.device_id)
prop_cfg, prop_model = load_prop_model(args.device_id,
args.prop_generator_model_path,
args.pretrained_cap_model_path,
args.max_prop_per_vid)
# Proposal
proposals = generate_proposals(prop_model, args.features,
train_dataset.pad_idx, prop_cfg,
args.device_id, args.duration_in_secs)
# NMS if specified
if args.nms_tiou_thresh is not None:
proposals = non_max_suppresion(proposals.squeeze(),
args.nms_tiou_thresh)
proposals = proposals.unsqueeze(0)
# Captions for each proposal
captions = caption_proposals(cap_model, args.features, train_dataset,
cap_cfg, args.device_id, proposals,
args.duration_in_secs)
return captions


def extract_video_features(video_path, tmp_path, feature_type, i3d_flow_path,
i3d_rgb_path, kinetics_class_labels, pwc_path,
vggish_model_path, vggish_pca_path, extraction_fps,
device):
default_args = dict(
device=device,
extraction_fps=extraction_fps,
feature_type=feature_type,
file_with_video_paths=None,
i3d_flow_path=i3d_flow_path,
i3d_rgb_path=i3d_rgb_path,
keep_frames=False,
kinetics_class_labels=kinetics_class_labels,
min_side_size=256,
pwc_path=pwc_path,
show_kinetics_pred=False,
stack_size=64,
step_size=64,
tmp_path=tmp_path,
vggish_model_path=vggish_model_path,
vggish_pca_path=vggish_pca_path,
)
args = argparse.Namespace(**default_args)

if args.feature_type == 'i3d':
extractor = ExtractI3D(args)
elif args.feature_type == 'vggish':
extractor = ExtractVGGish(args)

feats = extractor(video_path)
return feats


def video_features_to_txt(duration_in_secs, pretrained_cap_model_path,
prop_generator_model_path, features, device_id):
default_args = dict(
device_id=device_id,
duration_in_secs=duration_in_secs,
features=features,
pretrained_cap_model_path=pretrained_cap_model_path,
prop_generator_model_path=prop_generator_model_path,
max_prop_per_vid=100,
nms_tiou_thresh=0.4,
)
args = argparse.Namespace(**default_args)
txt = extract_text(args)
return txt


@MODELS.register_module(
Tasks.language_guided_video_summarization,
module_name=Models.language_guided_video_summarization)
class ClipItVideoSummarization(TorchModel):

def __init__(self, model_dir: str, *args, **kwargs):
"""initialize the video summarization model from the `model_dir` path.

Args:
model_dir (str): the model path.
"""
super().__init__(model_dir, *args, **kwargs)

model_path = osp.join(model_dir, ModelFile.TORCH_MODEL_FILE)

self.loss = nn.MSELoss()
self.model = Transformer()
if torch.cuda.is_available():
self._device = torch.device('cuda')
else:
self._device = torch.device('cpu')
self.model = self.model.to(self._device)

self.model = self.load_pretrained(self.model, model_path)

if self.training:
self.model.train()
else:
self.model.eval()

def load_pretrained(self, net, load_path, strict=True, param_key='params'):
if isinstance(net, (DataParallel, DistributedDataParallel)):
net = net.module
load_net = torch.load(
load_path, map_location=lambda storage, loc: storage)
if param_key is not None:
if param_key not in load_net and 'params' in load_net:
param_key = 'params'
logger.info(
f'Loading: {param_key} does not exist, use params.')
if param_key in load_net:
load_net = load_net[param_key]
logger.info(
f'Loading {net.__class__.__name__} model from {load_path}, with param key: [{param_key}].'
)
# remove unnecessary 'module.'
for k, v in deepcopy(load_net).items():
if k.startswith('module.'):
load_net[k[7:]] = v
load_net.pop(k)
net.load_state_dict(load_net, strict=strict)
logger.info('load model done.')
return net

def _train_forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
frame_features = input['frame_features']
txt_features = input['txt_features']
gtscore = input['gtscore']
preds, attn_weights = self.model(frame_features, txt_features,
frame_features)
return {'loss': self.loss(preds, gtscore)}

def _inference_forward(self, input: Dict[str,
Tensor]) -> Dict[str, Tensor]:
frame_features = input['frame_features']
txt_features = input['txt_features']
y, dec_output = self.model(frame_features, txt_features,
frame_features)
return {'scores': y}

def forward(self, input: Dict[str,
Tensor]) -> Dict[str, Union[list, Tensor]]:
"""return the result by the model

Args:
input (Dict[str, Tensor]): the preprocessed data

Returns:
Dict[str, Union[list, Tensor]]: results
"""
for key, value in input.items():
input[key] = input[key].to(self._device)
if self.training:
return self._train_forward(input)
else:
return self._inference_forward(input)
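For reference, the inference branch above only reads 'frame_features' and 'txt_features' from the input dict. A minimal sketch of that dict, with shapes implied by the Transformer defaults (d_frame_vec=512, num_sentence=7); the tensors are random placeholders, and the real values come from the pipeline's feature extractors:

import torch

inputs = {
    'frame_features': torch.randn(1, 120, 512),   # (batch, n_frames, d_frame_vec)
    'txt_features': torch.randn(1, 1, 7 * 512),   # (batch, n_text_tokens, num_sentence * d_frame_vec)
}
# model = ClipItVideoSummarization(model_dir)     # requires the downloaded model weights
# scores = model(inputs)['scores']                # frame importance scores, shape (1, 120)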

modelscope/models/cv/language_guided_video_summarization/transformer/__init__.py (+25, -0)

@@ -0,0 +1,25 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .models import (
Transformer, )

else:
_import_structure = {
'models': [
'Transformer',
]
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

modelscope/models/cv/language_guided_video_summarization/transformer/layers.py (+48, -0)

@@ -0,0 +1,48 @@
# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch,
# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch
import torch
import torch.nn as nn

from .sub_layers import MultiHeadAttention, PositionwiseFeedForward


class EncoderLayer(nn.Module):
"""Compose with two layers"""

def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
super(EncoderLayer, self).__init__()
self.slf_attn = MultiHeadAttention(
n_head, d_model, d_k, d_v, dropout=dropout)
self.pos_ffn = PositionwiseFeedForward(
d_model, d_inner, dropout=dropout)

def forward(self, enc_input, slf_attn_mask=None):
enc_output, enc_slf_attn = self.slf_attn(
enc_input, enc_input, enc_input, mask=slf_attn_mask)
enc_output = self.pos_ffn(enc_output)
return enc_output, enc_slf_attn


class DecoderLayer(nn.Module):
"""Compose with three layers"""

def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
super(DecoderLayer, self).__init__()
self.slf_attn = MultiHeadAttention(
n_head, d_model, d_k, d_v, dropout=dropout)
self.enc_attn = MultiHeadAttention(
n_head, d_model, d_k, d_v, dropout=dropout)
self.pos_ffn = PositionwiseFeedForward(
d_model, d_inner, dropout=dropout)

def forward(self,
dec_input,
enc_output,
slf_attn_mask=None,
dec_enc_attn_mask=None):
dec_output, dec_slf_attn = self.slf_attn(
dec_input, dec_input, dec_input, mask=slf_attn_mask)
dec_output, dec_enc_attn = self.enc_attn(
dec_output, enc_output, enc_output, mask=dec_enc_attn_mask)
dec_output = self.pos_ffn(dec_output)
return dec_output, dec_slf_attn, dec_enc_attn

modelscope/models/cv/language_guided_video_summarization/transformer/models.py (+229, -0)

@@ -0,0 +1,229 @@
# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch,
# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch

import numpy as np
import torch
import torch.nn as nn

from .layers import DecoderLayer, EncoderLayer
from .sub_layers import MultiHeadAttention


class PositionalEncoding(nn.Module):

def __init__(self, d_hid, n_position=200):
super(PositionalEncoding, self).__init__()

# Not a parameter
self.register_buffer(
'pos_table', self._get_sinusoid_encoding_table(n_position, d_hid))

def _get_sinusoid_encoding_table(self, n_position, d_hid):
"""Sinusoid position encoding table"""

# TODO: make it with torch instead of numpy

def get_position_angle_vec(position):
return [
position / np.power(10000, 2 * (hid_j // 2) / d_hid)
for hid_j in range(d_hid)
]

sinusoid_table = np.array(
[get_position_angle_vec(pos_i) for pos_i in range(n_position)])
sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i
sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1

return torch.FloatTensor(sinusoid_table).unsqueeze(0)

def forward(self, x):
return x + self.pos_table[:, :x.size(1)].clone().detach()


class Encoder(nn.Module):
"""A encoder model with self attention mechanism."""

def __init__(self,
d_word_vec=1024,
n_layers=6,
n_head=8,
d_k=64,
d_v=64,
d_model=512,
d_inner=2048,
dropout=0.1,
n_position=200):

super().__init__()

self.position_enc = PositionalEncoding(
d_word_vec, n_position=n_position)
self.dropout = nn.Dropout(p=dropout)
self.layer_stack = nn.ModuleList([
EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
for _ in range(n_layers)
])
self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
self.d_model = d_model

def forward(self, enc_output, return_attns=False):

enc_slf_attn_list = []
# -- Forward
enc_output = self.dropout(self.position_enc(enc_output))
enc_output = self.layer_norm(enc_output)

for enc_layer in self.layer_stack:
enc_output, enc_slf_attn = enc_layer(enc_output)
enc_slf_attn_list += [enc_slf_attn] if return_attns else []

if return_attns:
return enc_output, enc_slf_attn_list
return enc_output,


class Decoder(nn.Module):
"""A decoder model with self attention mechanism."""

def __init__(self,
d_word_vec=1024,
n_layers=6,
n_head=8,
d_k=64,
d_v=64,
d_model=512,
d_inner=2048,
n_position=200,
dropout=0.1):

super().__init__()

self.position_enc = PositionalEncoding(
d_word_vec, n_position=n_position)
self.dropout = nn.Dropout(p=dropout)
self.layer_stack = nn.ModuleList([
DecoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
for _ in range(n_layers)
])
self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
self.d_model = d_model

def forward(self,
dec_output,
enc_output,
src_mask=None,
trg_mask=None,
return_attns=False):

dec_slf_attn_list, dec_enc_attn_list = [], []

# -- Forward
dec_output = self.dropout(self.position_enc(dec_output))
dec_output = self.layer_norm(dec_output)

for dec_layer in self.layer_stack:
dec_output, dec_slf_attn, dec_enc_attn = dec_layer(
dec_output,
enc_output,
slf_attn_mask=trg_mask,
dec_enc_attn_mask=src_mask)
dec_slf_attn_list += [dec_slf_attn] if return_attns else []
dec_enc_attn_list += [dec_enc_attn] if return_attns else []

if return_attns:
return dec_output, dec_slf_attn_list, dec_enc_attn_list
return dec_output,


class Transformer(nn.Module):
"""A sequence to sequence model with attention mechanism."""

def __init__(self,
num_sentence=7,
txt_atten_head=4,
d_frame_vec=512,
d_model=512,
d_inner=2048,
n_layers=6,
n_head=8,
d_k=256,
d_v=256,
dropout=0.1,
n_position=4000):

super().__init__()

self.d_model = d_model

self.layer_norm_img_src = nn.LayerNorm(d_frame_vec, eps=1e-6)
self.layer_norm_img_trg = nn.LayerNorm(d_frame_vec, eps=1e-6)
self.layer_norm_txt = nn.LayerNorm(
num_sentence * d_frame_vec, eps=1e-6)

self.linear_txt = nn.Linear(
in_features=num_sentence * d_frame_vec, out_features=d_model)
self.lg_attention = MultiHeadAttention(
n_head=txt_atten_head, d_model=d_model, d_k=d_k, d_v=d_v)

self.encoder = Encoder(
n_position=n_position,
d_word_vec=d_frame_vec,
d_model=d_model,
d_inner=d_inner,
n_layers=n_layers,
n_head=n_head,
d_k=d_k,
d_v=d_v,
dropout=dropout)

self.decoder = Decoder(
n_position=n_position,
d_word_vec=d_frame_vec,
d_model=d_model,
d_inner=d_inner,
n_layers=n_layers,
n_head=n_head,
d_k=d_k,
d_v=d_v,
dropout=dropout)

for p in self.parameters():
if p.dim() > 1:
nn.init.xavier_uniform_(p)

assert d_model == d_frame_vec, 'the dimensions of all module outputs shall be the same.'

self.linear_1 = nn.Linear(in_features=d_model, out_features=d_model)
self.linear_2 = nn.Linear(
in_features=self.linear_1.out_features, out_features=1)

self.drop = nn.Dropout(p=0.5)
self.norm_y = nn.LayerNorm(normalized_shape=d_model, eps=1e-6)
self.norm_linear = nn.LayerNorm(
normalized_shape=self.linear_1.out_features, eps=1e-6)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()

def forward(self, src_seq, src_txt, trg_seq):

features_txt = self.linear_txt(src_txt)
atten_seq, txt_attn = self.lg_attention(src_seq, features_txt,
features_txt)

enc_output, *_ = self.encoder(atten_seq)
dec_output, *_ = self.decoder(trg_seq, enc_output)

y = self.drop(enc_output)
y = self.norm_y(y)

# 2-layer NN (Regressor Network)
y = self.linear_1(y)
y = self.relu(y)
y = self.drop(y)
y = self.norm_linear(y)

y = self.linear_2(y)
y = self.sigmoid(y)
y = y.view(1, -1)

return y, dec_output
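A quick shape check for the model above, as a sketch assuming the repository and torch are importable and the default constructor arguments are used (the tensors are random stand-ins for frame features and concatenated sentence features):

import torch

from modelscope.models.cv.language_guided_video_summarization.transformer.models import \
    Transformer

model = Transformer()                    # defaults: d_frame_vec=512, d_model=512, num_sentence=7
frames = torch.randn(1, 120, 512)        # (batch, n_frames, d_frame_vec)
sentences = torch.randn(1, 1, 7 * 512)   # (batch, n_text_tokens, num_sentence * d_frame_vec)
scores, dec_output = model(frames, sentences, frames)
print(scores.shape, dec_output.shape)    # torch.Size([1, 120]) torch.Size([1, 120, 512])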

modelscope/models/cv/language_guided_video_summarization/transformer/modules.py (+27, -0)

@@ -0,0 +1,27 @@
# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch,
# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch

import torch
import torch.nn as nn
import torch.nn.functional as F


class ScaledDotProductAttention(nn.Module):
"""Scaled Dot-Product Attention"""

def __init__(self, temperature, attn_dropout=0.1):
super().__init__()
self.temperature = temperature
self.dropout = nn.Dropout(attn_dropout)

def forward(self, q, k, v, mask=None):

attn = torch.matmul(q / self.temperature, k.transpose(2, 3))

if mask is not None:
attn = attn.masked_fill(mask == 0, -1e9)

attn = self.dropout(F.softmax(attn, dim=-1))
output = torch.matmul(attn, v)

return output, attn

modelscope/models/cv/language_guided_video_summarization/transformer/sub_layers.py (+83, -0)

@@ -0,0 +1,83 @@
# Part of the implementation is borrowed and modified from attention-is-all-you-need-pytorch,
# publicly available at https://github.com/jadore801120/attention-is-all-you-need-pytorch

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from .modules import ScaledDotProductAttention


class MultiHeadAttention(nn.Module):
"""Multi-Head Attention module"""

def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
super().__init__()

self.n_head = n_head
self.d_k = d_k
self.d_v = d_v

self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
self.fc = nn.Linear(n_head * d_v, d_model, bias=False)

self.attention = ScaledDotProductAttention(temperature=d_k**0.5)

self.dropout = nn.Dropout(dropout)
self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

def forward(self, q, k, v, mask=None):

d_k, d_v, n_head = self.d_k, self.d_v, self.n_head
sz_b, len_q, len_k, len_v = q.size(0), q.size(1), k.size(1), v.size(1)

residual = q

# Pass through the pre-attention projection: b x lq x (n*dv)
# Separate different heads: b x lq x n x dv
q = self.w_qs(q).view(sz_b, len_q, n_head, d_k)
k = self.w_ks(k).view(sz_b, len_k, n_head, d_k)
v = self.w_vs(v).view(sz_b, len_v, n_head, d_v)

# Transpose for attention dot product: b x n x lq x dv
q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)

if mask is not None:
mask = mask.unsqueeze(1) # For head axis broadcasting.

q, attn = self.attention(q, k, v, mask=mask)

# Transpose to move the head dimension back: b x lq x n x dv
# Combine the last two dimensions to concatenate all the heads together: b x lq x (n*dv)
q = q.transpose(1, 2).contiguous().view(sz_b, len_q, -1)
q = self.dropout(self.fc(q))
q += residual

q = self.layer_norm(q)

return q, attn


class PositionwiseFeedForward(nn.Module):
"""A two-feed-forward-layer module"""

def __init__(self, d_in, d_hid, dropout=0.1):
super().__init__()
self.w_1 = nn.Linear(d_in, d_hid) # position-wise
self.w_2 = nn.Linear(d_hid, d_in) # position-wise
self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
self.dropout = nn.Dropout(dropout)

def forward(self, x):

residual = x

x = self.w_2(F.relu(self.w_1(x)))
x = self.dropout(x)
x += residual

x = self.layer_norm(x)

return x
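To make the head bookkeeping in MultiHeadAttention above concrete, a small shape check (a sketch under the assumption that torch and this repository are importable):

import torch

from modelscope.models.cv.language_guided_video_summarization.transformer.sub_layers import \
    MultiHeadAttention

mha = MultiHeadAttention(n_head=8, d_model=512, d_k=64, d_v=64)
q = torch.randn(2, 10, 512)    # (batch, query_len, d_model)
kv = torch.randn(2, 25, 512)   # (batch, key_len, d_model)
out, attn = mha(q, kv, kv)
print(out.shape, attn.shape)   # torch.Size([2, 10, 512]) torch.Size([2, 8, 10, 25])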

modelscope/models/cv/tinynas_classfication/plain_net_utils.py (+1, -1)

@@ -39,7 +39,7 @@ class PlainNet(nn.Module):
plainnet_struct_txt = self.module_opt.plainnet_struct_txt

if plainnet_struct_txt is not None:
with open(plainnet_struct_txt, 'r') as fid:
with open(plainnet_struct_txt, 'r', encoding='utf-8') as fid:
the_line = fid.readlines()[0].strip()
self.plainnet_struct = the_line
pass


modelscope/models/multi_modal/clip/bert_tokenizer.py (+1, -1)

@@ -120,7 +120,7 @@ def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
index = 0
with open(vocab_file, 'r') as reader:
with open(vocab_file, 'r', encoding='utf-8') as reader:
while True:
token = convert_to_unicode(reader.readline())
if not token:


modelscope/models/multi_modal/clip/model.py (+4, -2)

@@ -523,8 +523,10 @@ class CLIPForMultiModalEmbedding(TorchModel):
logger.info(f'Loading text model config from {text_model_config_file}')
assert os.path.exists(text_model_config_file)

with open(vision_model_config_file,
'r') as fv, open(text_model_config_file, 'r') as ft:
with open(
vision_model_config_file, 'r',
encoding='utf-8') as fv,\
open(text_model_config_file, 'r', encoding='utf-8') as ft:
self.model_info = json.load(fv)
for k, v in json.load(ft).items():
self.model_info[k] = v


modelscope/models/multi_modal/diffusion/model.py (+4, -2)

@@ -76,7 +76,7 @@ class DiffusionModel(nn.Module):
super(DiffusionModel, self).__init__()
# including text and generator config
model_config = json.load(
open('{}/model_config.json'.format(model_dir)))
open('{}/model_config.json'.format(model_dir), encoding='utf-8'))

# text encoder
text_config = model_config['text_config']
@@ -142,7 +142,9 @@ class DiffusionForTextToImageSynthesis(Model):

# diffusion process
diffusion_params = json.load(
open('{}/diffusion_config.json'.format(model_dir)))
open(
'{}/diffusion_config.json'.format(model_dir),
encoding='utf-8'))
self.diffusion_generator = make_diffusion(
**diffusion_params['generator_config'])
self.diffusion_upsampler_256 = make_diffusion(


modelscope/models/multi_modal/diffusion/structbert.py (+1, -1)

@@ -130,7 +130,7 @@ class BertConfig(object):
@classmethod
def from_json_file(cls, json_file):
"""Constructs a `BertConfig` from a json file of parameters."""
with open(json_file, 'r') as reader:
with open(json_file, 'r', encoding='utf-8') as reader:
text = reader.read()
return cls.from_dict(json.loads(text))



modelscope/models/multi_modal/diffusion/tokenizer.py (+1, -1)

@@ -67,7 +67,7 @@ def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
index = 0
with open(vocab_file, 'r') as reader:
with open(vocab_file, 'r', encoding='utf-8') as reader:
while True:
token = convert_to_unicode(reader.readline())
if not token:


modelscope/models/multi_modal/gemm/gemm_base.py (+3, -1)

@@ -522,7 +522,9 @@ class GEMMModel(nn.Module):

def __init__(self, model_dir):
super().__init__()
with open('{}/encoder_config.json'.format(model_dir), 'r') as f:
with open(
'{}/encoder_config.json'.format(model_dir), 'r',
encoding='utf-8') as f:
model_config = json.loads(f.read())
model_name = list(model_config.keys())[0]
config_args = model_config[model_name]


modelscope/models/multi_modal/mmr/models/clip_for_mm_video_embedding.py (+3, -1)

@@ -35,7 +35,9 @@ class VideoCLIPForMultiModalEmbedding(TorchModel):
def __init__(self, model_dir, **kwargs):
super().__init__(model_dir=model_dir, **kwargs)
# model config parameters
with open(f'{model_dir}/{ModelFile.CONFIGURATION}', 'r') as json_file:
with open(
f'{model_dir}/{ModelFile.CONFIGURATION}', 'r',
encoding='utf-8') as json_file:
model_config = json.load(json_file)
model_config = model_config['paras']
model_config['model_dir'] = model_dir


modelscope/models/multi_modal/mplug/configuration_mplug.py (+1, -1)

@@ -111,6 +111,6 @@ class MPlugConfig(PretrainedConfig):
@classmethod
def from_yaml_file(cls, yaml_file: Union[str,
os.PathLike]) -> Dict[str, Any]:
with open(yaml_file, 'r') as reader:
with open(yaml_file, 'r', encoding='utf-8') as reader:
config_dict = yaml.load(reader, Loader=yaml.Loader)
return cls(**config_dict)

modelscope/models/multi_modal/multi_stage_diffusion/model.py (+2, -1)

@@ -50,7 +50,8 @@ class UnCLIP(nn.Module):
def __init__(self, model_dir):
super(UnCLIP, self).__init__()
self.model_dir = model_dir
self.config = json.load(open(f'{model_dir}/{ModelFile.CONFIGURATION}'))
self.config = json.load(
open(f'{model_dir}/{ModelFile.CONFIGURATION}', encoding='utf-8'))

# modules
self.clip = CLIP(**self.config['clip']).fp16()


modelscope/models/multi_modal/ofa_for_all_tasks.py (+1, -1)

@@ -312,7 +312,7 @@ class OfaForAllTasks(TorchModel):
if self.cfg.model.get('answer2label', None):
ans2label_file = osp.join(self.model_dir,
self.cfg.model.answer2label)
with open(ans2label_file, 'r') as reader:
with open(ans2label_file, 'r', encoding='utf-8') as reader:
self.ans2label_dict = json.load(reader)

def save_pretrained(self,


+ 1
- 1
modelscope/models/nlp/mglm/arguments.py View File

@@ -743,7 +743,7 @@ def get_args():

if hasattr(args, 'deepspeed'
) and args.deepspeed and args.deepspeed_config is not None:
with open(args.deepspeed_config) as file:
with open(args.deepspeed_config, encoding='utf-8') as file:
deepspeed_config = json.load(file)
if 'train_micro_batch_size_per_gpu' in deepspeed_config:
args.batch_size = deepspeed_config[


+ 1
- 1
modelscope/models/nlp/mglm/data_utils/corpora.py View File

@@ -156,7 +156,7 @@ class DataReader:
def read_input_to_queue():
for path in paths:
print_rank_0(f'Start reading {path}')
with open(path) as file:
with open(path, encoding='utf-8') as file:
items = json.load(file)
for item in items:
task_queue.put(item)


+ 2
- 2
modelscope/models/nlp/mglm/data_utils/datasets.py View File

@@ -511,12 +511,12 @@ class json_dataset(data.Dataset):

def load_json_stream(self, load_path):
if not self.loose_json:
jsons = json.load(open(load_path, 'r'))
jsons = json.load(open(load_path, 'r', encoding='utf-8'))
generator = iter(jsons)
else:

def gen_helper():
with open(load_path, 'r') as f:
with open(load_path, 'r', encoding='utf-8') as f:
for row in f:
yield json.loads(row)



+ 3
- 1
modelscope/models/nlp/mglm/data_utils/extraction.py View File

@@ -29,7 +29,9 @@ with open(output_path, 'w') as output:
print(filename)
article_lines = []
article_open = False
with open(filename, mode='r', newline='\n') as file:
with open(
filename, mode='r', newline='\n',
encoding='utf-8') as file:
for line in file:
line = line.rstrip()
if '<doc id=' in line:


+ 1
- 1
modelscope/models/nlp/mglm/data_utils/tokenization_gpt2.py View File

@@ -179,7 +179,7 @@ class GPT2Tokenizer(object):
special_tokens=None,
max_len=None):
self.max_len = max_len if max_len is not None else int(1e12)
self.encoder = json.load(open(vocab_file))
self.encoder = json.load(open(vocab_file, encoding='utf-8'))
self.decoder = {v: k for k, v in self.encoder.items()}
self.errors = errors # how to handle errors in decoding
self.byte_encoder = bytes_to_unicode()


+ 1
- 1
modelscope/models/nlp/mglm/process_grid.py View File

@@ -19,7 +19,7 @@ for dir_path in glob.glob(path_pattern, recursive=True):
valid_path = os.path.join(dir_path, 'results.json')
if os.path.exists(valid_path):
print(entry)
with open(valid_path) as file:
with open(valid_path, encoding='utf-8') as file:
valid_result = json.load(file)
else:
print(f'{entry} no validation results')


+ 1
- 1
modelscope/models/nlp/mglm/tasks/language_model/dataset.py View File

@@ -121,7 +121,7 @@ class LambadaDataset(torch.utils.data.Dataset):

self.tokens = []
self.labels = []
with open(data_path, 'r') as f:
with open(data_path, 'r', encoding='utf-8') as f:
for line in f.readlines():
text = json.loads(line)['text']
tokens, labels = self.get_tokens(text)


+ 6
- 4
modelscope/models/nlp/mglm/tasks/seq2seq/dataset.py View File

@@ -209,14 +209,16 @@ class XSumProcessor:
raise NotImplementedError(split)
print_rank_0(f'Creating XSUM-{split} dataset from {self.data_dir}')
with open(
os.path.join(
self.data_dir,
'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json')) as file:
os.path.join(self.data_dir,
'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json'),
encoding='utf-8') as file:
id_list = json.load(file)
id_list = id_list[key]
source_texts, target_texts = [], []
for i, idx in enumerate(id_list):
with open(os.path.join(self.data_dir, f'{idx}.summary')) as file:
with open(
os.path.join(self.data_dir, f'{idx}.summary'),
encoding='utf-8') as file:
key, sentences = None, []
source_text, target_text = None, None
for line in file:


+ 4
- 4
modelscope/models/nlp/mglm/tasks/superglue/dataset.py View File

@@ -841,7 +841,7 @@ class RaceProcessor(DataProcessor):
path, 'middle', '*.txt')) + glob.glob(
os.path.join(path, 'high', '*.txt'))
for filename in filenames:
with open(filename, 'r') as f:
with open(filename, 'r', encoding='utf-8') as f:
for line in f:
data = json.loads(line)
idx = data['id']
@@ -1127,7 +1127,7 @@ class AgnewsProcessor(DataProcessor):
def _create_examples(path: str, set_type: str) -> List[InputExample]:
examples = []

with open(path) as f:
with open(path, encoding='utf-8') as f:
reader = csv.reader(f, delimiter=',')
for idx, row in enumerate(reader):
label, headline, body = row
@@ -1209,7 +1209,7 @@ class YelpPolarityProcessor(DataProcessor):
def _create_examples(path: str, set_type: str) -> List[InputExample]:
examples = []

with open(path) as f:
with open(path, encoding='utf-8') as f:
reader = csv.reader(f, delimiter=',')
for idx, row in enumerate(reader):
label, body = row
@@ -1419,7 +1419,7 @@ class SquadProcessor(DataProcessor):
@staticmethod
def _create_examples(path: str, set_type: str) -> List[InputExample]:
examples = []
with open(path) as f:
with open(path, encoding='utf-8') as f:
data = json.load(f)['data']

for idx, passage in enumerate(data):


+ 1
- 1
modelscope/models/nlp/mglm/tasks/superglue/pvp.py View File

@@ -538,7 +538,7 @@ class PVP(ABC):
dict) # type: Dict[int, Dict[str, List[str]]]
current_pattern_id = None

with open(path, 'r') as fh:
with open(path, 'r', encoding='utf-8') as fh:
for line in fh.read().splitlines():
if line.isdigit():
current_pattern_id = int(line)


+ 2
- 2
modelscope/models/nlp/mglm/utils.py View File

@@ -77,7 +77,7 @@ def print_and_save_args(args, verbose=True, log_dir=None):
with open(json_file, 'w') as output:
json.dump(vars(args), output, sort_keys=True)
if args.deepspeed and args.deepspeed_config is not None:
with open(args.deepspeed_config) as file:
with open(args.deepspeed_config, encoding='utf-8') as file:
deepspeed_config = json.load(file)
deepspeed_json_file = os.path.join(log_dir,
'config_gpt_large.json')
@@ -324,7 +324,7 @@ def get_checkpoint_iteration(load_path):
print_rank_0(' will not load any checkpoints and will start from '
'random')
return load_path, 0, False, False
with open(tracker_filename, 'r') as f:
with open(tracker_filename, 'r', encoding='utf-8') as f:
metastring = f.read().strip()
release = metastring == 'release'
# try:


+ 1
- 1
modelscope/models/science/unifold/data/residue_constants.py View File

@@ -443,7 +443,7 @@ def load_stereo_chemical_props():
stereo_chemical_props_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'stereo_chemical_props.txt')
with open(stereo_chemical_props_path, 'rt') as f:
with open(stereo_chemical_props_path, 'rt', encoding='utf-8') as f:
stereo_chemical_props = f.read()
lines_iter = iter(stereo_chemical_props.splitlines())
# Load bond lengths.


+ 3
- 2
modelscope/models/science/unifold/dataset.py View File

@@ -250,7 +250,7 @@ class UnifoldDataset(UnicoreDataset):
self.path = data_path

def load_json(filename):
return json.load(open(filename, 'r'))
return json.load(open(filename, 'r', encoding='utf-8'))

sample_weight = load_json(
os.path.join(self.path,
@@ -400,7 +400,8 @@ class UnifoldMultimerDataset(UnifoldDataset):
self.pdb_assembly = json.load(
open(
os.path.join(self.data_path,
json_prefix + 'pdb_assembly.json')))
json_prefix + 'pdb_assembly.json'),
encoding='utf-8'))
self.pdb_chains = self.get_chains(self.inverse_multi_label)
self.monomer_feature_path = os.path.join(self.data_path,
'pdb_features')


+ 2
- 2
modelscope/models/science/unifold/msa/pipeline.py View File

@@ -99,7 +99,7 @@ def run_msa_tool(
f.write(result[msa_format])
else:
logging.warning('Reading MSA from file %s', msa_out_path)
with open(msa_out_path, 'r') as f:
with open(msa_out_path, 'r', encoding='utf-8') as f:
result = {msa_format: f.read()}
return result

@@ -153,7 +153,7 @@ class DataPipeline:
def process(self, input_fasta_path: str,
msa_output_dir: str) -> FeatureDict:
"""Runs alignment tools on the input sequence and creates features."""
with open(input_fasta_path) as f:
with open(input_fasta_path, encoding='utf-8') as f:
input_fasta_str = f.read()
input_seqs, input_descs = parsers.parse_fasta(input_fasta_str)
if len(input_seqs) != 1:


+ 1
- 1
modelscope/models/science/unifold/msa/templates.py View File

@@ -155,7 +155,7 @@ def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]:
"""Parses release dates file, returns a mapping from PDBs to release dates."""
if path.endswith('txt'):
release_dates = {}
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
for line in f:
pdb_id, date = line.split(':')
date = date.strip()


+ 2
- 2
modelscope/msdatasets/task_datasets/movie_scene_segmentation/movie_scene_segmentation_dataset.py View File

@@ -106,14 +106,14 @@ class MovieSceneSegmentationDataset(TorchTaskDataset):
self.tmpl = '{}/shot_{}_img_{}.jpg' # video_id, shot_id, shot_num

if not self.test_mode:
with open(self.ann_file) as f:
with open(self.ann_file, encoding='utf-8') as f:
self.anno_data = json.load(f)
self.vidsid2label = {
f"{it['video_id']}_{it['shot_id']}": it['boundary_label']
for it in self.anno_data
}
else:
with open(self.ann_file) as f:
with open(self.ann_file, encoding='utf-8') as f:
self.anno_data = json.load(f)

def init_sampler(self, cfg):


+ 6
- 4
modelscope/msdatasets/task_datasets/referring_video_object_segmentation/referring_video_object_segmentation_dataset.py View File

@@ -146,7 +146,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
saved_annotations_file_path = osp.join(
root_path, f'sentences_single_frame_{subset}_annotations.json')
if osp.exists(saved_annotations_file_path):
with open(saved_annotations_file_path, 'r') as f:
with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
text_annotations_by_frame = [tuple(a) for a in json.load(f)]
return text_annotations_by_frame
elif (distributed and dist.get_rank() == 0) or not distributed:
@@ -203,7 +203,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
json.dump(text_annotations_by_frame, f)
if distributed:
dist.barrier()
with open(saved_annotations_file_path, 'r') as f:
with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
text_annotations_by_frame = [tuple(a) for a in json.load(f)]
return text_annotations_by_frame

@@ -267,8 +267,10 @@ def get_text_annotations_gt(root_path, subset):
osp.join(root_path, 'Release/videoset.csv'), header=None)
# 'vid', 'label', 'start_time', 'end_time', 'height', 'width', 'total_frames', 'annotated_frames', 'subset'
a2d_data_info.columns = ['vid', '', '', '', '', '', '', '', 'subset']
with open(osp.join(root_path, 'text_annotations/missed_videos.txt'),
'r') as f:
with open(
osp.join(root_path, 'text_annotations/missed_videos.txt'),
'r',
encoding='utf-8') as f:
unused_videos = f.read().splitlines()
subsets = {'train': 0, 'test': 1}
# filter unused videos and videos which do not belong to our train/test subset:


+ 1
- 1
modelscope/msdatasets/task_datasets/video_summarization_dataset.py View File

@@ -26,7 +26,7 @@ class VideoSummarizationDataset(TorchTaskDataset):
self.list_n_frames = []
self.list_positions = []

with open(self.split_filename) as f:
with open(self.split_filename, encoding='utf-8') as f:
data = json.loads(f.read())
for i, split in enumerate(data):
if i == self.split_index:


+ 1
- 1
modelscope/pipelines/audio/asr_inference_pipeline.py View File

@@ -116,7 +116,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
}

if self.framework == Frameworks.torch:
config_file = open(inputs['asr_model_config'])
config_file = open(inputs['asr_model_config'], encoding='utf-8')
root = yaml.full_load(config_file)
config_file.close()
frontend_conf = None


+ 4
- 0
modelscope/pipelines/cv/__init__.py View File

@@ -59,6 +59,7 @@ if TYPE_CHECKING:
from .mtcnn_face_detection_pipeline import MtcnnFaceDetectionPipeline
from .hand_static_pipeline import HandStaticPipeline
from .referring_video_object_segmentation_pipeline import ReferringVideoObjectSegmentationPipeline
from .language_guided_video_summarization_pipeline import LanguageGuidedVideoSummarizationPipeline

else:
_import_structure = {
@@ -132,6 +133,9 @@ else:
'referring_video_object_segmentation_pipeline': [
'ReferringVideoObjectSegmentationPipeline'
],
'language_guided_video_summarization_pipeline': [
'LanguageGuidedVideoSummarizationPipeline'
]
}

import sys


+ 1
- 1
modelscope/pipelines/cv/animal_recognition_pipeline.py View File

@@ -109,7 +109,7 @@ class AnimalRecognitionPipeline(Pipeline):

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
label_mapping_path = osp.join(self.local_path, 'label_mapping.txt')
with open(label_mapping_path, 'r') as f:
with open(label_mapping_path, 'r', encoding='utf-8') as f:
label_mapping = f.readlines()
score = torch.max(inputs['outputs'])
inputs = {


+ 1
- 1
modelscope/pipelines/cv/general_recognition_pipeline.py View File

@@ -110,7 +110,7 @@ class GeneralRecognitionPipeline(Pipeline):

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
label_mapping_path = osp.join(self.local_path, 'meta_info.txt')
with open(label_mapping_path, 'r') as f:
with open(label_mapping_path, 'r', encoding='utf-8') as f:
label_mapping = f.readlines()
score = torch.max(inputs['outputs'])
inputs = {


+ 250
- 0
modelscope/pipelines/cv/language_guided_video_summarization_pipeline.py View File

@@ -0,0 +1,250 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import os.path as osp
import random
import shutil
import tempfile
from typing import Any, Dict

import clip
import cv2
import numpy as np
import torch
from PIL import Image

from modelscope.metainfo import Pipelines
from modelscope.models.cv.language_guided_video_summarization import \
ClipItVideoSummarization
from modelscope.models.cv.language_guided_video_summarization.summarizer import (
extract_video_features, video_features_to_txt)
from modelscope.models.cv.video_summarization import summary_format
from modelscope.models.cv.video_summarization.summarizer import (
generate_summary, get_change_points)
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()


@PIPELINES.register_module(
Tasks.language_guided_video_summarization,
module_name=Pipelines.language_guided_video_summarization)
class LanguageGuidedVideoSummarizationPipeline(Pipeline):

def __init__(self, model: str, **kwargs):
"""
use `model` to create a language guided video summarization pipeline for prediction
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model, auto_collate=False, **kwargs)
logger.info(f'loading model from {model}')
self.model_dir = model

self.tmp_dir = kwargs.get('tmp_dir', None)
if self.tmp_dir is None:
self.tmp_dir = tempfile.TemporaryDirectory().name

config_path = osp.join(model, ModelFile.CONFIGURATION)
logger.info(f'loading config from {config_path}')
self.cfg = Config.from_file(config_path)

self.clip_model, self.clip_preprocess = clip.load(
'ViT-B/32',
device=self.device,
download_root=os.path.join(self.model_dir, 'clip'))

self.clipit_model = ClipItVideoSummarization(model)
self.clipit_model = self.clipit_model.to(self.device).eval()

logger.info('load model done')

def preprocess(self, input: Input) -> Dict[str, Any]:
if not isinstance(input, tuple):
raise TypeError(f'input should be a tuple of (video_path, sentences),'
f' but got {type(input)}')

video_path, sentences = input

if not os.path.exists(self.tmp_dir):
os.makedirs(self.tmp_dir)

frames = []
picks = []
cap = cv2.VideoCapture(video_path)
self.fps = cap.get(cv2.CAP_PROP_FPS)
self.frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
frame_idx = 0
# extract 1 frame every 15 frames in the video and save the frame index
while (cap.isOpened()):
ret, frame = cap.read()
if not ret:
break
if frame_idx % 15 == 0:
frames.append(frame)
picks.append(frame_idx)
frame_idx += 1
n_frame = frame_idx

if sentences is None:
logger.info('input sentences is none, using sentences from video!')

tmp_path = os.path.join(self.tmp_dir, 'tmp')
i3d_flow_path = os.path.join(self.model_dir, 'i3d/i3d_flow.pt')
i3d_rgb_path = os.path.join(self.model_dir, 'i3d/i3d_rgb.pt')
kinetics_class_labels = os.path.join(self.model_dir,
'i3d/label_map.txt')
pwc_path = os.path.join(self.model_dir, 'i3d/pwc_net.pt')
vggish_model_path = os.path.join(self.model_dir,
'vggish/vggish_model.ckpt')
vggish_pca_path = os.path.join(self.model_dir,
'vggish/vggish_pca_params.npz')

device = torch.device(
'cuda' if torch.cuda.is_available() else 'cpu')
i3d_feats = extract_video_features(
video_path=video_path,
feature_type='i3d',
tmp_path=tmp_path,
i3d_flow_path=i3d_flow_path,
i3d_rgb_path=i3d_rgb_path,
kinetics_class_labels=kinetics_class_labels,
pwc_path=pwc_path,
vggish_model_path=vggish_model_path,
vggish_pca_path=vggish_pca_path,
extraction_fps=2,
device=device)
rgb = i3d_feats['rgb']
flow = i3d_feats['flow']

device = '/gpu:0' if torch.cuda.is_available() else '/cpu:0'
vggish = extract_video_features(
video_path=video_path,
feature_type='vggish',
tmp_path=tmp_path,
i3d_flow_path=i3d_flow_path,
i3d_rgb_path=i3d_rgb_path,
kinetics_class_labels=kinetics_class_labels,
pwc_path=pwc_path,
vggish_model_path=vggish_model_path,
vggish_pca_path=vggish_pca_path,
extraction_fps=2,
device=device)
audio = vggish['audio']

duration_in_secs = float(self.frame_count) / self.fps

txt = video_features_to_txt(
duration_in_secs=duration_in_secs,
pretrained_cap_model_path=os.path.join(
self.model_dir, 'bmt/sample/best_cap_model.pt'),
prop_generator_model_path=os.path.join(
self.model_dir, 'bmt/sample/best_prop_model.pt'),
features={
'rgb': rgb,
'flow': flow,
'audio': audio
},
device_id=0)
sentences = [item['sentence'] for item in txt]

clip_image_features = []
for frame in frames:
x = self.clip_preprocess(
Image.fromarray(cv2.cvtColor(
frame, cv2.COLOR_BGR2RGB))).unsqueeze(0).to(self.device)
with torch.no_grad():
f = self.clip_model.encode_image(x).squeeze(0).cpu().numpy()
clip_image_features.append(f)

clip_txt_features = []
for sentence in sentences:
text_input = clip.tokenize(sentence).to(self.device)
with torch.no_grad():
text_feature = self.clip_model.encode_text(text_input).squeeze(
0).cpu().numpy()
clip_txt_features.append(text_feature)
clip_txt_features = self.sample_txt_features(clip_txt_features)
clip_txt_features = np.array(clip_txt_features).reshape((1, -1))

result = {
'video_name': video_path,
'clip_image_features': np.array(clip_image_features),
'clip_txt_features': np.array(clip_txt_features),
'n_frame': n_frame,
'picks': np.array(picks)
}
return result

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
clip_image_features = input['clip_image_features']
clip_txt_features = input['clip_txt_features']
clip_image_features = self.norm_feature(clip_image_features)
clip_txt_features = self.norm_feature(clip_txt_features)

change_points, n_frame_per_seg = get_change_points(
clip_image_features, input['n_frame'])

summary = self.inference(clip_image_features, clip_txt_features,
input['n_frame'], input['picks'],
change_points)

output = summary_format(summary, self.fps)

return {OutputKeys.OUTPUT: output}

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
if os.path.exists(self.tmp_dir):
shutil.rmtree(self.tmp_dir)
return inputs

def inference(self, clip_image_features, clip_txt_features, n_frames,
picks, change_points):
clip_image_features = torch.from_numpy(
np.array(clip_image_features, np.float32)).unsqueeze(0)
clip_txt_features = torch.from_numpy(
np.array(clip_txt_features, np.float32)).unsqueeze(0)
picks = np.array(picks, np.int32)

with torch.no_grad():
results = self.clipit_model(
dict(
frame_features=clip_image_features,
txt_features=clip_txt_features))
scores = results['scores']
if not scores.device.type == 'cpu':
scores = scores.cpu()
scores = scores.squeeze(0).numpy().tolist()
summary = generate_summary([change_points], [scores], [n_frames],
[picks])[0]

return summary.tolist()

def sample_txt_features(self, feat, num=7):
while len(feat) < num:
feat.append(feat[-1])
idxes = list(np.arange(0, len(feat)))
samples_idx = []
for ii in range(num):
idx = random.choice(idxes)
while idx in samples_idx:
idx = random.choice(idxes)
samples_idx.append(idx)
samples_idx.sort()

samples = []
for idx in samples_idx:
samples.append(feat[idx])
return samples

def norm_feature(self, frames_feat):
for ii in range(len(frames_feat)):
frame_feat = frames_feat[ii]
frames_feat[ii] = frame_feat / np.linalg.norm(frame_feat)
frames_feat = frames_feat.reshape((frames_feat.shape[0], -1))
return frames_feat
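For reference, the pipeline above expects a (video_path, sentences) tuple in preprocess(); sentences may be None, in which case captions are generated from the video's own I3D/VGGish features via BMT. A minimal invocation sketch, mirroring the unit test added later in this merge (the local video path is illustrative):

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

summarizer = pipeline(
    Tasks.language_guided_video_summarization,
    model='damo/cv_clip-it_video-summarization_language-guided_en')

# sentences=None lets the model caption the video itself before summarizing
result = summarizer(('data/test/videos/video_category_test_video.mp4', None))
print(result)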

+ 1
- 1
modelscope/pipelines/cv/ocr_recognition_pipeline.py View File

@@ -49,7 +49,7 @@ class OCRRecognitionPipeline(Pipeline):
self.infer_model.load_state_dict(
torch.load(model_path, map_location=self.device))
self.labelMapping = dict()
with open(label_path, 'r') as f:
with open(label_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
cnt = 2
for line in lines:


+ 13
- 9
modelscope/pipelines/cv/referring_video_object_segmentation_pipeline.py View File

@@ -138,6 +138,19 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
video_np = rearrange(self.video,
't c h w -> t h w c').numpy() / 255.0

# set font for text query in output video
if self.model.cfg.pipeline.output_font:
try:
font = ImageFont.truetype(
font=self.model.cfg.pipeline.output_font,
size=self.model.cfg.pipeline.output_font_size)
except OSError:
logger.error('can\'t open resource %s, load default font'
% self.model.cfg.pipeline.output_font)
font = ImageFont.load_default()
else:
font = ImageFont.load_default()

# del video
pred_masks_per_frame = rearrange(
torch.stack(inputs), 'q t 1 h w -> t q h w').numpy()
@@ -158,12 +171,6 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
W, H = vid_frame.size
draw = ImageDraw.Draw(vid_frame)

if self.model.cfg.pipeline.output_font:
font = ImageFont.truetype(
font=self.model.cfg.pipeline.output_font,
size=self.model.cfg.pipeline.output_font_size)
else:
font = ImageFont.load_default()
for i, (text_query, color) in enumerate(
zip(self.text_queries, colors), start=1):
w, h = draw.textsize(text_query, font=font)
@@ -173,9 +180,6 @@ class ReferringVideoObjectSegmentationPipeline(Pipeline):
fill=tuple(color) + (255, ),
font=font)
masked_video.append(np.array(vid_frame))
print(type(vid_frame))
print(type(masked_video[0]))
print(masked_video[0].shape)
# generate and save the output clip:

assert self.model.cfg.pipeline.output_path


+ 1
- 1
modelscope/pipelines/cv/tinynas_classification_pipeline.py View File

@@ -82,7 +82,7 @@ class TinynasClassificationPipeline(Pipeline):

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
label_mapping_path = osp.join(self.path, 'label_map.txt')
f = open(label_mapping_path)
f = open(label_mapping_path, encoding='utf-8')
content = f.read()
f.close()
label_dict = eval(content)


+ 1
- 1
modelscope/pipelines/cv/video_category_pipeline.py View File

@@ -36,7 +36,7 @@ class VideoCategoryPipeline(Pipeline):
super().__init__(model=model, **kwargs)
config_path = osp.join(self.model, ModelFile.CONFIGURATION)
logger.info(f'loading configuration from {config_path}')
with open(config_path, 'r') as f:
with open(config_path, 'r', encoding='utf-8') as f:
config = json.load(f)
self.frame_num = config['frame_num']
self.level_1_num = config['level_1_num']


+ 25
- 13
modelscope/pipelines/nlp/table_question_answering_pipeline.py View File

@@ -231,19 +231,6 @@ class TableQuestionAnsweringPipeline(Pipeline):
header_ids = table['header_id'] + ['null']
sql = result['sql']

str_sel_list, sql_sel_list = [], []
for idx, sel in enumerate(sql['sel']):
header_name = header_names[sel]
header_id = '`%s`.`%s`' % (table['table_id'], header_ids[sel])
if sql['agg'][idx] == 0:
str_sel_list.append(header_name)
sql_sel_list.append(header_id)
else:
str_sel_list.append(self.agg_ops[sql['agg'][idx]] + '('
+ header_name + ')')
sql_sel_list.append(self.agg_ops[sql['agg'][idx]] + '('
+ header_id + ')')

str_cond_list, sql_cond_list = [], []
where_conds, orderby_conds = [], []
for cond in sql['conds']:
@@ -285,9 +272,34 @@ class TableQuestionAnsweringPipeline(Pipeline):
if is_in:
str_orderby += ' LIMIT %d' % (limit_num)
sql_orderby += ' LIMIT %d' % (limit_num)
# post process null column
for idx, sel in enumerate(sql['sel']):
if sel == len(header_ids) - 1:
primary_sel = 0
for index, attrib in enumerate(table['header_attribute']):
if attrib == 'PRIMARY':
primary_sel = index
break
if primary_sel not in sql['sel']:
sql['sel'][idx] = primary_sel
else:
del sql['sel'][idx]
else:
str_orderby = ''

str_sel_list, sql_sel_list = [], []
for idx, sel in enumerate(sql['sel']):
header_name = header_names[sel]
header_id = '`%s`.`%s`' % (table['table_id'], header_ids[sel])
if sql['agg'][idx] == 0:
str_sel_list.append(header_name)
sql_sel_list.append(header_id)
else:
str_sel_list.append(self.agg_ops[sql['agg'][idx]] + '('
+ header_name + ')')
sql_sel_list.append(self.agg_ops[sql['agg'][idx]] + '('
+ header_id + ')')

if len(str_cond_list) != 0 and len(str_orderby) != 0:
final_str = 'SELECT %s FROM %s WHERE %s ORDER BY %s' % (
', '.join(str_sel_list), table['table_name'], str_where_conds,


+ 3
- 2
modelscope/pipelines/science/protein_structure_pipeline.py View File

@@ -59,8 +59,9 @@ def load_feature_for_one_target(

else:
uniprot_msa_dir = data_folder
sequence_ids = open(os.path.join(data_folder,
'chains.txt')).readline().split()
sequence_ids = open(
os.path.join(data_folder, 'chains.txt'),
encoding='utf-8').readline().split()

if symmetry_group is None:
batch, _ = load_and_process(


+ 1
- 1
modelscope/preprocessors/audio.py View File

@@ -15,7 +15,7 @@ from modelscope.utils.constant import Fields


def load_kaldi_feature_transform(filename):
fp = open(filename, 'r')
fp = open(filename, 'r', encoding='utf-8')
all_str = fp.read()
pos1 = all_str.find('AddShift')
pos2 = all_str.find('[', pos1)


+ 1
- 1
modelscope/preprocessors/kws.py View File

@@ -78,7 +78,7 @@ class WavToLists(Preprocessor):
assert os.path.exists(
inputs['config_path']), 'model config yaml file does not exist'

config_file = open(inputs['config_path'])
config_file = open(inputs['config_path'], encoding='utf-8')
root = yaml.full_load(config_file)
config_file.close()



+ 3
- 2
modelscope/preprocessors/multi_modal.py View File

@@ -145,8 +145,9 @@ class CLIPPreprocessor(Preprocessor):
self.image_resolution = kwargs['resolution']
else:
self.image_resolution = json.load(
open('{}/vision_model_config.json'.format(
model_dir)))['image_resolution']
open(
'{}/vision_model_config.json'.format(model_dir),
encoding='utf-8'))['image_resolution']
self.img_preprocess = self._build_image_transform()
# key mapping
# specify the input keys, compatible with training and inference whose key names may be different


+ 4
- 2
modelscope/preprocessors/nlp/nlp_base.py View File

@@ -59,8 +59,10 @@ class NLPBasePreprocessor(Preprocessor, ABC):
self.use_fast = False
elif self.use_fast is None and os.path.isfile(
os.path.join(model_dir, 'tokenizer_config.json')):
with open(os.path.join(model_dir, 'tokenizer_config.json'),
'r') as f:
with open(
os.path.join(model_dir, 'tokenizer_config.json'),
'r',
encoding='utf-8') as f:
json_config = json.load(f)
self.use_fast = json_config.get('use_fast')
self.use_fast = False if self.use_fast is None else self.use_fast


+ 4
- 1
modelscope/preprocessors/nlp/space/dialog_intent_prediction_preprocessor.py View File

@@ -35,7 +35,10 @@ class DialogIntentPredictionPreprocessor(Preprocessor):
self.model_dir, config=self.config)

self.categories = None
with open(os.path.join(self.model_dir, 'categories.json'), 'r') as f:
with open(
os.path.join(self.model_dir, 'categories.json'),
'r',
encoding='utf-8') as f:
self.categories = json.load(f)
assert len(self.categories) == 77



+ 1
- 1
modelscope/preprocessors/nlp/space/dst_processors.py View File

@@ -184,7 +184,7 @@ class multiwoz22Processor(DSTProcessor):
# Loads the dialogue_acts.json and returns a list
# of slot-value pairs.
def load_acts(self, input_file):
with open(input_file) as f:
with open(input_file, encoding='utf-8') as f:
acts = json.load(f)
s_dict = {}
for d in acts:


+ 4
- 2
modelscope/preprocessors/nlp/space/fields/gen_field.py View File

@@ -359,12 +359,14 @@ class MultiWOZBPETextField(BPETextField):
test_list = [
line.strip().lower() for line in open(
os.path.join(kwargs['data_dir'], 'testListFile.json'),
'r').readlines()
'r',
encoding='utf-8').readlines()
]
dev_list = [
line.strip().lower() for line in open(
os.path.join(kwargs['data_dir'], 'valListFile.json'),
'r').readlines()
'r',
encoding='utf-8').readlines()
]

self.dev_files, self.test_files = {}, {}


+ 1
- 1
modelscope/preprocessors/nlp/space/tokenizer.py View File

@@ -531,7 +531,7 @@ class GPT2Tokenizer(object):
special_tokens=None,
max_len=None):
self.max_len = max_len if max_len is not None else int(1e12)
self.encoder = json.load(open(vocab_file))
self.encoder = json.load(open(vocab_file, encoding='utf-8'))
self.decoder = {v: k for k, v in self.encoder.items()}
self.errors = errors # how to handle errors in decoding
self.byte_encoder = bytes_to_unicode()


+ 7
- 3
modelscope/preprocessors/nlp/space_T_cn/fields/database.py View File

@@ -20,9 +20,9 @@ class Database:
self.connection_obj = sqlite3.connect(
':memory:', check_same_thread=False)
self.type_dict = {'text': 'TEXT', 'number': 'INT', 'date': 'TEXT'}
self.tables = self.init_tables(table_file_path=table_file_path)
self.syn_dict = self.init_syn_dict(
syn_dict_file_path=syn_dict_file_path)
self.tables = self.init_tables(table_file_path=table_file_path)

def __del__(self):
if self.is_use_sqlite:
@@ -32,12 +32,12 @@ class Database:
tables = {}
lines = []
if type(table_file_path) == str:
with open(table_file_path, 'r') as fo:
with open(table_file_path, 'r', encoding='utf-8') as fo:
for line in fo:
lines.append(line)
elif type(table_file_path) == list:
for path in table_file_path:
with open(path, 'r') as fo:
with open(path, 'r', encoding='utf-8') as fo:
for line in fo:
lines.append(line)
else:
@@ -75,6 +75,10 @@ class Database:
continue
word = str(cell).strip().lower()
trie_set[ii].insert(word, word)
if word in self.syn_dict.keys():
for term in self.syn_dict[word]:
if term.strip() != '':
trie_set[ii].insert(term, word)

table['value_trie'] = trie_set



+ 2
- 2
modelscope/preprocessors/nlp/space_T_en/conversational_text_to_sql_preprocessor.py View File

@@ -45,7 +45,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
and torch.cuda.is_available() else 'cpu'
self.processor = None
self.table_path = os.path.join(self.model_dir, 'tables.json')
self.tables = json.load(open(self.table_path, 'r'))
self.tables = json.load(open(self.table_path, 'r', encoding='utf-8'))
self.output_tables = None
self.path_cache = []
self.graph_processor = GraphProcessor()
@@ -89,7 +89,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
'local_db_path'] not in self.path_cache:
self.path_cache.append(data['local_db_path'])
path = os.path.join(data['local_db_path'], 'tables.json')
self.tables = json.load(open(path, 'r'))
self.tables = json.load(open(path, 'r', encoding='utf-8'))
self.processor.db_dir = os.path.join(data['local_db_path'], 'db')
self.output_tables = process_tables(self.processor, self.tables)
Example.configuration(


+ 1
- 1
modelscope/preprocessors/ofa/base.py View File

@@ -76,7 +76,7 @@ class OfaBasePreprocessor:
self.constraint_trie = None
if self.cfg.model.get('answer2label', None):
ans2label_file = osp.join(model_dir, self.cfg.model.answer2label)
with open(ans2label_file, 'r') as reader:
with open(ans2label_file, 'r', encoding='utf-8') as reader:
ans2label_dict = json.load(reader)
self.ans2label = ans2label_dict
self.label2ans = {v: k for k, v in self.ans2label.items()}


+ 1
- 1
modelscope/preprocessors/science/uni_fold.py View File

@@ -201,7 +201,7 @@ def run_mmseqs2(
a3m_lines = {}
for a3m_file in a3m_files:
update_M, M = True, None
with open(a3m_file, 'r') as f:
with open(a3m_file, 'r', encoding='utf-8') as f:
lines = f.readlines()
for line in lines:
if len(line) > 0:


+ 41
- 32
modelscope/trainers/nlp/csanmt_translation_trainer.py View File

@@ -1,6 +1,7 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os.path as osp
import time
from typing import Dict, Optional

import tensorflow as tf
@@ -122,8 +123,7 @@ class CsanmtTranslationTrainer(BaseTrainer):
self.params['scale_l1'] = self.cfg['train']['scale_l1']
self.params['scale_l2'] = self.cfg['train']['scale_l2']
self.params['train_max_len'] = self.cfg['train']['train_max_len']
self.params['max_training_steps'] = self.cfg['train'][
'max_training_steps']
self.params['num_of_epochs'] = self.cfg['train']['num_of_epochs']
self.params['save_checkpoints_steps'] = self.cfg['train'][
'save_checkpoints_steps']
self.params['num_of_samples'] = self.cfg['train']['num_of_samples']
@@ -144,14 +144,15 @@ class CsanmtTranslationTrainer(BaseTrainer):
vocab_src = osp.join(self.model_dir, self.params['vocab_src'])
vocab_trg = osp.join(self.model_dir, self.params['vocab_trg'])

epoch = 0
iteration = 0

with self._session.as_default() as tf_session:
while True:
iteration += 1
if iteration >= self.params['max_training_steps']:
epoch += 1
if epoch >= self.params['num_of_epochs']:
break
tf.logging.info('%s: Epoch %i' % (__name__, epoch))
train_input_fn = input_fn(
train_src,
train_trg,
@@ -160,36 +161,44 @@ class CsanmtTranslationTrainer(BaseTrainer):
batch_size_words=self.params['train_batch_size_words'],
max_len=self.params['train_max_len'],
num_gpus=self.params['num_gpus']
if self.params['num_gpus'] > 0 else 1,
if self.params['num_gpus'] > 1 else 1,
is_train=True,
session=tf_session,
iteration=iteration)
epoch=epoch)

features, labels = train_input_fn

features_batch, labels_batch = tf_session.run(
[features, labels])

feed_dict = {
self.source_wids: features_batch,
self.target_wids: labels_batch
}
sess_outputs = self._session.run(
self.output, feed_dict=feed_dict)
loss_step = sess_outputs['loss']
logger.info('Iteration: {}, step loss: {:.6f}'.format(
iteration, loss_step))

if iteration % self.params['save_checkpoints_steps'] == 0:
tf.logging.info('%s: Saving model on step: %d.' %
(__name__, iteration))
ck_path = self.model_dir + 'model.ckpt'
self.model_saver.save(
tf_session,
ck_path,
global_step=tf.train.get_global_step())

tf.logging.info('%s: NMT training completed at time: %s.')
try:
while True:
features_batch, labels_batch = tf_session.run(
[features, labels])
iteration += 1
feed_dict = {
self.source_wids: features_batch,
self.target_wids: labels_batch
}
sess_outputs = self._session.run(
self.output, feed_dict=feed_dict)
loss_step = sess_outputs['loss']
logger.info('Iteration: {}, step loss: {:.6f}'.format(
iteration, loss_step))

if iteration % self.params[
'save_checkpoints_steps'] == 0:
tf.logging.info('%s: Saving model on step: %d.' %
(__name__, iteration))
ck_path = self.model_dir + 'model.ckpt'
self.model_saver.save(
tf_session,
ck_path,
global_step=tf.train.get_global_step())

except tf.errors.OutOfRangeError:
tf.logging.info('epoch %d end!' % (epoch))

tf.logging.info(
'%s: NMT training completed at time: %s.' %
(__name__, time.asctime(time.localtime(time.time()))))

def evaluate(self,
checkpoint_path: Optional[str] = None,
@@ -222,7 +231,7 @@ def input_fn(src_file,
num_gpus=1,
is_train=True,
session=None,
iteration=None):
epoch=None):
src_vocab = tf.lookup.StaticVocabularyTable(
tf.lookup.TextFileInitializer(
src_vocab_file,
@@ -291,7 +300,7 @@ def input_fn(src_file,

if is_train:
session.run(iterator.initializer)
if iteration == 1:
if epoch == 1:
session.run(tf.tables_initializer())
return features, labels
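The restructured loop above switches from counting iterations against max_training_steps to running whole epochs and letting the dataset iterator signal exhaustion. A minimal sketch of that TF1-style idiom, assuming a graph-mode session and an initializable iterator (the helper name and feed callback are illustrative, not part of the trainer):

import tensorflow.compat.v1 as tf

def run_one_epoch(session, iterator, fetches, make_feed_dict):
    """Run `fetches` once per batch until the dataset iterator is exhausted."""
    session.run(iterator.initializer)         # rewind the dataset for this epoch
    next_batch = iterator.get_next()
    steps = 0
    try:
        while True:
            batch = session.run(next_batch)   # raises OutOfRangeError at end of data
            session.run(fetches, feed_dict=make_feed_dict(batch))
            steps += 1
    except tf.errors.OutOfRangeError:         # end of epoch, not a failure
        pass
    return steps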



+ 2
- 1
modelscope/trainers/nlp/space/eval.py View File

@@ -771,7 +771,8 @@ class CamRestEvaluator(GenericEvaluator):
def get_entities(self, entity_path):
entities_flat = []
entitiy_to_slot_dict = {}
raw_entities = json.loads(open(entity_path).read().lower())
raw_entities = json.loads(
open(entity_path, encoding='utf-8').read().lower())
for s in raw_entities['informable']:
entities_flat.extend(raw_entities['informable'][s])
for v in raw_entities['informable'][s]:


+ 1
- 1
modelscope/utils/audio/audio_utils.py View File

@@ -47,7 +47,7 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
else:
return None

with open(origin_config_file) as f:
with open(origin_config_file, encoding='utf-8') as f:
lines = f.readlines()
with open(new_config_file, 'w') as f:
for line in lines:


+ 1
- 1
modelscope/utils/config.py View File

@@ -178,7 +178,7 @@ class Config:
if cfg_text:
text = cfg_text
elif filename:
with open(filename, 'r') as f:
with open(filename, 'r', encoding='utf-8') as f:
text = f.read()
else:
text = ''


+ 1
- 0
modelscope/utils/constant.py View File

@@ -80,6 +80,7 @@ class CVTasks(object):
video_embedding = 'video-embedding'
virtual_try_on = 'virtual-try-on'
movie_scene_segmentation = 'movie-scene-segmentation'
language_guided_video_summarization = 'language-guided-video-summarization'

# video segmentation
referring_video_object_segmentation = 'referring-video-object-segmentation'


+ 1
- 1
modelscope/utils/hub.py View File

@@ -124,7 +124,7 @@ def parse_label_mapping(model_dir):
label2id = None
label_path = os.path.join(model_dir, ModelFile.LABEL_MAPPING)
if os.path.exists(label_path):
with open(label_path) as f:
with open(label_path, encoding='utf-8') as f:
label_mapping = json.load(f)
label2id = {name: idx for name, idx in label_mapping.items()}



+ 3
- 1
modelscope/utils/nlp/space/clean_dataset.py View File

@@ -59,7 +59,9 @@ def clean_text(data_dir, text):
text) # 'abc.xyz' -> 'abc . xyz'
text = re.sub(r'(\w+)\.\.? ', r'\1 . ', text) # if 'abc. ' -> 'abc . '

with open(os.path.join(data_dir, 'mapping.pair'), 'r') as fin:
with open(
os.path.join(data_dir, 'mapping.pair'), 'r',
encoding='utf-8') as fin:
for line in fin.readlines():
fromx, tox = line.replace('\n', '').split('\t')
text = ' ' + text + ' '


+ 3
- 1
modelscope/utils/nlp/space/db_ops.py View File

@@ -15,7 +15,9 @@ class MultiWozDB(object):
self.dbs = {}
self.sql_dbs = {}
for domain in all_domains:
with open(os.path.join(db_dir, db_paths[domain]), 'r') as f:
with open(
os.path.join(db_dir, db_paths[domain]), 'r',
encoding='utf-8') as f:
self.dbs[domain] = json.loads(f.read().lower())

def oneHotVector(self, domain, num):


+ 2
- 2
modelscope/utils/nlp/space/utils.py View File

@@ -146,9 +146,9 @@ class MultiWOZVocab(object):

def load_vocab(self, vocab_path):
self._freq_dict = json.loads(
open(vocab_path + '.freq.json', 'r').read())
open(vocab_path + '.freq.json', 'r', encoding='utf-8').read())
self._word2idx = json.loads(
open(vocab_path + '.word2idx.json', 'r').read())
open(vocab_path + '.word2idx.json', 'r', encoding='utf-8').read())
self._idx2word = {}
for w, idx in self._word2idx.items():
self._idx2word[idx] = w


+ 4
- 0
requirements/cv.txt View File

@@ -1,5 +1,7 @@
albumentations>=1.0.3
av>=9.2.0
bmt_clipit>=1.0
clip>=1.0
easydict
fairscale>=0.4.1
fastai>=1.0.51
@@ -19,6 +21,7 @@ moviepy>=1.0.3
networkx>=2.5
numba
onnxruntime>=1.10
opencv-python
pai-easycv>=0.6.3.9
pandas
psutil
@@ -32,3 +35,4 @@ tf_slim
timm>=0.4.9
torchmetrics>=0.6.2
torchvision
videofeatures_clipit>=1.0

+ 1
- 2
requirements/framework.txt View File

@@ -1,6 +1,6 @@
addict
attrs
# version beyond 2.5.2 introduces compatbility issue and is being resolved
# version beyond 2.5.2 introduces compatibility issue and is being resolved
datasets<=2.5.2
easydict
einops
@@ -8,7 +8,6 @@ filelock>=3.3.0
gast>=0.2.2
jsonplus
numpy
opencv-python
oss2
Pillow>=6.2.0
# for pyarrow 9.0.0 event_loop core dump


+ 2
- 2
setup.py View File

@@ -50,7 +50,7 @@ def get_hash():


def get_version():
with open(version_file, 'r') as f:
with open(version_file, 'r', encoding='utf-8') as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']

@@ -109,7 +109,7 @@ def parse_requirements(fname='requirements.txt', with_version=True):
yield info

def parse_require_file(fpath):
with open(fpath, 'r') as f:
with open(fpath, 'r', encoding='utf-8') as f:
for line in f.readlines():
line = line.strip()
if line.startswith('http'):


+ 49
- 0
tests/pipelines/test_language_guided_video_summarization.py View File

@@ -0,0 +1,49 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import shutil
import tempfile
import unittest

import torch

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.demo_utils import DemoCompatibilityCheck
from modelscope.utils.test_utils import test_level


class LanguageGuidedVideoSummarizationTest(unittest.TestCase,
DemoCompatibilityCheck):

def setUp(self) -> None:
self.task = Tasks.language_guided_video_summarization
self.model_id = 'damo/cv_clip-it_video-summarization_language-guided_en'

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_modelhub(self):
video_path = 'data/test/videos/video_category_test_video.mp4'
# input can be sentences such as sentences=['phone', 'hand'], or sentences=None
sentences = None
summarization_pipeline = pipeline(
Tasks.language_guided_video_summarization, model=self.model_id)
result = summarization_pipeline((video_path, sentences))

print(f'video summarization output: \n{result}.')

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_modelhub_default_model(self):
video_path = 'data/test/videos/video_category_test_video.mp4'
summarization_pipeline = pipeline(
Tasks.language_guided_video_summarization)
result = summarization_pipeline(video_path)

print(f'video summarization output:\n {result}.')

@unittest.skip('demo compatibility test is only enabled on a needed-basis')
def test_demo_compatibility(self):
self.compatibility_check()


if __name__ == '__main__':
unittest.main()

+ 2
- 2
tests/pipelines/test_referring_video_object_segmentation.py View File

@@ -14,7 +14,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase,
self.task = Tasks.referring_video_object_segmentation
self.model_id = 'damo/cv_swin-t_referring_video-object-segmentation'

@unittest.skip('skip since the model is set to private for now')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_referring_video_object_segmentation(self):
input_location = 'data/test/videos/referring_video_object_segmentation_test_video.mp4'
text_queries = [
@@ -31,7 +31,7 @@ class ReferringVideoObjectSegmentationTest(unittest.TestCase,
else:
raise ValueError('process error')

@unittest.skip('skip since the model is set to private for now')
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_referring_video_object_segmentation_with_default_task(self):
input_location = 'data/test/videos/referring_video_object_segmentation_test_video.mp4'
text_queries = [


+ 4
- 12
tests/pipelines/test_table_question_answering.py View File

@@ -24,13 +24,10 @@ def tableqa_tracking_and_print_results_with_history(
'utterance': [
'有哪些风险类型?',
'风险类型有多少种?',
'珠江流域的小(2)型水库的库容总量是多少?',
'珠江流域的小型水库的库容总量是多少?',
'那平均值是多少?',
'那水库的名称呢?',
'换成中型的呢?',
'枣庄营业厅的电话',
'那地址呢?',
'枣庄营业厅的电话和地址',
]
}
for p in pipelines:
@@ -55,9 +52,7 @@ def tableqa_tracking_and_print_results_without_history(
'utterance': [
'有哪些风险类型?',
'风险类型有多少种?',
'珠江流域的小(2)型水库的库容总量是多少?',
'枣庄营业厅的电话',
'枣庄营业厅的电话和地址',
'珠江流域的小型水库的库容总量是多少?',
]
}
for p in pipelines:
@@ -77,13 +72,10 @@ def tableqa_tracking_and_print_results_with_tableid(
'utterance': [
['有哪些风险类型?', 'fund'],
['风险类型有多少种?', 'reservoir'],
['珠江流域的小(2)型水库的库容总量是多少?', 'reservoir'],
['珠江流域的小型水库的库容总量是多少?', 'reservoir'],
['那平均值是多少?', 'reservoir'],
['那水库的名称呢?', 'reservoir'],
['换成中型的呢?', 'reservoir'],
['枣庄营业厅的电话', 'business'],
['那地址呢?', 'business'],
['枣庄营业厅的电话和地址', 'business'],
],
}
for p in pipelines:
@@ -157,7 +149,7 @@ class TableQuestionAnswering(unittest.TestCase):
os.path.join(model.model_dir, 'databases'))
],
syn_dict_file_path=os.path.join(model.model_dir, 'synonym.txt'),
is_use_sqlite=False)
is_use_sqlite=True)
preprocessor = TableQuestionAnsweringPreprocessor(
model_dir=model.model_dir, db=db)
pipelines = [


+ 1
- 1
tests/run.py View File

@@ -247,7 +247,7 @@ def run_in_subprocess(args):
test_suite_env_map[test_suite_file] = 'default'

if args.run_config is not None and Path(args.run_config).exists():
with open(args.run_config) as f:
with open(args.run_config, encoding='utf-8') as f:
run_config = yaml.load(f, Loader=yaml.FullLoader)
if 'isolated' in run_config:
isolated_cases = run_config['isolated']


+ 1
- 0
tests/run_config.yaml View File

@@ -12,6 +12,7 @@ isolated: # test cases that may require excessive amount of GPU memory, which
- test_segmentation_pipeline.py
- test_movie_scene_segmentation.py
- test_image_inpainting.py
- test_mglm_text_summarization.py

envs:
default: # default env, case not in other env will in default, pytorch.


+ 2
- 2
tests/trainers/easycv/test_easycv_trainer.py View File

@@ -109,7 +109,7 @@ class EasyCVTrainerTestSingleGpu(unittest.TestCase):
json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
self.assertEqual(len(json_files), 1)

with open(json_files[0], 'r') as f:
with open(json_files[0], 'r', encoding='utf-8') as f:
lines = [i.strip() for i in f.readlines()]

self.assertDictContainsSubset(
@@ -185,7 +185,7 @@ class EasyCVTrainerTestMultiGpus(DistributedTestCase):
json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
self.assertEqual(len(json_files), 1)

with open(json_files[0], 'r') as f:
with open(json_files[0], 'r', encoding='utf-8') as f:
lines = [i.strip() for i in f.readlines()]

self.assertDictContainsSubset(


+ 3
- 3
tests/trainers/test_image_denoise_trainer.py View File

@@ -62,7 +62,7 @@ class ImageDenoiseTrainerTest(unittest.TestCase):
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
for i in range(2):
for i in range(1):
self.assertIn(f'epoch_{i+1}.pth', results_files)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@@ -73,13 +73,13 @@ class ImageDenoiseTrainerTest(unittest.TestCase):
model=model,
train_dataset=self.dataset_train,
eval_dataset=self.dataset_val,
max_epochs=2,
max_epochs=1,
work_dir=self.tmp_dir)
trainer = build_trainer(default_args=kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
for i in range(2):
for i in range(1):
self.assertIn(f'epoch_{i+1}.pth', results_files)




+ 5
- 7
tests/trainers/test_referring_video_object_segmentation_trainer.py View File

@@ -7,8 +7,8 @@ import zipfile

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.models.cv.movie_scene_segmentation import \
MovieSceneSegmentationModel
from modelscope.models.cv.referring_video_object_segmentation import \
ReferringVideoObjectSegmentation
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.config import Config, ConfigDict
@@ -46,7 +46,6 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase):
dataset_name=train_data_cfg.name,
split=train_data_cfg.split,
cfg=train_data_cfg.cfg,
namespace='damo',
test_mode=train_data_cfg.test_mode)
assert next(
iter(self.train_dataset.config_kwargs['split_config'].values()))
@@ -55,14 +54,13 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase):
dataset_name=test_data_cfg.name,
split=test_data_cfg.split,
cfg=test_data_cfg.cfg,
namespace='damo',
test_mode=test_data_cfg.test_mode)
assert next(
iter(self.test_dataset.config_kwargs['split_config'].values()))

self.max_epochs = max_epochs

@unittest.skip('skip since the model is set to private for now')
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_trainer(self):
kwargs = dict(
model=self.model_id,
@@ -77,11 +75,11 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase):
results_files = os.listdir(trainer.work_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)

@unittest.skip('skip since the model is set to private for now')
@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_trainer_with_model_and_args(self):

cache_path = snapshot_download(self.model_id)
model = MovieSceneSegmentationModel.from_pretrained(cache_path)
model = ReferringVideoObjectSegmentation.from_pretrained(cache_path)
kwargs = dict(
cfg_file=os.path.join(cache_path, ModelFile.CONFIGURATION),
model=model,


+ 2
- 2
tests/trainers/test_trainer.py View File

@@ -248,7 +248,7 @@ class TrainerTest(unittest.TestCase):
results_files = os.listdir(self.tmp_dir)

json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
with open(json_file, 'r') as f:
with open(json_file, 'r', encoding='utf-8') as f:
lines = [i.strip() for i in f.readlines()]
self.assertDictContainsSubset(
{
@@ -367,7 +367,7 @@ class TrainerTest(unittest.TestCase):
trainer.train()
results_files = os.listdir(self.tmp_dir)
json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
with open(json_file, 'r') as f:
with open(json_file, 'r', encoding='utf-8') as f:
lines = [i.strip() for i in f.readlines()]
self.assertDictContainsSubset(
{


+ 3
- 3
tests/trainers/test_trainer_gpu.py View File

@@ -142,7 +142,7 @@ class TrainerTestSingleGpu(unittest.TestCase):
json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
self.assertEqual(len(json_files), 1)

with open(json_files[0], 'r') as f:
with open(json_files[0], 'r', encoding='utf-8') as f:
lines = [i.strip() for i in f.readlines()]
self.assertDictContainsSubset(
{
@@ -236,7 +236,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
self.assertEqual(len(json_files), 1)

with open(json_files[0], 'r') as f:
with open(json_files[0], 'r', encoding='utf-8') as f:
lines = [i.strip() for i in f.readlines()]

self.assertDictContainsSubset(
@@ -320,7 +320,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
self.assertEqual(len(json_files), 1)

with open(json_files[0], 'r') as f:
with open(json_files[0], 'r', encoding='utf-8') as f:
lines = [i.strip() for i in f.readlines()]

print(results_files, lines)


+ 9
- 3
tests/trainers/test_translation_trainer.py View File

@@ -6,11 +6,17 @@ from modelscope.utils.test_utils import test_level


class TranslationTest(unittest.TestCase):
model_id = 'damo/nlp_csanmt_translation_zh2en'

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name(self):
trainer = CsanmtTranslationTrainer(model=self.model_id)
def test_run_with_model_name_for_en2zh(self):
model_id = 'damo/nlp_csanmt_translation_en2zh'
trainer = CsanmtTranslationTrainer(model=model_id)
trainer.train()

@unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
def test_run_with_model_name_for_en2fr(self):
model_id = 'damo/nlp_csanmt_translation_en2fr'
trainer = CsanmtTranslationTrainer(model=model_id)
trainer.train()



