| @@ -1,7 +1,7 @@ | |||
|  | |||
| ============================================================ | |||
| - [What is MindSpore?](#what-is-mindspore) | |||
| - [What Is MindSpore?](#what-is-mindspore) | |||
| - [Automatic Differentiation](#automatic-differentiation) | |||
| - [Automatic Parallel](#automatic-parallel) | |||
| - [Installation](#installation) | |||
| @@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem. | |||
| <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/> | |||
| For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html). | |||
| For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html). | |||
| ### Automatic Differentiation | |||
| @@ -76,13 +76,36 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex | |||
| 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package. | |||
| ``` | |||
| pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl | |||
| pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| ``` | |||
| 2. Run the following command to verify the install. | |||
| ```python | |||
import numpy as np
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P

# Run in graph mode on the CPU backend to verify the installation.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


class Mul(nn.Cell):
    """Cell wrapping the element-wise multiply primitive."""

    def __init__(self):
        super(Mul, self).__init__()
        self.mul = P.Mul()

    def construct(self, x, y):
        """Return the element-wise product of x and y."""
        return self.mul(x, y)


x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32))
y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32))
mul = Mul()
# Expected output: [ 4. 10. 18.]
print(mul(x, y))
| ``` | |||
| python -c 'import mindspore' | |||
| ``` | |||
| [ 4. 10. 18.] | |||
| ``` | |||
| ### From Source | |||
| @@ -96,20 +119,22 @@ currently the containerized build options are supported as follows: | |||
| | Hardware Platform | Docker Image Repository | Tag | Description | | |||
| | :---------------- | :---------------------- | :-- | :---------- | | |||
| | CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. | | |||
| | CPU | `mindspore/mindspore-cpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` CPU release. | | |||
| | | | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | |||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. | | |||
| | GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. | | |||
| | GPU | `mindspore/mindspore-gpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` GPU release. | | |||
| | | | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | |||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. | | |||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU CUDA10.1` backend. | | |||
| | Ascend | <center>—</center> | <center>—</center> | Coming soon. | | |||
| > **NOTICE:** For GPU `devel` docker image, it's NOT suggested to directly install the whl package after building from the source, instead we strongly RECOMMEND you transfer and install the whl package inside GPU `runtime` docker image. | |||
| * CPU | |||
| For `CPU` backend, you can directly pull and run the image using the below command: | |||
| For `CPU` backend, you can directly pull and run the latest stable image using the below command: | |||
| ``` | |||
| docker pull mindspore/mindspore-cpu:0.1.0-alpha | |||
| docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore' | |||
| docker pull mindspore/mindspore-cpu:0.2.0-alpha | |||
| docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash | |||
| ``` | |||
| * GPU | |||
| @@ -124,20 +149,21 @@ currently the containerized build options are supported as follows: | |||
| sudo systemctl restart docker | |||
| ``` | |||
| Then you can pull and run the image using the below command: | |||
| Then you can pull and run the latest stable image using the below command: | |||
| ``` | |||
| docker pull mindspore/mindspore-gpu:0.1.0-alpha | |||
| docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash | |||
| docker pull mindspore/mindspore-gpu:0.2.0-alpha | |||
| docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash | |||
| ``` | |||
| To test if the docker image works, please execute the python code below and check the output: | |||
| ```python | |||
import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import functional as F

# Target the GPU backend to verify the docker image works.
context.set_context(device_target="GPU")

# Adding two all-ones (1,3,3,4) tensors should print a tensor of 2s.
ones = np.ones([1, 3, 3, 4]).astype(np.float32)
x = Tensor(ones)
y = Tensor(ones.copy())
print(F.tensor_add(x, y))
| @@ -157,11 +183,11 @@ currently the containerized build options are supported as follows: | |||
| ``` | |||
| If you want to learn more about the building process of MindSpore docker images, | |||
| please check out `docker` folder for the details. | |||
| please check out [docker](docker/README.md) repo for the details. | |||
| ## Quickstart | |||
| See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html) | |||
| See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html) | |||
| to implement the image classification. | |||
| ## Docs | |||
| @@ -1,3 +1,75 @@ | |||
| # Release 0.2.0-alpha | |||
| ## Major Features and Improvements | |||
| ### Ascend 910 Training and Inference Framework | |||
| * New models | |||
| * MobileNetV2: Inverted Residuals and Linear Bottlenecks. | |||
| * ResNet101: Deep Residual Learning for Image Recognition. | |||
| * Frontend and User Interface | |||
| * Support for all python comparison operators. | |||
| * Support for math operators **,//,%. Support for other python operators like and/or/not/is/is not/ in/ not in. | |||
| * Support for the gradients of function with variable arguments. | |||
| * Support for tensor indexing assignment for certain indexing type. | |||
| * Support for dynamic learning rate. | |||
| * User interfaces change log | |||
| * DepthwiseConv2dNative, DepthwiseConv2dNativeBackpropFilter, DepthwiseConv2dNativeBackpropInput([!424](https://gitee.com/mindspore/mindspore/pulls/424)) | |||
| * ReLU6, ReLU6Grad([!224](https://gitee.com/mindspore/mindspore/pulls/224)) | |||
| * GeneratorDataset([!183](https://gitee.com/mindspore/mindspore/pulls/183)) | |||
| * VOCDataset([!477](https://gitee.com/mindspore/mindspore/pulls/477)) | |||
| * MindDataset, PKSampler([!514](https://gitee.com/mindspore/mindspore/pulls/514)) | |||
| * map([!506](https://gitee.com/mindspore/mindspore/pulls/506)) | |||
| * Conv([!226](https://gitee.com/mindspore/mindspore/pulls/226)) | |||
| * Adam([!253](https://gitee.com/mindspore/mindspore/pulls/253)) | |||
| * _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size([!189](https://gitee.com/mindspore/mindspore/pulls/189)) | |||
| * CheckpointConfig([!122](https://gitee.com/mindspore/mindspore/pulls/122)) | |||
| * Constant([!54](https://gitee.com/mindspore/mindspore/pulls/54)) | |||
| * Executor and Performance Optimization | |||
| * Support parallel execution of data prefetching and forward/backward computing. | |||
| * Support parallel execution of gradient aggregation and forward/backward computing in distributed training scenarios. | |||
| * Support operator fusion optimization. | |||
| * Optimize compilation process and improve the performance. | |||
| * Data processing, augmentation, and save format | |||
| * Support multi-process of GeneratorDataset/PyFunc for high performance | |||
| * Support variable batchsize | |||
| * Support new Dataset operators, such as filter, skip, take, TextLineDataset | |||
| ### Other Hardware Support | |||
| * GPU platform | |||
| * Use dynamic memory pool by default on GPU. | |||
| * Support parallel execution of computation and communication. | |||
| * Support continuous address allocation by memory pool. | |||
| * CPU platform | |||
| * Support for windows 10 OS. | |||
| ## Bugfixes | |||
| * Models | |||
| * Fix mixed precision bug for VGG16 model ([!629](https://gitee.com/mindspore/mindspore/pulls/629)). | |||
| * Python API | |||
| * Fix ControlDepend operator bugs on CPU and GPU ([!396](https://gitee.com/mindspore/mindspore/pulls/396)). | |||
| * Fix ArgMinWithValue operator bugs ([!338](https://gitee.com/mindspore/mindspore/pulls/338)). | |||
| * Fix Dense operator bugs on PyNative mode ([!276](https://gitee.com/mindspore/mindspore/pulls/276)). | |||
| * Fix MatMul operator bugs on PyNative mode ([!288](https://gitee.com/mindspore/mindspore/pulls/288)). | |||
| * Executor | |||
| * Fix operator selection bugs and make it general ([!300](https://gitee.com/mindspore/mindspore/pulls/300)). | |||
| * Fix memory reuse bug for GetNext op ([!291](https://gitee.com/mindspore/mindspore/pulls/291)). | |||
| * GPU platform | |||
| * Fix memory allocation in multi-graph scenarios ([!444](https://gitee.com/mindspore/mindspore/pulls/444)). | |||
| * Fix bias_add_grad under fp16 precision ([!598](https://gitee.com/mindspore/mindspore/pulls/598)). | |||
| * Fix support for fp16 kernels on nvidia 1080Ti([!571](https://gitee.com/mindspore/mindspore/pulls/571)). | |||
| * Fix parsing of tuple type parameters ([!316](https://gitee.com/mindspore/mindspore/pulls/316)). | |||
| * Data processing | |||
| * Fix TypeErrors about can't pickle mindspore._c_dataengine.DEPipeline objects([!434](https://gitee.com/mindspore/mindspore/pulls/434)). | |||
| * Add TFRecord file verification([!406](https://gitee.com/mindspore/mindspore/pulls/406)). | |||
| ## Contributors | |||
| Thanks goes to these wonderful people: | |||
| Alexey_Shevlyakov, Cathy, Chong, Hoai, Jonathan, Junhan, JunhanHu, Peilin, SanjayChan, StrawNoBerry, VectorSL, Wei, WeibiaoYu, Xiaoda, Yanjun, YuJianfeng, ZPaC, Zhang, ZhangQinghua, ZiruiWu, amongo, anthonyaje, anzhengqi, biffex, caifubi, candanzg, caojian05, casgj, cathwong, ch-l, chang, changzherui, chenfei, chengang, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, dengwentao, dinghao, fanglei, fary86, flywind, gaojing, geekun, gengdongjie, ghzl, gong, gongchen, gukecai, guohongzilong, guozhijian, gziyan, h.farahat, hesham, huangdongrun, huanghui, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, jonathan_yan, jonyguo, jzw, kingfo, kisnwang, laiyongqiang, leonwanghui, lianliguang, lichen, lichenever, limingqi107, liubuyu, liuxiao, liyong, liyong126, lizhenyu, lupengcheng, lvliang, maoweiyong, ms_yan, mxm, ougongchang, panfengfeng, panyifeng, pengyanjun, penn, qianlong, seatea, simson, suteng, thlinh, vlne-v1, wangchengke, wanghua, wangnan39, wangqiuliang, wenchunjiang, wenkai, wukesong, xiefangqi, xulei, yanghaitao, yanghaoran, yangjie159, yangzhenzhang, yankai10, yanzhenxiang2020, yao_yf, yoonlee666, zhangbuxue, zhangz0911gm, zhangzheng, zhaojichen, zhaoting, zhaozhenlong, zhongligeng, zhoufeng, zhousiyi, zjun, zyli2020, yuhuijun, limingqi107, lizhenyu, chenweifeng. | |||
| Contributions of any kind are welcome! | |||
| # Release 0.1.0-alpha | |||
| ## Main Features | |||
| @@ -3042,6 +3042,60 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", AND | |||
| Why Three Licenses? | |||
| The zlib License could have been used instead of the Modified (3-clause) BSD License, and since the IJG License effectively subsumes the distribution conditions of the zlib License, this would have effectively placed libjpeg-turbo binary distributions under the IJG License. However, the IJG License specifically refers to the Independent JPEG Group and does not extend attribution and endorsement protections to other entities. Thus, it was desirable to choose a license that granted us the same protections for new code that were granted to the IJG for code derived from their software. | |||
| Software: libtiff 4.1.0 | |||
| Copyright notice: | |||
| Copyright © 2015 Open Microscopy Environment / University of Dundee | |||
| Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu> | |||
| Copyright (c) 1990-1997 Sam Leffler | |||
| Copyright (c) 1991-1997 Silicon Graphics, Inc. | |||
| Copyright (c) 1988-1997 Sam Leffler | |||
| Copyright (c) 1991-1997 Sam Leffler | |||
| Use and Copyright | |||
| Copyright (C) 1990, 1995 Frank D. Cringle. | |||
| Copyright (c) 1994-1997 Sam Leffler | |||
| Copyright (c) 1994-1997 Silicon Graphics, Inc. | |||
| Copyright (c) 1997 Greg Ward Larson | |||
| Copyright (c) 1997 Silicon Graphics, Inc. | |||
| Copyright (c) 2010, Andrey Kiselev <dron@ak4719.spb.edu> | |||
| Copyright (c) Joris Van Damme <info@awaresystems.be> | |||
| Copyright (c) AWare Systems <http:www.awaresystems.be/> | |||
| Copyright (c) 1996-1997 Sam Leffler | |||
| Copyright (c) 1996 Pixar | |||
| Copyright (c) 1995-1997 Sam Leffler | |||
| Copyright (c) 1995-1997 Silicon Graphics, Inc. | |||
| Copyright (c) 1988-1996 Sam Leffler | |||
| Copyright (c) 1991-1996 Silicon Graphics, Inc. | |||
| Copyright (c) 1992-1997 Sam Leffler | |||
| Copyright (c) 1992-1997 Silicon Graphics, Inc. | |||
| Copyright (c) 2018, Mapbox | |||
| Copyright (c) 2017, Planet Labs | |||
| Copyright (c) 1990 by Sun Microsystems, Inc. | |||
| Copyright 1990 by Digital Equipment Corporation, Maynard, Massachusetts. | |||
| Copyright 1991 by Digital Equipment Corporation, Maynard, Massachusetts. | |||
| Copyright (c) 2002, Andrey Kiselev <dron@ak4719.spb.edu> | |||
| Copyright (c) 2003 Ross Finlayson | |||
| Additions (c) Richard Nolde 2006-2010 | |||
| Copyright (c) 2003, Andrey Kiselev <dron@ak4719.spb.edu> | |||
| Copyright (c) 2000, Frank Warmerdam | |||
| Copyright (c) 1987, 1993, 1994 | |||
| Copyright (c) 1989, 1993 | |||
| Copyright (c) 2009 Frank Warmerdam | |||
| Copyright (c) 1987, 1993 | |||
| Copyright (c) 2005 The DragonFly Project. All rights reserved. | |||
| Copyright (c) 2003 Citrus Project, | |||
| All rights reserved. | |||
| Copyright (c) 1990, 1993 | |||
| Copyright (c) 1996 Mike Johnson | |||
| Copyright (c) 1996 BancTec AB | |||
| Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu> | |||
| Copyright (c) 2012, Frank Warmerdam <warmerdam@pobox.com> | |||
| Copyright (c) 2019, Even Rouault <even.rouault at spatialys.com> | |||
| Copyright (c) 2007, Frank Warmerdam <warmerdam@pobox.com> | |||
| Copyright (c) 2019, Thomas Bernard <miniupnp@free.fr> | |||
| Copyright (c) 2008, Andrey Kiselev <dron@ak4719.spb.edu> | |||
| Copyright (c) 1999, Frank Warmerdam | |||
| Copyright (c) 1991-1996 Sam Leffler | |||
| Copyright (c) 1996 USAF Phillips Laboratory | |||
| Software: opencv 4.2.0 | |||
| Copyright notice: | |||
| @@ -14,27 +14,27 @@ | |||
| @rem ============================================================================ | |||
| @echo off | |||
| @title mindspore_build | |||
| SET BASEPATH=%CD% | |||
| IF NOT EXIST %BASEPATH%/build ( | |||
| md "build" | |||
| ) | |||
| cd %BASEPATH%/build | |||
| SET BUILD_PATH=%CD% | |||
| IF NOT EXIST %BUILD_PATH%/mindspore ( | |||
| md "mindspore" | |||
| ) | |||
| cd %CD%/mindspore | |||
| cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../.. | |||
| IF NOT %errorlevel% == 0 ( | |||
| echo "cmake fail." | |||
| goto run_fail | |||
| ) | |||
| IF "%1%" == "" ( | |||
| cmake --build . --target package -- -j6 | |||
| ) ELSE ( | |||
| @@ -433,9 +433,9 @@ build_predict() | |||
| cd "${BASEPATH}/predict/output/" | |||
| if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then | |||
| tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed | |||
| tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed | |||
| elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then | |||
| tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed | |||
| tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed | |||
| fi | |||
| echo "success to build predict project!" | |||
| } | |||
| @@ -4,14 +4,13 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with | |||
| ### MindSpore docker build command | |||
| * CPU | |||
| | Hardware Platform | Version | Build Command | | |||
| | :---------------- | :------ | :------------ | | |||
| | CPU | `x.y.z` | cd mindspore-cpu/x.y.z && docker build . -t mindspore/mindspore-cpu:x.y.z | | |||
| | | `devel` | cd mindspore-cpu/devel && docker build . -t mindspore/mindspore-cpu:devel | | |||
| | | `runtime` | cd mindspore-cpu/runtime && docker build . -t mindspore/mindspore-cpu:runtime | | |||
| | GPU | `x.y.z` | cd mindspore-gpu/x.y.z && docker build . -t mindspore/mindspore-gpu:x.y.z | | |||
| | | `devel` | cd mindspore-gpu/devel && docker build . -t mindspore/mindspore-gpu:devel | | |||
| | | `runtime` | cd mindspore-gpu/runtime && docker build . -t mindspore/mindspore-gpu:runtime | | |||
| ``` | |||
| cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha | |||
| ``` | |||
| * GPU | |||
| ``` | |||
| cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha | |||
| ``` | |||
| > **NOTICE:** The `x.y.z` version shown above should be replaced with the real version number. | |||
| @@ -64,4 +64,4 @@ RUN mkdir -pv /root/.pip \ | |||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | |||
| # Install MindSpore cpu whl package | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| @@ -0,0 +1,67 @@ | |||
| FROM ubuntu:18.04 | |||
| MAINTAINER leonwanghui <leon.wanghui@huawei.com> | |||
| # Set env | |||
| ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 | |||
| ENV PATH /usr/local/bin:$PATH | |||
| # Install base tools | |||
| RUN apt update \ | |||
| && DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| vim \ | |||
| wget \ | |||
| curl \ | |||
| xz-utils \ | |||
| net-tools \ | |||
| openssh-client \ | |||
| git \ | |||
| ntpdate \ | |||
| tzdata \ | |||
| tcl \ | |||
| sudo \ | |||
| bash-completion | |||
| # Install compile tools | |||
| RUN DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| gcc \ | |||
| g++ \ | |||
| zlibc \ | |||
| make \ | |||
| libgmp-dev \ | |||
| patch \ | |||
| autoconf \ | |||
| libtool \ | |||
| automake \ | |||
| flex | |||
| # Set bash | |||
| RUN echo "dash dash/sh boolean false" | debconf-set-selections | |||
| RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash | |||
| # Install python (v3.7.5) | |||
| RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ | |||
| libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ | |||
| && cd /tmp \ | |||
| && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ | |||
| && tar -xvf v3.7.5.tar.gz \ | |||
| && cd /tmp/cpython-3.7.5 \ | |||
| && mkdir -p ${PYTHON_ROOT_PATH} \ | |||
| && ./configure --prefix=${PYTHON_ROOT_PATH} \ | |||
| && make -j4 \ | |||
| && make install -j4 \ | |||
| && rm -f /usr/local/bin/python \ | |||
| && rm -f /usr/local/bin/pip \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ | |||
| && rm -rf /tmp/cpython-3.7.5 \ | |||
| && rm -f /tmp/v3.7.5.tar.gz | |||
| # Set pip source | |||
| RUN mkdir -pv /root/.pip \ | |||
| && echo "[global]" > /root/.pip/pip.conf \ | |||
| && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ | |||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | |||
| # Install MindSpore cpu whl package | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| @@ -80,4 +80,4 @@ RUN cd /tmp \ | |||
| && rm -f /tmp/openmpi-3.1.5.tar.gz | |||
| # Install MindSpore cuda-10.1 whl package | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| @@ -0,0 +1,83 @@ | |||
| FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 | |||
| MAINTAINER leonwanghui <leon.wanghui@huawei.com> | |||
| # Set env | |||
| ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 | |||
| ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5 | |||
| ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH | |||
| ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH | |||
| # Install base tools | |||
| RUN apt update \ | |||
| && DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| vim \ | |||
| wget \ | |||
| curl \ | |||
| xz-utils \ | |||
| net-tools \ | |||
| openssh-client \ | |||
| git \ | |||
| ntpdate \ | |||
| tzdata \ | |||
| tcl \ | |||
| sudo \ | |||
| bash-completion | |||
| # Install compile tools | |||
| RUN DEBIAN_FRONTEND=noninteractive apt install -y \ | |||
| gcc \ | |||
| g++ \ | |||
| zlibc \ | |||
| make \ | |||
| libgmp-dev \ | |||
| patch \ | |||
| autoconf \ | |||
| libtool \ | |||
| automake \ | |||
| flex \ | |||
| libnccl2=2.4.8-1+cuda10.1 \ | |||
| libnccl-dev=2.4.8-1+cuda10.1 | |||
| # Set bash | |||
| RUN echo "dash dash/sh boolean false" | debconf-set-selections | |||
| RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash | |||
| # Install python (v3.7.5) | |||
| RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ | |||
| libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ | |||
| && cd /tmp \ | |||
| && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ | |||
| && tar -xvf v3.7.5.tar.gz \ | |||
| && cd /tmp/cpython-3.7.5 \ | |||
| && mkdir -p ${PYTHON_ROOT_PATH} \ | |||
| && ./configure --prefix=${PYTHON_ROOT_PATH} \ | |||
| && make -j4 \ | |||
| && make install -j4 \ | |||
| && rm -f /usr/local/bin/python \ | |||
| && rm -f /usr/local/bin/pip \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ | |||
| && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ | |||
| && rm -rf /tmp/cpython-3.7.5 \ | |||
| && rm -f /tmp/v3.7.5.tar.gz | |||
| # Set pip source | |||
| RUN mkdir -pv /root/.pip \ | |||
| && echo "[global]" > /root/.pip/pip.conf \ | |||
| && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ | |||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | |||
| # Install openmpi (v3.1.5) | |||
| RUN cd /tmp \ | |||
| && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \ | |||
| && tar -xvf openmpi-3.1.5.tar.gz \ | |||
| && cd /tmp/openmpi-3.1.5 \ | |||
| && mkdir -p ${OMPI_ROOT_PATH} \ | |||
| && ./configure --prefix=${OMPI_ROOT_PATH} \ | |||
| && make -j4 \ | |||
| && make install -j4 \ | |||
| && rm -rf /tmp/openmpi-3.1.5 \ | |||
| && rm -f /tmp/openmpi-3.1.5.tar.gz | |||
| # Install MindSpore cuda-10.1 whl package | |||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore_gpu-0.2.0-cp37-cp37m-linux_x86_64.whl | |||
| @@ -4,8 +4,8 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base]( | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). Convert the dataset to TFRecord format and move the files to a specified path. | |||
| - Download the CLUE dataset from <https://www.cluebenchmarks.com> for fine-tuning and evaluation. | |||
| - Download the zhwiki dataset for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). Convert the dataset to TFRecord format and move the files to a specified path. | |||
| - Download the CLUE dataset for fine-tuning and evaluation. | |||
| > Notes: | |||
| If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. | |||
| @@ -10,7 +10,7 @@ This is the simple tutorial for training AlexNet in MindSpore. | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the CIFAR-10 dataset at <http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz>. The directory structure is as follows: | |||
| - Download the CIFAR-10 dataset, the directory structure is as follows: | |||
| ``` | |||
| ├─cifar-10-batches-bin | |||
| @@ -10,7 +10,7 @@ This is the simple and basic tutorial for constructing a network in MindSpore. | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the MNIST dataset at <http://yann.lecun.com/exdb/mnist/>. The directory structure is as follows: | |||
| - Download the MNIST dataset, the directory structure is as follows: | |||
| ``` | |||
| └─MNIST_Data | |||
| @@ -0,0 +1,101 @@ | |||
| # MobileNetV2 Example | |||
| ## Description | |||
| This is an example of training MobileNetV2 with ImageNet2012 dataset in MindSpore. | |||
| ## Requirements | |||
| * Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| * Download the dataset [ImageNet2012](http://www.image-net.org/). | |||
| > Unzip the ImageNet2012 dataset to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| > . | |||
| > ├── train # train dataset | |||
| > └── val # infer dataset | |||
| > ``` | |||
| ## Example structure | |||
| ``` shell | |||
| . | |||
| ├── config.py # parameter configuration | |||
| ├── dataset.py # data preprocessing | |||
| ├── eval.py # infer script | |||
| ├── launch.py # launcher for distributed training | |||
| ├── lr_generator.py # generate learning rate for each step | |||
| ├── run_infer.sh # launch inference | |||
| ├── run_train.sh # launch training | |||
| └── train.py # train script | |||
| ``` | |||
| ## Parameter configuration | |||
| Parameters for both training and inference can be set in 'config.py'. | |||
| ``` | |||
| "num_classes": 1000, # dataset class num | |||
| "image_height": 224, # image height | |||
| "image_width": 224, # image width | |||
| "batch_size": 256, # training or infering batch size | |||
| "epoch_size": 200, # total training epochs, including warmup_epochs | |||
| "warmup_epochs": 4, # warmup epochs | |||
| "lr": 0.4, # base learning rate | |||
| "momentum": 0.9, # momentum | |||
| "weight_decay": 4e-5, # weight decay | |||
| "loss_scale": 1024, # loss scale | |||
| "save_checkpoint": True, # whether save checkpoint | |||
| "save_checkpoint_epochs": 1, # the epoch interval between two checkpoints | |||
| "keep_checkpoint_max": 200, # only keep the last keep_checkpoint_max checkpoint | |||
| "save_checkpoint_path": "./checkpoint" # path to save checkpoint | |||
| ``` | |||
| ## Running the example | |||
| ### Train | |||
| #### Usage | |||
| Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] | |||
| #### Launch | |||
| ``` | |||
| # training example | |||
| sh run_train.sh 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet | |||
| ``` | |||
| #### Result | |||
| Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log` as follows. | |||
| ``` | |||
| epoch: [ 0/200], step:[ 624/ 625], loss:[5.258/5.258], time:[140412.236], lr:[0.100] | |||
| epoch time: 140522.500, per step time: 224.836, avg loss: 5.258 | |||
| epoch: [ 1/200], step:[ 624/ 625], loss:[3.917/3.917], time:[138221.250], lr:[0.200] | |||
| epoch time: 138331.250, per step time: 221.330, avg loss: 3.917 | |||
| ``` | |||
| ### Infer | |||
| #### Usage | |||
| Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH] | |||
| #### Launch | |||
| ``` | |||
| # infer example | |||
| sh run_infer.sh ~/imagenet ~/train/mobilenet-200_625.ckpt | |||
| ``` | |||
| > checkpoint can be produced in training process. | |||
| #### Result | |||
| Inference result will be stored in the example path, you can find result like the followings in `val.log`. | |||
| ``` | |||
| result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt | |||
| ``` | |||
| @@ -0,0 +1,35 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in train.py and eval.py | |||
| """ | |||
| from easydict import EasyDict as ed | |||
# Hyper-parameter configuration shared by train.py and eval.py.
config = ed(dict(
    num_classes=1000,            # number of dataset classes
    image_height=224,            # input image height
    image_width=224,             # input image width
    batch_size=256,              # batch size for training and inference
    epoch_size=200,              # total training epochs, including warmup_epochs
    warmup_epochs=4,             # warmup epochs
    lr=0.4,                      # base learning rate
    momentum=0.9,                # optimizer momentum
    weight_decay=4e-5,           # weight decay
    loss_scale=1024,             # loss scale
    save_checkpoint=True,        # whether to save checkpoints
    save_checkpoint_epochs=1,    # epoch interval between two checkpoints
    keep_checkpoint_max=200,     # keep only the last keep_checkpoint_max checkpoints
    save_checkpoint_path="./checkpoint",  # directory to save checkpoints
))
| @@ -0,0 +1,84 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| create train or eval dataset. | |||
| """ | |||
| import os | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| from config import config | |||
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset from an image-folder tree.

    Args:
        dataset_path (str): the path of dataset.
        do_train (bool): True builds the training pipeline (random crop/flip),
            False builds the eval pipeline (resize + center crop).
        repeat_num (int): the repeat times of dataset. Default: 1
        batch_size (int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    # Rank info comes from the launch script's environment; default to a
    # single-device setup so the function also works when they are unset
    # (the original int(os.getenv(...)) raised TypeError in that case).
    rank_size = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))
    if rank_size == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True)
    else:
        # Shard the dataset so every rank reads a distinct subset.
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True,
                                     num_shards=rank_size, shard_id=rank_id)

    resize_height = config.image_height
    resize_width = config.image_width
    rescale = 1.0 / 255.0   # map pixel values from [0, 255] to [0, 1]
    shift = 0.0
    buffer_size = 1000      # shuffle buffer size

    # define map operations
    decode_op = C.Decode()
    resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.2, 1.0))
    horizontal_flip_op = C.RandomHorizontalFlip()
    resize_op = C.Resize((256, 256))
    center_crop = C.CenterCrop(resize_width)
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    change_swap_op = C.HWC2CHW()

    if do_train:
        # Training: random crop + random flip for augmentation.
        trans = [decode_op, resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op]
    else:
        # Eval: deterministic resize + center crop.
        trans = [decode_op, resize_op, center_crop, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans)
    ds = ds.map(input_columns="label", operations=type_cast_op)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)
    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
| @@ -0,0 +1,56 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| eval. | |||
| """ | |||
| import os | |||
| import argparse | |||
| from dataset import create_dataset | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore.model_zoo.mobilenet import mobilenet_v2 | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# DEVICE_ID is exported by the launch script; default to 0 so a standalone
# run does not crash with TypeError when the variable is unset.
device_id = int(os.getenv('DEVICE_ID', '0'))
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':
    # Single-device evaluation: no HCCL collective communication needed.
    context.set_context(enable_hccl=False)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
    net = mobilenet_v2()
    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # Restore trained weights before evaluating.
    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
| @@ -0,0 +1,143 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """launch train script""" | |||
| import os | |||
| import sys | |||
| import json | |||
| from argparse import ArgumentParser | |||
def parse_args():
    """
    Parse command line arguments for the distributed training launcher.

    Returns:
        argparse.Namespace: the known launcher options; any unrecognized
        arguments are collected into ``args.training_script_args`` and
        forwarded verbatim to the training script.

    Examples:
        >>> parse_args()
    """
    # Fixed typo in the help text: "utilty" -> "utility".
    parser = ArgumentParser(description="mindspore distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node, "
                             "for D training, this is recommended to be set "
                             "to the number of D in your system so that "
                             "each process can be bound to a single D.")
    parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
                        help="will use the visible devices sequentially")
    parser.add_argument("--server_id", type=str, default="",
                        help="server ip")
    parser.add_argument("--training_script", type=str,
                        help="The full path to the single D training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")

    # Anything the launcher does not recognize belongs to the training script.
    args, unknown = parser.parse_known_args()
    args.training_script_args = unknown
    return args
def main():
    """Generate an HCCL rank table and spawn one training process per device."""
    print("start", __file__)
    args = parse_args()
    print(args)

    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        # Fixed typo ("pleaser") and exit status: a missing server ip is an
        # error, so exit non-zero instead of 0.
        print('please input server ip!!!')
        sys.exit(1)
    print('server_id:{}'.format(args.server_id))

    # Read device_id -> device_ip mappings from the Ascend NIC config;
    # use a context manager so the file handle is always closed.
    with open('/etc/hccn.conf', 'r') as hccn_file:
        hccn_configs = hccn_file.readlines()
    device_ips = {}
    for hccn_item in hccn_configs:
        hccn_item = hccn_item.strip()
        if hccn_item.startswith('address_'):
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))

    # Construct the HCCL rank table: one server, nproc_per_node devices.
    hccn_table = {}
    hccn_table['board_id'] = '0x0000'
    hccn_table['chip_info'] = '910'
    hccn_table['deploy_mode'] = 'lab'
    hccn_table['group_count'] = '1'
    hccn_table['group_list'] = []
    instance_list = []
    usable_dev = ''
    for instance_id in range(args.nproc_per_node):
        instance = {}
        instance['devices'] = []
        device_id = visible_devices[instance_id]
        device_ip = device_ips[device_id]
        usable_dev += str(device_id)
        instance['devices'].append({
            'device_id': device_id,
            'device_ip': device_ip,
        })
        instance['rank_id'] = str(instance_id)
        instance['server_id'] = args.server_id
        instance_list.append(instance)
    hccn_table['group_list'].append({
        'device_num': str(args.nproc_per_node),
        'server_num': '1',
        'group_name': '',
        'instance_count': str(args.nproc_per_node),
        'instance_list': instance_list,
    })
    hccn_table['para_plane_nic_location'] = 'device'
    hccn_table['para_plane_nic_name'] = []
    for instance_id in range(args.nproc_per_node):
        eth_id = visible_devices[instance_id]
        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
    hccn_table['status'] = 'completed'

    # Save hccn_table into the current working directory (which always
    # exists, so the original exists/mkdir guard was dead code).
    table_path = os.getcwd()
    table_fn = os.path.join(table_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # Spawn one background training process per rank, each in a private
    # device{rank} directory with its rank environment variables exported.
    for rank_id in range(0, args.nproc_per_node):
        device_id = visible_devices[rank_id]
        device_dir = os.path.join(os.getcwd(), 'device{}'.format(rank_id))
        rank_process = 'export RANK_SIZE={} && export RANK_ID={} && export DEVICE_ID={} && '.format(args.nproc_per_node,
                                                                                                    rank_id, device_id)
        if args.nproc_per_node > 1:
            rank_process += 'export MINDSPORE_HCCL_CONFIG_PATH={} && '.format(table_fn)
            rank_process += 'export RANK_TABLE_FILE={} && '.format(table_fn)
        rank_process += 'rm -rf {dir} && mkdir {dir} && cd {dir} && python {script} '.format(dir=device_dir,
                                                                                            script=args.training_script
                                                                                            )
        rank_process += ' '.join(args.training_script_args) + ' > log{}.log 2>&1 &'.format(rank_id)
        os.system(rank_process)

if __name__ == "__main__":
    main()
| @@ -0,0 +1,54 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """learning rate generator""" | |||
| import math | |||
| import numpy as np | |||
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Generate a learning rate array: linear warmup followed by cosine decay.

    Args:
        global_step(int): current global step; the schedule is sliced from here
        lr_init(float): init learning rate (start of warmup)
        lr_end(float): end learning rate (floor of the cosine decay)
        lr_max(float): max learning rate (reached at the end of warmup)
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, learning rate array of length total_steps - global_step
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    # Guard the decay denominator: the original divided by
    # (total_steps - warmup_steps), which is 0 when warmup covers the
    # whole run and raised ZeroDivisionError.
    decay_steps = total_steps - warmup_steps

    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            # Linear ramp from lr_init to lr_max over warmup_steps.
            lr = lr_init + (lr_max - lr_init) * i / warmup_steps
        elif decay_steps > 0:
            # Cosine anneal from lr_max down to lr_end.
            lr = lr_end + \
                 (lr_max - lr_end) * \
                 (1. + math.cos(math.pi * (i - warmup_steps) / decay_steps)) / 2.
        else:
            # Warmup spans the entire run; hold the peak rate.
            lr = lr_max
        lr_each_step.append(max(lr, 0.0))

    lr_each_step = np.array(lr_each_step).astype(np.float32)
    # Resume mid-training: drop the steps already consumed.
    return lr_each_step[global_step:]
| @@ -0,0 +1,33 @@ | |||
#!/usr/bin/env bash
# Launch single-device evaluation: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
if [ $# != 2 ]
then
    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
fi

# Quote expansions so paths containing spaces are handled correctly.
if [ ! -d "$1" ]
then
    echo "error: DATASET_PATH=$1 is not a directory"
    exit 1
fi

if [ ! -f "$2" ]
then
    echo "error: CHECKPOINT_PATH=$2 is not a file"
    exit 1
fi

BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
# Single-process run: fixed device and rank ids, world size 1.
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "eval" ];
then
    rm -rf ./eval
fi
mkdir ./eval
cd ./eval || exit
python ${BASEPATH}/eval.py \
    --checkpoint_path="$2" \
    --dataset_path="$1" &> infer.log &  # dataset val folder path
| @@ -0,0 +1,33 @@ | |||
#!/usr/bin/env bash
# Launch distributed training via launch.py:
#   sh run_train.sh [DEVICE_NUM] [SERVER_IP] [VISIABLE_DEVICES] [DATASET_PATH]
if [ $# != 4 ]
then
    echo "Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]"
    exit 1
fi

# DEVICE_NUM must be within 1-8. The original test used `&&`
# (x < 1 AND x > 8), which can never be true, so the range check
# was dead code; `||` is the intended condition.
if [ "$1" -lt 1 ] || [ "$1" -gt 8 ]
then
    echo "error: DEVICE_NUM=$1 is not in (1-8)"
    exit 1
fi

if [ ! -d "$4" ]
then
    echo "error: DATASET_PATH=$4 is not a directory"
    exit 1
fi

BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "train" ];
then
    rm -rf ./train
fi
mkdir ./train
cd ./train || exit
python ${BASEPATH}/launch.py \
    --nproc_per_node=$1 \
    --visible_devices=$3 \
    --server_id=$2 \
    --training_script=${BASEPATH}/train.py \
    --dataset_path="$4" &> train.log &  # dataset train folder
| @@ -0,0 +1,148 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_imagenet.""" | |||
| import os | |||
| import time | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| from dataset import create_dataset | |||
| from lr_generator import get_lr | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore import Tensor | |||
| from mindspore.model_zoo.mobilenet import mobilenet_v2 | |||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback | |||
| from mindspore.train.loss_scale_manager import FixedLossScaleManager | |||
| import mindspore.dataset.engine as de | |||
| from mindspore.communication.management import init | |||
# Fix random seeds so data order and augmentation are reproducible.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# Device/rank info is exported by the launch script; default to
# single-device values so the script also runs standalone instead of
# crashing with TypeError when the variables are unset.
device_id = int(os.getenv('DEVICE_ID', '0'))
rank_id = int(os.getenv('RANK_ID', '0'))
rank_size = int(os.getenv('RANK_SIZE', '1'))
run_distribute = rank_size > 1

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)
class Monitor(Callback):
    """
    Training callback that prints per-step and per-epoch loss and timing.

    Args:
        lr_init (numpy.ndarray): the full learning-rate schedule, indexed by
            global step number to report the lr used on each step.

    Returns:
        None.

    Examples:
        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
    """
    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)

    def epoch_begin(self, run_context):
        # Reset the per-epoch loss accumulator and start the epoch timer.
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        # Report wall-clock time for the epoch, average time per step,
        # and average loss over all steps of the epoch.
        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
                                                                                     per_step_mseconds,
                                                                                     np.mean(self.losses)
                                                                                     ), flush=True)

    def step_begin(self, run_context):
        # Start the per-step timer.
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000

        # net_outputs may be a Tensor or a (loss, ...) tuple/list;
        # reduce it to a plain scalar before accumulating.
        step_loss = cb_params.net_outputs
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        # cur_step_num is a 1-based global counter; convert to 0-based
        # position within the current epoch.
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
            cb_params.cur_epoch_num - 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]), flush=True)
if __name__ == '__main__':
    if run_distribute:
        # Multi-device run: enable HCCL collectives, configure data-parallel
        # training with initial weight broadcast and mirror-mean gradient
        # averaging, then initialize the communication group.
        context.set_context(enable_hccl=True)
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          parameter_broadcast=True, mirror_mean=True)
        # Split all-reduce fusion at parameter index 140 to overlap
        # communication with computation.
        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
        init()
    else:
        context.set_context(enable_hccl=False)

    epoch_size = config.epoch_size
    net = mobilenet_v2(num_classes=config.num_classes)
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')

    print("train args: ", args_opt, "\ncfg: ", config,
          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
                             repeat_num=epoch_size, batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # Fixed loss scaling for mixed precision; overflowing steps still update.
    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    # Warmup + cosine-decay schedule over the entire run (see lr_generator).
    lr = Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size))
    # Optimize only trainable parameters.
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   config.weight_decay, config.loss_scale)

    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)

    # Only rank 0 prints progress and saves checkpoints; other ranks pass
    # callbacks=None to model.train.
    cb = None
    if rank_id == 0:
        cb = [Monitor(lr_init=lr.asnumpy())]
        if config.save_checkpoint:
            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
                                         keep_checkpoint_max=config.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(prefix="mobilenet", directory=config.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=cb)
| @@ -0,0 +1,135 @@ | |||
| # ResNet101 Example | |||
| ## Description | |||
| This is an example of training ResNet101 with ImageNet dataset in MindSpore. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset ImageNet2012. | |||
| > Unzip the ImageNet2012 dataset to any path you want, the folder should include train and eval dataset as follows: | |||
| ``` | |||
| . | |||
| └─dataset | |||
| ├─ilsvrc | |||
| │ | |||
| └─validation_preprocess | |||
| ``` | |||
| ## Example structure | |||
| ```shell | |||
| . | |||
| ├── crossentropy.py # CrossEntropy loss function | |||
| ├── config.py # parameter configuration | |||
| ├── dataset.py # data preprocessing | |||
| ├── eval.py # eval net | |||
| ├── lr_generator.py # generate learning rate | |||
| ├── run_distribute_train.sh # launch distributed training(8p) | |||
| ├── run_infer.sh # launch evaluating | |||
| ├── run_standalone_train.sh # launch standalone training(1p) | |||
| └── train.py # train net | |||
| ``` | |||
| ## Parameter configuration | |||
| Parameters for both training and evaluating can be set in config.py. | |||
| ``` | |||
| "class_num": 1001, # dataset class number | |||
| "batch_size": 32, # batch size of input tensor | |||
| "loss_scale": 1024, # loss scale | |||
| "momentum": 0.9, # momentum optimizer | |||
| "weight_decay": 1e-4, # weight decay | |||
| "epoch_size": 120, # epoch sizes for training | |||
| "buffer_size": 1000, # number of queue size in data preprocessing | |||
| "image_height": 224, # image height | |||
| "image_width": 224, # image width | |||
| "save_checkpoint": True, # whether save checkpoint or not | |||
| "save_checkpoint_steps": 500, # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step | |||
| "keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint | |||
| "save_checkpoint_path": "./", # path to save checkpoint relative to the executed path | |||
| "warmup_epochs": 0, # number of warmup epoch | |||
| "lr_decay_mode": "cosine" # decay mode for generating learning rate | |||
| "label_smooth": 1, # label_smooth | |||
| "label_smooth_factor": 0.1, # label_smooth_factor | |||
| "lr": 0.1 # base learning rate | |||
| ``` | |||
| ## Running the example | |||
| ### Train | |||
| #### Usage | |||
| ``` | |||
| # distributed training | |||
| sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] | |||
| # standalone training | |||
| sh run_standalone_train.sh [DATASET_PATH] | |||
| ``` | |||
| #### Launch | |||
| ```bash | |||
| # distributed training example(8p) | |||
| sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc | |||
| # standalone training example(1p) | |||
| sh run_standalone_train.sh dataset/ilsvrc | |||
| ``` | |||
| > About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||
| #### Result | |||
Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". You can find checkpoint files together with results like the following in the log.
| ``` | |||
| # distribute training result(8p) | |||
| epoch: 1 step: 5004, loss is 4.805483 | |||
| epoch: 2 step: 5004, loss is 3.2121816 | |||
| epoch: 3 step: 5004, loss is 3.429647 | |||
| epoch: 4 step: 5004, loss is 3.3667371 | |||
| epoch: 5 step: 5004, loss is 3.1718972 | |||
| ... | |||
| epoch: 67 step: 5004, loss is 2.2768745 | |||
| epoch: 68 step: 5004, loss is 1.7223864 | |||
| epoch: 69 step: 5004, loss is 2.0665488 | |||
| epoch: 70 step: 5004, loss is 1.8717369 | |||
| ... | |||
| ``` | |||
| ### Infer | |||
| #### Usage | |||
| ``` | |||
| # infer | |||
| sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH] | |||
| ``` | |||
| #### Launch | |||
| ```bash | |||
| # infer with checkpoint | |||
| sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt | |||
| ``` | |||
| > checkpoint can be produced in training process. | |||
| #### Result | |||
Inference results will be stored in the example path, in the folder named "infer". There you can find results like the following in the log.
| ``` | |||
| result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt | |||
| ``` | |||
| @@ -0,0 +1,39 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in train.py and eval.py | |||
| """ | |||
| from easydict import EasyDict as ed | |||
# Hyper-parameters shared by train.py and eval.py (ResNet101 example).
config = ed({
    "class_num": 1001,             # dataset class number
    "batch_size": 32,              # batch size of input tensor
    "loss_scale": 1024,            # fixed loss scale for mixed precision
    "momentum": 0.9,               # momentum optimizer coefficient
    "weight_decay": 1e-4,          # weight decay
    "epoch_size": 120,             # epoch sizes for training
    "buffer_size": 1000,           # shuffle queue size in data preprocessing
    "image_height": 224,           # image height
    "image_width": 224,            # image width
    "save_checkpoint": True,       # whether to save checkpoint or not
    "save_checkpoint_steps": 500,  # step interval between two checkpoints
    "keep_checkpoint_max": 10,     # only keep the last keep_checkpoint_max checkpoints
    "save_checkpoint_path": "./",  # checkpoint directory relative to the executed path
    "warmup_epochs": 0,            # number of warmup epochs
    "lr_decay_mode": "cosine",     # decay mode for generating learning rate
    "label_smooth": 1,             # whether to apply label smoothing
    "label_smooth_factor": 0.1,    # label smoothing factor
    "lr": 0.1                      # base learning rate
})
| @@ -0,0 +1,36 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """define loss function for network""" | |||
| from mindspore.nn.loss.loss import _Loss | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import functional as F | |||
| from mindspore import Tensor | |||
| from mindspore.common import dtype as mstype | |||
| import mindspore.nn as nn | |||
class CrossEntropy(_Loss):
    """Cross-entropy loss with label smoothing, built on SoftmaxCrossEntropyWithLogits."""

    def __init__(self, smooth_factor=0., num_classes=1001):
        super(CrossEntropy, self).__init__()
        # Smoothed one-hot targets: the true class receives 1 - smooth_factor,
        # while every other class shares smooth_factor equally.
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        # Build smoothed one-hot labels sized to the logit's class dimension,
        # compute per-sample cross entropy, then average over the batch.
        class_count = F.shape(logit)[1]
        smoothed_label = self.onehot(label, class_count, self.on_value, self.off_value)
        per_sample_loss = self.ce(logit, smoothed_label)
        return self.mean(per_sample_loss, 0)
| @@ -0,0 +1,89 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| create train or eval dataset. | |||
| """ | |||
| import os | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| from config import config | |||
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or evaluate dataset from an image-folder tree.

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): True builds the training pipeline (random crop/flip),
            False builds the eval pipeline (resize + center crop).
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    # Rank info comes from the launch script's environment; default to a
    # single-device setup so the function also works when they are unset
    # (the original int(os.getenv(...)) raised TypeError in that case).
    device_num = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))
    if device_num == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        # Shard the dataset so every rank reads a distinct subset.
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)

    resize_height = 224
    rescale = 1.0 / 255.0   # map pixel values from [0, 255] to [0, 1]
    shift = 0.0

    # define map operations
    decode_op = C.Decode()
    random_resize_crop_op = C.RandomResizedCrop(resize_height, (0.08, 1.0), (0.75, 1.33), max_attempts=100)
    # NOTE(review): the flip probability is rank-dependent — rank 0 never
    # flips and higher ranks approach 0.5. Confirm this is intended; a fixed
    # 0.5 is the conventional choice.
    horizontal_flip_op = C.RandomHorizontalFlip(rank_id / (rank_id + 1))
    resize_op_256 = C.Resize((256, 256))
    center_crop = C.CenterCrop(224)
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278))
    changeswap_op = C.HWC2CHW()

    if do_train:
        # Training: random crop + random flip for augmentation.
        trans = [decode_op,
                 random_resize_crop_op,
                 horizontal_flip_op,
                 rescale_op,
                 normalize_op,
                 changeswap_op]
    else:
        # Eval: deterministic resize + center crop.
        trans = [decode_op,
                 resize_op_256,
                 center_crop,
                 rescale_op,
                 normalize_op,
                 changeswap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=config.buffer_size)
    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
| @@ -0,0 +1,84 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| eval. | |||
| """ | |||
| import os | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| from dataset import create_dataset | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore.model_zoo.resnet import resnet101 | |||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| import mindspore.dataset.engine as de | |||
| from mindspore.communication.management import init | |||
| from crossentropy import CrossEntropy | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| de.config.set_seed(1) | |||
| parser = argparse.ArgumentParser(description='Image classification') | |||
| parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') | |||
| parser.add_argument('--device_num', type=int, default=1, help='Device num.') | |||
| parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.') | |||
| parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.') | |||
| parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') | |||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||
| args_opt = parser.parse_args() | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id) | |||
| context.set_context(enable_task_sink=True) | |||
| context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True) | |||
| if __name__ == '__main__': | |||
| if args_opt.do_eval: | |||
| context.set_context(enable_hccl=False) | |||
| else: | |||
| if args_opt.run_distribute: | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True, parameter_broadcast=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313]) | |||
| init() | |||
| else: | |||
| context.set_context(enable_hccl=False) | |||
| epoch_size = config.epoch_size | |||
| net = resnet101(class_num=config.class_num) | |||
| if not config.label_smooth: | |||
| config.label_smooth_factor = 0.0 | |||
| loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) | |||
| if args_opt.do_eval: | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| if args_opt.checkpoint_path: | |||
| param_dict = load_checkpoint(args_opt.checkpoint_path) | |||
| load_param_into_net(net, param_dict) | |||
| net.set_train(False) | |||
| model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) | |||
| res = model.eval(dataset) | |||
| print("result:", res, "ckpt=", args_opt.checkpoint_path) | |||
| @@ -0,0 +1,52 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """learning rate generator""" | |||
| import math | |||
| import numpy as np | |||
| def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr): | |||
| lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps) | |||
| lr = float(init_lr) + lr_inc * current_step | |||
| return lr | |||
| def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch): | |||
| """ | |||
| generate learning rate array with cosine | |||
| Args: | |||
| lr(float): base learning rate | |||
| steps_per_epoch(int): steps size of one epoch | |||
| warmup_epochs(int): number of warmup epochs | |||
| max_epoch(int): total epochs of training | |||
| Returns: | |||
| np.array, learning rate array | |||
| """ | |||
| base_lr = lr | |||
| warmup_init_lr = 0 | |||
| total_steps = int(max_epoch * steps_per_epoch) | |||
| warmup_steps = int(warmup_epochs * steps_per_epoch) | |||
| decay_steps = total_steps - warmup_steps | |||
| lr_each_step = [] | |||
| for i in range(total_steps): | |||
| if i < warmup_steps: | |||
| lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr) | |||
| else: | |||
| linear_decay = (total_steps - i) / decay_steps | |||
| cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps)) | |||
| decayed = linear_decay * cosine_decay + 0.00001 | |||
| lr = base_lr * decayed | |||
| lr_each_step.append(lr) | |||
| return np.array(lr_each_step).astype(np.float32) | |||
| @@ -0,0 +1,66 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| PATH2=$(get_real_path $2) | |||
| echo $PATH1 | |||
| echo $PATH2 | |||
| if [ ! -f $PATH1 ] | |||
| then | |||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" | |||
| exit 1 | |||
| fi | |||
| if [ ! -d $PATH2 ] | |||
| then | |||
| echo "error: DATASET_PATH=$PATH2 is not a directory" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=8 | |||
| export RANK_SIZE=8 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 | |||
| export RANK_TABLE_FILE=$PATH1 | |||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||
| do | |||
| export DEVICE_ID=$i | |||
| export RANK_ID=$i | |||
| rm -rf ./train_parallel$i | |||
| mkdir ./train_parallel$i | |||
| cp *.py ./train_parallel$i | |||
| cp *.sh ./train_parallel$i | |||
| cd ./train_parallel$i || exit | |||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & | |||
| cd .. | |||
| done | |||
| @@ -0,0 +1,64 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| PATH2=$(get_real_path $2) | |||
| echo $PATH1 | |||
| echo $PATH2 | |||
| if [ ! -d $PATH1 ] | |||
| then | |||
| echo "error: DATASET_PATH=$PATH1 is not a directory" | |||
| exit 1 | |||
| fi | |||
| if [ ! -f $PATH2 ] | |||
| then | |||
| echo "error: CHECKPOINT_PATH=$PATH2 is not a file" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=1 | |||
| export DEVICE_ID=0 | |||
| export RANK_SIZE=$DEVICE_NUM | |||
| export RANK_ID=0 | |||
| if [ -d "infer" ]; | |||
| then | |||
| rm -rf ./infer | |||
| fi | |||
| mkdir ./infer | |||
| cp *.py ./infer | |||
| cp *.sh ./infer | |||
| cd ./infer || exit | |||
| env > env.log | |||
| echo "start infering for device $DEVICE_ID" | |||
| python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & | |||
| cd .. | |||
| @@ -0,0 +1,56 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 1 ] | |||
| then | |||
| echo "Usage: sh run_standalone_train.sh [DATASET_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| echo $PATH1 | |||
| if [ ! -d $PATH1 ] | |||
| then | |||
| echo "error: DATASET_PATH=$PATH1 is not a directory" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=1 | |||
| export DEVICE_ID=0 | |||
| export RANK_ID=0 | |||
| export RANK_SIZE=1 | |||
| if [ -d "train" ]; | |||
| then | |||
| rm -rf ./train | |||
| fi | |||
| mkdir ./train | |||
| cp *.py ./train | |||
| cp *.sh ./train | |||
| cd ./train || exit | |||
| echo "start training for device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --dataset_path=$PATH1 &> log & | |||
| cd .. | |||
| @@ -0,0 +1,103 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_imagenet.""" | |||
| import os | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| from dataset import create_dataset | |||
| from lr_generator import warmup_cosine_annealing_lr | |||
| from config import config | |||
| from mindspore import context | |||
| from mindspore import Tensor | |||
| from mindspore.model_zoo.resnet import resnet101 | |||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore.train.loss_scale_manager import FixedLossScaleManager | |||
| import mindspore.dataset.engine as de | |||
| from mindspore.communication.management import init | |||
| import mindspore.nn as nn | |||
| import mindspore.common.initializer as weight_init | |||
| from crossentropy import CrossEntropy | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| de.config.set_seed(1) | |||
| parser = argparse.ArgumentParser(description='Image classification') | |||
| parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') | |||
| parser.add_argument('--device_num', type=int, default=1, help='Device num.') | |||
| parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.') | |||
| parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.') | |||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||
| args_opt = parser.parse_args() | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id) | |||
| context.set_context(enable_task_sink=True) | |||
| context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True) | |||
| if __name__ == '__main__': | |||
| if args_opt.do_eval: | |||
| context.set_context(enable_hccl=False) | |||
| else: | |||
| if args_opt.run_distribute: | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True, parameter_broadcast=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313]) | |||
| init() | |||
| else: | |||
| context.set_context(enable_hccl=False) | |||
| epoch_size = config.epoch_size | |||
| net = resnet101(class_num=config.class_num) | |||
| # weight init | |||
| for _, cell in net.cells_and_names(): | |||
| if isinstance(cell, nn.Conv2d): | |||
| cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), | |||
| cell.weight.default_input.shape(), | |||
| cell.weight.default_input.dtype()) | |||
| if isinstance(cell, nn.Dense): | |||
| cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), | |||
| cell.weight.default_input.shape(), | |||
| cell.weight.default_input.dtype()) | |||
| if not config.label_smooth: | |||
| config.label_smooth_factor = 0.0 | |||
| loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) | |||
| if args_opt.do_train: | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, | |||
| repeat_num=epoch_size, batch_size=config.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) | |||
| # learning rate strategy with cosine | |||
| lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size)) | |||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, | |||
| config.weight_decay, config.loss_scale) | |||
| model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}) | |||
| time_cb = TimeMonitor(data_size=step_size) | |||
| loss_cb = LossMonitor() | |||
| cb = [time_cb, loss_cb] | |||
| if config.save_checkpoint: | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps, | |||
| keep_checkpoint_max=config.keep_checkpoint_max) | |||
| ckpt_cb = ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck) | |||
| cb += [ckpt_cb] | |||
| model.train(epoch_size, dataset, callbacks=cb) | |||
| @@ -8,7 +8,7 @@ This is an example of training ResNet-50 with CIFAR-10 dataset in MindSpore. | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). | |||
| - Download the dataset CIFAR-10. | |||
| > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| @@ -40,9 +40,9 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||
| rank_id = int(os.getenv("RANK_ID")) | |||
| if device_num == 1: | |||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=True) | |||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True) | |||
| else: | |||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=True, | |||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=device_num, shard_id=rank_id) | |||
| resize_height = config.image_height | |||
| @@ -68,11 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op) | |||
| ds = ds.map(input_columns="image", operations=trans) | |||
| # apply shuffle operations | |||
| ds = ds.shuffle(buffer_size=config.buffer_size) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -22,7 +22,7 @@ fi | |||
| if [ ! -f $1 ] | |||
| then | |||
| echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||
| exit 1 | |||
| fi | |||
| @@ -36,6 +36,7 @@ ulimit -u unlimited | |||
| export DEVICE_NUM=8 | |||
| export RANK_SIZE=8 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$1 | |||
| export RANK_TABLE_FILE=$1 | |||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||
| do | |||
| @@ -61,14 +61,14 @@ if __name__ == '__main__': | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([140]) | |||
| auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160]) | |||
| init() | |||
| else: | |||
| context.set_context(enable_hccl=False) | |||
| epoch_size = config.epoch_size | |||
| net = resnet50(class_num=config.class_num) | |||
| loss = SoftmaxCrossEntropyWithLogits(sparse=True) | |||
| loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") | |||
| if args_opt.do_train: | |||
| @@ -8,7 +8,7 @@ This example is for VGG16 model training and evaluation. | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). | |||
| - Download the CIFAR-10 binary version dataset. | |||
| > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| @@ -49,6 +49,24 @@ You will get the accuracy as following: | |||
| result: {'acc': 0.92} | |||
| ``` | |||
| ### Distribute Training | |||
| ``` | |||
| sh run_distribute_train.sh rank_table.json your_data_path | |||
| ``` | |||
| The above shell script will run distribute training in the background, you can view the results through the file `train_parallel[X]/log`. | |||
| You will get the loss value as following: | |||
| ``` | |||
| # grep "result: " train_parallel*/log | |||
| train_parallel0/log:epoch: 1 step: 97, loss is 1.9060308 | |||
| train_parallel0/log:epoch: 2 step: 97, loss is 1.6003821 | |||
| ... | |||
| train_parallel1/log:epoch: 1 step: 97, loss is 1.7095519 | |||
| train_parallel1/log:epoch: 2 step: 97, loss is 1.7133579 | |||
| ... | |||
| ... | |||
| ``` | |||
| > About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||
| ## Usage: | |||
| @@ -75,4 +93,14 @@ parameters/options: | |||
| --data_path the storage path of dataset | |||
| --device_id the device which used to evaluate model. | |||
| --checkpoint_path the checkpoint file path used to evaluate model. | |||
| ``` | |||
| ``` | |||
| ### Distribute Training | |||
| ``` | |||
| Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH] | |||
| parameters/options: | |||
| MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path. | |||
| DATA_PATH the storage path of dataset. | |||
| ``` | |||
| @@ -28,7 +28,11 @@ def create_dataset(data_home, repeat_num=1, training=True): | |||
| data_dir = os.path.join(data_home, "cifar-10-batches-bin") | |||
| if not training: | |||
| data_dir = os.path.join(data_home, "cifar-10-verify-bin") | |||
| data_set = ds.Cifar10Dataset(data_dir) | |||
| rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None | |||
| rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None | |||
| data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id) | |||
| resize_height = cfg.image_height | |||
| resize_width = cfg.image_width | |||
| rescale = 1.0 / 255.0 | |||
| @@ -0,0 +1,54 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]" | |||
| exit 1 | |||
| fi | |||
| if [ ! -f $1 ] | |||
| then | |||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||
| exit 1 | |||
| fi | |||
| if [ ! -d $2 ] | |||
| then | |||
| echo "error: DATA_PATH=$2 is not a directory" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=8 | |||
| export RANK_SIZE=8 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$1 | |||
| export RANK_TABLE_FILE=$1 | |||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||
| do | |||
| export DEVICE_ID=$i | |||
| export RANK_ID=$i | |||
| rm -rf ./train_parallel$i | |||
| mkdir ./train_parallel$i | |||
| cp *.py ./train_parallel$i | |||
| cp *.sh ./train_parallel$i | |||
| cd ./train_parallel$i || exit | |||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --data_path=$2 --device_id=$i &> log & | |||
| cd .. | |||
| done | |||
| @@ -17,16 +17,18 @@ | |||
| python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID | |||
| """ | |||
| import argparse | |||
| import os | |||
| import random | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.communication.management import init | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore import context | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore.model_zoo.vgg import vgg16 | |||
| import dataset | |||
| from dataset import create_dataset | |||
| from config import cifar_cfg as cfg | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| @@ -62,17 +64,30 @@ if __name__ == '__main__': | |||
| context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) | |||
| context.set_context(device_id=args_opt.device_id) | |||
| context.set_context(enable_task_sink=True) | |||
| context.set_context(enable_loop_sink=True) | |||
| context.set_context(enable_mem_reuse=True, enable_hccl=False) | |||
| device_num = int(os.environ.get("DEVICE_NUM", 1)) | |||
| if device_num > 1: | |||
| context.reset_auto_parallel_context() | |||
| context.set_context(enable_hccl=True) | |||
| context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True) | |||
| init() | |||
| dataset = create_dataset(args_opt.data_path, cfg.epoch_size) | |||
| batch_num = dataset.get_dataset_size() | |||
| net = vgg16(num_classes=cfg.num_classes) | |||
| lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size) | |||
| lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num) | |||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) | |||
| loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) | |||
| model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) | |||
| dataset = dataset.create_dataset(args_opt.data_path, cfg.epoch_size) | |||
| batch_num = dataset.get_dataset_size() | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=cfg.keep_checkpoint_max) | |||
| time_cb = TimeMonitor(data_size=batch_num) | |||
| ckpoint_cb = ModelCheckpoint(prefix="train_vgg_cifar10", directory="./", config=config_ck) | |||
| loss_cb = LossMonitor() | |||
| model.train(cfg.epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb]) | |||
| model.train(cfg.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, loss_cb]) | |||
| print("train success") | |||
| @@ -0,0 +1,94 @@ | |||
| # YOLOv3 Example | |||
| ## Description | |||
| YOLOv3 network based on ResNet-18, with support for training and evaluation. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Dataset | |||
| We use coco2017 as training dataset. | |||
| 1. The directory structure is as follows: | |||
| > ``` | |||
| > . | |||
| > ├── annotations # annotation jsons | |||
| > ├── train2017 # train dataset | |||
| > └── val2017 # infer dataset | |||
| > ``` | |||
| 2. Organize the dataset information into a TXT file, each row in the file is as follows: | |||
| ``` | |||
| train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 | |||
| ``` | |||
| Each row is an image annotation which is split by spaces; the first column is a relative path of the image, the others are box and class information of the format [xmin,ymin,xmax,ymax,class]. `dataset.py` is the parsing script, we read image from an image path joined by the `image_dir`(dataset directory) and the relative path in `anno_path`(the TXT file path), `image_dir` and `anno_path` are external inputs. | |||
| ## Running the Example | |||
| ### Training | |||
| To train the model, run `train.py` with the dataset `image_dir`, `anno_path` and `mindrecord_dir`. If the `mindrecord_dir` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) file by `image_dir` and `anno_path`(the absolute image path is joined by the `image_dir` and the relative path in `anno_path`). **Note if `mindrecord_dir` isn't empty, it will use `mindrecord_dir` rather than `image_dir` and `anno_path`.** | |||
| - Stand alone mode | |||
| ``` | |||
| sh run_standalone_train.sh 0 50 ./Mindrecord_train ./dataset ./dataset/train.txt | |||
| ``` | |||
| The input variables are device id, epoch size, mindrecord directory path, dataset directory path and train TXT file path. | |||
| - Distributed mode | |||
| ``` | |||
| sh run_distribute_train.sh 8 150 /data/Mindrecord_train /data /data/train.txt /data/hccl.json | |||
| ``` | |||
| The input variables are device numbers, epoch size, mindrecord directory path, dataset directory path, train TXT file path and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.** | |||
| You will get the loss value and time of each step as following: | |||
| ``` | |||
| epoch: 145 step: 156, loss is 12.202981 | |||
| epoch time: 25599.22742843628, per step time: 164.0976117207454 | |||
| epoch: 146 step: 156, loss is 16.91706 | |||
| epoch time: 23199.971675872803, per step time: 148.7177671530308 | |||
| epoch: 147 step: 156, loss is 13.04007 | |||
| epoch time: 23801.95164680481, per step time: 152.57661312054364 | |||
| epoch: 148 step: 156, loss is 10.431475 | |||
| epoch time: 23634.241580963135, per step time: 151.50154859591754 | |||
| epoch: 149 step: 156, loss is 14.665991 | |||
| epoch time: 24118.8325881958, per step time: 154.60790120638333 | |||
| epoch: 150 step: 156, loss is 10.779521 | |||
| epoch time: 25319.57221031189, per step time: 162.30495006610187 | |||
| ``` | |||
| Note the results are for two-classification (person and face) using our own annotations with coco2017; you can change `num_classes` in `config.py` to train your dataset. And we will support 80 classifications for coco2017 in the near future. | |||
| ### Evaluation | |||
| To eval, run `eval.py` with the dataset `image_dir`, `anno_path`(eval txt), `mindrecord_dir` and `ckpt_path`. `ckpt_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file. | |||
| ``` | |||
| sh run_eval.sh 0 yolo.ckpt ./Mindrecord_eval ./dataset ./dataset/eval.txt | |||
| ``` | |||
| The input variables are device id, checkpoint path, mindrecord directory path, dataset directory path and train TXT file path. | |||
| You will get the precision and recall value of each class: | |||
| ``` | |||
| class 0 precision is 88.18%, recall is 66.00% | |||
| class 1 precision is 85.34%, recall is 79.13% | |||
| ``` | |||
| Note the precision and recall values are results of two-classification (person and face) using our own annotations with coco2017. | |||
| @@ -13,51 +13,6 @@ | |||
| # limitations under the License. | |||
| """__init__""" | |||
| from __future__ import absolute_import as _abs | |||
| import sys | |||
| import os | |||
| def AKGAddPath(): | |||
| """_akg add path.""" | |||
| pwd = os.path.dirname(os.path.realpath(__file__)) | |||
| tvm_path = os.path.realpath(pwd) | |||
| if tvm_path not in sys.path: | |||
| sys.path.insert(0, tvm_path) | |||
| else: | |||
| sys.path.remove(tvm_path) | |||
| sys.path.insert(0, tvm_path) | |||
| class AKGMetaPathFinder: | |||
| """class AKGMetaPath finder.""" | |||
| def find_module(self, fullname, path=None): | |||
| """method _akg find module.""" | |||
| if fullname.startswith("_akg.tvm"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| if fullname.startswith("_akg.topi"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| return None | |||
| class AKGMetaPathLoader: | |||
| """class AKGMetaPathLoader loader.""" | |||
| def __init__(self, rname): | |||
| self.__rname = rname | |||
| def load_module(self, fullname): | |||
| if self.__rname in sys.modules: | |||
| sys.modules.pop(self.__rname) | |||
| AKGAddPath() | |||
| __import__(self.__rname, globals(), locals()) | |||
| self.__target_module = sys.modules[self.__rname] | |||
| sys.modules[fullname] = self.__target_module | |||
| return self.__target_module | |||
| sys.meta_path.insert(0, AKGMetaPathFinder()) | |||
| from . import add_path | |||
| from .op_build import op_build | |||
| from .message import compilewithjson | |||
| @@ -0,0 +1,61 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| """add tvm path""" | |||
| import sys | |||
| import os | |||
| def AKGAddPath(): | |||
| """_akg add path.""" | |||
| pwd = os.path.dirname(os.path.realpath(__file__)) | |||
| tvm_path = os.path.realpath(pwd) | |||
| if tvm_path not in sys.path: | |||
| sys.path.insert(0, tvm_path) | |||
| else: | |||
| sys.path.remove(tvm_path) | |||
| sys.path.insert(0, tvm_path) | |||
| class AKGMetaPathFinder: | |||
| """class AKGMetaPath finder.""" | |||
| def find_module(self, fullname, path=None): | |||
| """method _akg find module.""" | |||
| if fullname.startswith("_akg.tvm"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| if fullname.startswith("_akg.topi"): | |||
| rname = fullname[5:] | |||
| return AKGMetaPathLoader(rname) | |||
| return None | |||
| class AKGMetaPathLoader: | |||
| """class AKGMetaPathLoader loader.""" | |||
| def __init__(self, rname): | |||
| self.__rname = rname | |||
| def load_module(self, fullname): | |||
| if self.__rname in sys.modules: | |||
| sys.modules.pop(self.__rname) | |||
| AKGAddPath() | |||
| __import__(self.__rname, globals(), locals()) | |||
| self.__target_module = sys.modules[self.__rname] | |||
| sys.modules[fullname] = self.__target_module | |||
| return self.__target_module | |||
| sys.meta_path.insert(0, AKGMetaPathFinder()) | |||
| @@ -122,10 +122,12 @@ def get_args(op_info, arg_type): | |||
| elif arg_type == 'attrs': | |||
| for item in op_info[arg_type]: | |||
| if 'value' not in item: | |||
| raise ValueError("Json string Errors, attr key:value not found.") | |||
| if item["name"] != "isRef": | |||
| args.append(item['value']) | |||
| if item["valid"]: | |||
| if 'value' not in item: | |||
| raise ValueError("Json string Errors, attr key:value not found.") | |||
| if item["name"] != "isRef": | |||
| args.append(item['value']) | |||
| return args | |||
| @@ -91,6 +91,14 @@ void PrintNodeInputType(std::ostringstream &buffer, const AnfNodePtr &nd) { | |||
| } | |||
| } | |||
| void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd) { | |||
| buffer << " : ("; | |||
| PrintNodeInputType(buffer, nd); | |||
| buffer << ") -> ("; | |||
| PrintNodeOutputType(buffer, nd); | |||
| buffer << ")"; | |||
| } | |||
| struct SubGraphIRInfo { | |||
| int32_t local_var; | |||
| std::ostringstream buffer; | |||
| @@ -18,12 +18,14 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "ir/dtype/type.h" | |||
| #include "ir/anf.h" | |||
| namespace mindspore { | |||
| constexpr char PARALLEL_STRATEGY[] = "strategy"; | |||
| void DumpIR(const std::string &filename, const FuncGraphPtr &func_graph, bool dump_full_name = false); | |||
| void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd); | |||
| const std::string ToShortString(const TypeId &typeId); | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_DEBUG_ANF_IR_DUMP_H_ | |||
| @@ -134,7 +134,7 @@ class DebugInfo : public Base { | |||
| explicit DebugInfo(const LocationPtr &loc); | |||
| virtual ~DebugInfo() = default; | |||
| ~DebugInfo() override = default; | |||
| MS_DECLARE_PARENT(DebugInfo, Base); | |||
| int64_t debug_id(); | |||
| int64_t unique_id() const { return unique_id_; } | |||
| @@ -231,10 +231,10 @@ std::string AnalyzedFuncGraphExporter::GetNodeType(const AnfNodePtr &node) { | |||
| auto engine = node_cfg_->engine(); | |||
| auto cfg = engine->MakeConfig(node, ctx); | |||
| auto abs = engine->cache().GetValue(cfg); | |||
| if (abs == nullptr) { | |||
| return "Undefined"; | |||
| } | |||
| auto dtype = abs->BuildType(); | |||
| auto shape = abs->BuildShape(); | |||
| std::ostringstream oss; | |||
| @@ -321,7 +321,7 @@ class TraceTransform : public TraceInfo { | |||
| std::string full_name() override { return full_name_ + transform_name_; } | |||
| MS_DECLARE_PARENT(TraceTransform, TraceInfo); | |||
| virtual std::string symbol() { | |||
| std::string symbol() override { | |||
| if (transform_name_.empty()) { | |||
| return ""; | |||
| } | |||
| @@ -18,14 +18,15 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <set> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include <map> | |||
| #include "kernel/oplib/oplib.h" | |||
| #include "kernel/kernel_query.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "kernel/kernel_build_info.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "operator/ops.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| @@ -180,6 +181,7 @@ void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, co | |||
| } | |||
| void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *support_index) { | |||
| MS_EXCEPTION_IF_NULL(support_index); | |||
| int index = kUnSupportMixedDataTypeIndex; | |||
| switch (data_type) { | |||
| case kNumberTypeFloat16: | |||
| @@ -197,6 +199,7 @@ void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *s | |||
| void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t input_index, | |||
| std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) { | |||
| MS_EXCEPTION_IF_NULL(support_datatype); | |||
| auto data_type = kernel_build_info.GetInputDeviceType(input_index); | |||
| support_datatype->push_back(data_type); | |||
| AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); | |||
| @@ -204,6 +207,7 @@ void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_i | |||
| void AddKernelOutputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t output_index, | |||
| std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) { | |||
| MS_EXCEPTION_IF_NULL(support_datatype); | |||
| auto data_type = kernel_build_info.GetOutputDeviceType(output_index); | |||
| support_datatype->push_back(data_type); | |||
| AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); | |||
| @@ -214,16 +218,7 @@ void AddNodeInputDataType(const CNodePtr &kernel_node, size_t input_index, | |||
| std::vector<TypeId> *node_mix_precision_datatype) { | |||
| AnfNodePtr cur_input = AnfAlgo::GetInputNode(kernel_node, input_index); | |||
| MS_EXCEPTION_IF_NULL(cur_input); | |||
| TypeId input_origin_type; | |||
| if (cur_input->isa<Parameter>() && AnfAlgo::IsParameterWeight(cur_input->cast<ParameterPtr>())) { | |||
| // weight | |||
| input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0); | |||
| } else if (cur_input->isa<ValueNode>()) { | |||
| input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0); | |||
| } else { | |||
| // feature map | |||
| input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index); | |||
| } | |||
| TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index); | |||
| AddSupportMixedPrecisionDataTypeIndex(input_origin_type, node_mix_precision_datatype_index); | |||
| node_mix_precision_datatype->push_back(input_origin_type); | |||
| } | |||
| @@ -238,8 +233,8 @@ void AddNodeOutputDataType(const CNodePtr &kernel_node, size_t output_index, | |||
| void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_index, | |||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||
| if (node_mix_precision_datatype_index.size() != node_mix_precision_datatype.size()) { | |||
| MS_LOG(EXCEPTION) << "node datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size " | |||
| << node_mix_precision_datatype.size(); | |||
| @@ -251,10 +246,11 @@ void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_ind | |||
| } | |||
| } | |||
| int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index, | |||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||
| bool RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index, | |||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||
| MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); | |||
| CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, | |||
| kernel_match_datatype_idx); | |||
| for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { | |||
| @@ -289,40 +285,22 @@ int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_data | |||
| } | |||
| } | |||
| } | |||
| if (kernel_match_datatype_idx->size() >= 1) { | |||
| return SizeToInt(kernel_match_datatype_idx->begin()->first); | |||
| } | |||
| return -1; | |||
| return !kernel_match_datatype_idx->empty(); | |||
| } | |||
| int GetMinReducePrecisionCountIndex(std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx, | |||
| const std::unordered_map<size_t, size_t> &precision_reduce_count) { | |||
| int selected_index = -1; | |||
| size_t min_reduce_precision_count = kMaxCount; | |||
| auto iter = kernel_match_datatype_idx->begin(); | |||
| while (iter != kernel_match_datatype_idx->end()) { | |||
| auto find_iter = precision_reduce_count.find(iter->first); | |||
| if (find_iter == precision_reduce_count.end()) { | |||
| continue; | |||
| } | |||
| if (min_reduce_precision_count > find_iter->second) { | |||
| selected_index = SizeToInt(iter->first); | |||
| min_reduce_precision_count = find_iter->second; | |||
| } | |||
| ++iter; | |||
| } | |||
| return selected_index; | |||
| bool CanDataTypeReduce(const std::vector<int> &datatype_indexes, int check_index, | |||
| const std::vector<int> &node_mix_precision_datatype_index) { | |||
| return datatype_indexes[check_index] != kUnSupportMixedDataTypeIndex && | |||
| datatype_indexes[check_index] <= node_mix_precision_datatype_index[check_index]; | |||
| } | |||
| int RaiseOrReduceDataTypePrecisionSelect( | |||
| const std::vector<int> &node_mix_precision_datatype_index, const std::vector<TypeId> &node_mix_precision_datatype, | |||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||
| bool RaiseOrReduceDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index, | |||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||
| MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); | |||
| CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, | |||
| kernel_match_datatype_idx); | |||
| // reduce / raise | |||
| std::unordered_map<size_t, size_t> precision_reduce_count; | |||
| for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { | |||
| if (node_mix_precision_datatype[i] == kTypeUnknown) { | |||
| continue; | |||
| @@ -348,29 +326,21 @@ int RaiseOrReduceDataTypePrecisionSelect( | |||
| if (i >= datatype_indexes.size()) { | |||
| MS_LOG(EXCEPTION) << "index " << i << "> kernel datatype indexes size " << datatype_indexes.size(); | |||
| } | |||
| if (datatype_indexes[i] == kUnSupportMixedDataTypeIndex) { | |||
| if (!CanDataTypeReduce(datatype_indexes, i, node_mix_precision_datatype_index)) { | |||
| iter = kernel_match_datatype_idx->erase(iter); | |||
| } else { | |||
| if (datatype_indexes[i] < node_mix_precision_datatype_index[i]) { | |||
| auto count_iter = precision_reduce_count.find(iter->first); | |||
| if (count_iter != precision_reduce_count.end()) { | |||
| count_iter->second++; | |||
| } else { | |||
| precision_reduce_count[iter->first] = 1; | |||
| } | |||
| } | |||
| ++iter; | |||
| } | |||
| } | |||
| } | |||
| return GetMinReducePrecisionCountIndex(kernel_match_datatype_idx, precision_reduce_count); | |||
| return !kernel_match_datatype_idx->empty(); | |||
| } | |||
| void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelBuildInfo &kernel_build_info, | |||
| std::vector<int> *support_indexes, std::vector<TypeId> *node_mix_precision_datatype, | |||
| std::vector<TypeId> *support_datatypes, | |||
| std::vector<int> *node_mix_precision_datatype_index) { | |||
| MS_EXCEPTION_IF_NULL(node_mix_precision_datatype); | |||
| bool add_node_datatype_flag = false; | |||
| if (node_mix_precision_datatype->size() == 0) { | |||
| add_node_datatype_flag = true; | |||
| @@ -390,104 +360,59 @@ void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelB | |||
| } | |||
| } | |||
| int PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index, | |||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatype, | |||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) { | |||
| void PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index, | |||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatype, | |||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) { | |||
| MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| MS_EXCEPTION_IF_NULL(precision_reduce); | |||
| std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx; | |||
| std::map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx; | |||
| // raise precision | |||
| int selected_index = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||
| kernel_support_datatype, kernel_match_datatype_idx); | |||
| if (selected_index != -1) { | |||
| int max_match = 0; | |||
| auto iter = kernel_match_datatype_idx->begin(); | |||
| int match_count = 0; | |||
| while (iter != kernel_match_datatype_idx->end()) { | |||
| auto kernel_datatypes = kernel_support_datatype.find(iter->first); | |||
| if (kernel_datatypes == kernel_support_datatype.end()) { | |||
| MS_LOG(EXCEPTION) << "Can not find kernel index" << iter->first << "'s datatype."; | |||
| } | |||
| if (kernel_datatypes->second.size() < node_mix_precision_datatype.size()) { | |||
| MS_LOG(EXCEPTION) << "Kernel datatype size is not equal to node datatype size!"; | |||
| } | |||
| for (size_t i = 0; i < node_mix_precision_datatype.size(); ++i) { | |||
| if (node_mix_precision_datatype[i] == kernel_datatypes->second[i]) { | |||
| ++match_count; | |||
| } | |||
| } | |||
| if (match_count > max_match) { | |||
| selected_index = SizeToInt(iter->first); | |||
| } | |||
| ++iter; | |||
| } | |||
| bool selected_ret = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||
| kernel_support_datatype, kernel_match_datatype_idx); | |||
| if (selected_ret) { | |||
| *precision_reduce = false; | |||
| return; | |||
| } | |||
| if (selected_index == -1 && context_ptr->enable_reduce_precision()) { | |||
| selected_index = | |||
| RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||
| kernel_support_datatype, &kernel_match_datatype_idx_copy); | |||
| if (selected_index != -1) { | |||
| *precision_reduce = true; | |||
| } | |||
| if (context_ptr->enable_reduce_precision()) { | |||
| selected_ret = RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||
| kernel_support_datatype, &kernel_match_datatype_idx_copy); | |||
| } | |||
| if (selected_ret) { | |||
| *precision_reduce = true; | |||
| *kernel_match_datatype_idx = kernel_match_datatype_idx_copy; | |||
| } | |||
| return selected_index; | |||
| } | |||
| void SelectKernel(const CNodePtr &kernel_node, bool precision_reduce, const std::vector<TypeId> &node_datatype, | |||
| const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_info_ptr) { | |||
| MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr); | |||
| void PrintRaiseOrReducePrecisionSelectedInfo(const CNodePtr &cnode, | |||
| const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_build_info, | |||
| bool precision_reduce) { | |||
| MS_EXCEPTION_IF_NULL(selected_kernel_build_info); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| std::ostringstream buffer; | |||
| buffer << cnode->DebugString(); | |||
| if (precision_reduce) { | |||
| std::ostringstream datatype; | |||
| size_t input_num = selected_kernel_info_ptr->GetInputNum(); | |||
| size_t i = 0; | |||
| datatype << "("; | |||
| for (; i < input_num && i < node_datatype.size(); ++i) { | |||
| datatype << static_cast<int>(node_datatype[i]); | |||
| if (i < input_num - 1) { | |||
| datatype << ", "; | |||
| } | |||
| } | |||
| datatype << ") -> ("; | |||
| for (; i < node_datatype.size(); ++i) { | |||
| datatype << static_cast<int>(node_datatype[i]); | |||
| if (i < node_datatype.size() - 1) { | |||
| datatype << ", "; | |||
| } | |||
| } | |||
| datatype << ")"; | |||
| MS_LOG(WARNING) << kernel_node->DebugString() << " reduce precision, node datatype: " << datatype.str() | |||
| << ", select kernel: %s" << selected_kernel_info_ptr->ToString(); | |||
| buffer << " reduce precision, node datatype: "; | |||
| } else { | |||
| buffer << " raise precision, node datatype: "; | |||
| } | |||
| AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, kernel_node.get()); | |||
| // Set format and data type for input tensor. | |||
| SetTensorDeviceInfo(*selected_kernel_info_ptr, kernel_node); | |||
| PrintInputAndOutputInferType(buffer, cnode); | |||
| buffer << ", select kernel:" << selected_kernel_build_info->ToString(); | |||
| MS_LOG(INFO) << buffer.str(); | |||
| } | |||
| } // namespace | |||
| void SelectKernelInfo(const CNodePtr &kernel_node) { | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list; | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| kernel::KernelQuery(kernel_node, &kernel_info_list); | |||
| std::shared_ptr<kernel::KernelBuildInfo> ChooseMatchedKernelInfo( | |||
| const CNodePtr &kernel_node, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) { | |||
| if (kernel_info_list.empty()) { | |||
| return nullptr; | |||
| } | |||
| std::vector<int> most_match_counts = {-1, -1, -1, -1}; | |||
| int selected_index = -1; | |||
| std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx; | |||
| std::unordered_map<size_t, std::vector<TypeId>> kernel_support_datatype; | |||
| std::vector<int> node_mix_precision_datatype_index; | |||
| std::vector<TypeId> node_mix_precision_datatype; | |||
| size_t selected_index = 0; | |||
| for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { | |||
| std::vector<int> cur_kernel_info_match_counts = {0, 0, 0, 0}; | |||
| auto kernel_build_info = *(kernel_info_list[info_index]); | |||
| std::vector<int> support_indexes; | |||
| std::vector<TypeId> support_datatypes; | |||
| AddNodeAndKernelDataType(kernel_node, kernel_build_info, &support_indexes, &node_mix_precision_datatype, | |||
| &support_datatypes, &node_mix_precision_datatype_index); | |||
| kernel_match_datatype_idx[info_index] = support_indexes; | |||
| kernel_support_datatype[info_index] = support_datatypes; | |||
| if (!MatchInferOutputDataType(kernel_node, kernel_build_info)) { | |||
| continue; | |||
| } | |||
| std::shared_ptr<kernel::KernelBuildInfo> kernel_info_ptr = kernel_info_list[info_index]; | |||
| UpdateCurMatchCounts(*kernel_info_ptr, kernel_node, &cur_kernel_info_match_counts); | |||
| // Currently the selection policy is the match format count first, and then is datatype counts. | |||
| @@ -495,22 +420,77 @@ void SelectKernelInfo(const CNodePtr &kernel_node) { | |||
| selected_index = SizeToInt(info_index); | |||
| } | |||
| } | |||
| return kernel_info_list[selected_index]; | |||
| } | |||
| bool precision_reduce = false; | |||
| if (selected_index == -1) { | |||
| selected_index = PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||
| kernel_support_datatype, &kernel_match_datatype_idx, &precision_reduce); | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> GetAllMatchedFilteredKernelInfo( | |||
| const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) { | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> result; | |||
| for (const auto &kernel_build_info : kernel_info_list) { | |||
| MS_EXCEPTION_IF_NULL(kernel_build_info); | |||
| if (!MatchInferOutputDataType(cnode, *kernel_build_info)) { | |||
| continue; | |||
| } | |||
| result.push_back(kernel_build_info); | |||
| } | |||
| if (selected_index == -1) { | |||
| MS_LOG(EXCEPTION) << kernel_node->DebugString() << "Cannot find valid kernel Info !"; | |||
| return result; | |||
| } | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecisionMatchedKernelInfo( | |||
| const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list, | |||
| bool *precision_reduce) { | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_kernel_info_list; | |||
| std::map<size_t, std::vector<int>> kernel_match_datatype_idx; | |||
| std::map<size_t, std::vector<TypeId>> kernel_support_datatype; | |||
| std::vector<int> node_mix_precision_datatype_index; | |||
| std::vector<TypeId> node_mix_precision_datatype; | |||
| for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { | |||
| std::vector<int> support_indexes; | |||
| std::vector<TypeId> support_datatypes; | |||
| MS_EXCEPTION_IF_NULL(kernel_info_list[info_index]); | |||
| AddNodeAndKernelDataType(cnode, *kernel_info_list[info_index], &support_indexes, &node_mix_precision_datatype, | |||
| &support_datatypes, &node_mix_precision_datatype_index); | |||
| kernel_match_datatype_idx[info_index] = support_indexes; | |||
| kernel_support_datatype[info_index] = support_datatypes; | |||
| } | |||
| auto index = IntToSize(selected_index); | |||
| if (index >= kernel_info_list.size()) { | |||
| MS_LOG(EXCEPTION) << "index outof range"; | |||
| PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatype, | |||
| &kernel_match_datatype_idx, precision_reduce); | |||
| std::transform( | |||
| kernel_match_datatype_idx.begin(), kernel_match_datatype_idx.end(), std::back_inserter(filtered_kernel_info_list), | |||
| [&](const std::pair<size_t, std::vector<int>> &matched_idx) -> std::shared_ptr<kernel::KernelBuildInfo> { | |||
| return kernel_info_list[matched_idx.first]; | |||
| }); | |||
| return filtered_kernel_info_list; | |||
| } | |||
| } // namespace | |||
| void SelectKernelInfo(const CNodePtr &kernel_node) { | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list; | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| bool precision_reduce = false; | |||
| std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr; | |||
| kernel::KernelQuery(kernel_node, &kernel_info_list); | |||
| // filter kernel info matched with me infered type | |||
| auto filtered_kernel_info_list = GetAllMatchedFilteredKernelInfo(kernel_node, kernel_info_list); | |||
| if (!filtered_kernel_info_list.empty()) { | |||
| selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list); | |||
| } else { | |||
| // selected kernel info using raised precision or reduce precision | |||
| filtered_kernel_info_list = | |||
| FilterRaisedOrReducePrecisionMatchedKernelInfo(kernel_node, kernel_info_list, &precision_reduce); | |||
| selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list); | |||
| if (selected_kernel_info == nullptr) { | |||
| std::ostringstream buffer; | |||
| PrintInputAndOutputInferType(buffer, kernel_node); | |||
| MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString() | |||
| << "] cannot find valid kernel info, not supported the type" << buffer.str(); | |||
| } else { | |||
| PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce); | |||
| } | |||
| } | |||
| std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info_ptr = kernel_info_list[index]; | |||
| MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr); | |||
| SelectKernel(kernel_node, precision_reduce, node_mix_precision_datatype, selected_kernel_info_ptr); | |||
| AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get()); | |||
| // Set format and data type for input tensor. | |||
| SetTensorDeviceInfo(*selected_kernel_info, kernel_node); | |||
| } | |||
| bool CheckKernelAccuracySupported(const CNodePtr &kernel_node, | |||
| @@ -148,18 +148,29 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe | |||
| } | |||
| if (bp_end_str.empty()) { | |||
| auto last_cnode = cnode_exec_order.back(); | |||
| MS_EXCEPTION_IF_NULL(last_cnode); | |||
| bp_end_str = last_cnode->fullname_with_scope(); | |||
| bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order); | |||
| } | |||
| return bp_end_str; | |||
| } | |||
| std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) { | |||
| std::string last_tbe_kernel_name = ""; | |||
| // find last tbe_kernel | |||
| for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) { | |||
| if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) { | |||
| last_tbe_kernel_name = (*iter)->fullname_with_scope(); | |||
| break; | |||
| } | |||
| } | |||
| if (last_tbe_kernel_name.empty()) { | |||
| MS_LOG(WARNING) << "tbe kernel not found in graph"; | |||
| } | |||
| return last_tbe_kernel_name; | |||
| } | |||
| std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) { | |||
| const char *trace_netoutput = std::getenv(kIterEndNode); | |||
| auto &last_cnode = cnode_exec_order.back(); | |||
| MS_EXCEPTION_IF_NULL(last_cnode); | |||
| return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput); | |||
| return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput); | |||
| } | |||
| NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, | |||
| @@ -114,6 +114,7 @@ class ProfilingUtils { | |||
| static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static std::string GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static std::string GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static void GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order, | |||
| NotNull<ProfilingTraceInfo *> profiling_trace); | |||
| static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order, | |||
| @@ -87,6 +87,12 @@ const char *MetaIdLabel(const TypeId &v) { | |||
| return "kMetaTypeExternal"; | |||
| case kMetaTypeNone: | |||
| return "kMetaTypeNone"; | |||
| case kMetaTypeNull: | |||
| return "kMetaTypeNull"; | |||
| case kMetaTypeEllipsis: | |||
| return "kMetaTypeEllipsis"; | |||
| case kMetaTypeEnd: | |||
| return "kMetaTypeEnd"; | |||
| default: | |||
| return "[Unknown Type Id]"; | |||
| } | |||
| @@ -166,9 +166,6 @@ Tensor::Tensor(const py::int_ &input, const TypePtr &data_type) { init(py::array | |||
| Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type) | |||
| : MetaTensor(tensor), dirty_(tensor.dirty_), device_address_(tensor.device_address_) { | |||
| init(tensor.data_, data_type); | |||
| if (device_address_ != nullptr) { | |||
| (void)data_sync(); | |||
| } | |||
| } | |||
| Tensor &Tensor::operator=(const Tensor &tensor) { | |||
| @@ -17,6 +17,7 @@ | |||
| #include "kernel/kernel_build_info.h" | |||
| #include <algorithm> | |||
| #include "utils/log_adapter.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| std::string KernelBuildInfo::GetInputFormat(size_t input_index) const { | |||
| @@ -82,14 +83,14 @@ std::string KernelBuildInfo::ToString() const { | |||
| if (index != 0) { | |||
| output_buffer << ", "; | |||
| } | |||
| output_buffer << "<" << static_cast<int>(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">"; | |||
| output_buffer << "<" << ToShortString(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">"; | |||
| } | |||
| output_buffer << ") -> ("; | |||
| for (size_t index = 0; index < GetOutputNum(); ++index) { | |||
| if (index != 0) { | |||
| output_buffer << ", "; | |||
| } | |||
| output_buffer << "<" << static_cast<int>(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">"; | |||
| output_buffer << "<" << ToShortString(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">"; | |||
| } | |||
| output_buffer << ")"; | |||
| return output_buffer.str(); | |||
| @@ -108,7 +108,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||
| } | |||
| if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) { | |||
| MS_LOG(DEBUG) << "fuison op build failed, err log: " << task_result << " change to single op build."; | |||
| MS_LOG(INFO) << "Fusion warning: Fuison op build failed, err log: " << task_result | |||
| << " change to single op build."; | |||
| build_failed_num++; | |||
| } | |||
| auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false); | |||
| @@ -153,6 +153,52 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vector<std::vec | |||
| } | |||
| } | |||
| void TbeAdapter::FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list, | |||
| std::vector<nlohmann::json> *inputs_json) { | |||
| MS_EXCEPTION_IF_NULL(inputs_json); | |||
| if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { | |||
| (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json))); | |||
| } else { | |||
| if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { | |||
| inputs_json->emplace_back(inputs_list[2]); | |||
| inputs_json->emplace_back(inputs_list[0]); | |||
| inputs_json->emplace_back(inputs_list[1]); | |||
| for (size_t i = 3; i < inputs_list.size(); ++i) { | |||
| inputs_json->emplace_back(inputs_list[i]); | |||
| } | |||
| } else { | |||
| inputs_json->emplace_back(inputs_list[1]); | |||
| inputs_json->emplace_back(inputs_list[0]); | |||
| for (size_t i = 2; i < inputs_list.size(); ++i) { | |||
| inputs_json->emplace_back(inputs_list[i]); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void TbeAdapter::FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer, | |||
| std::vector<AnfNodePtr> *reorder_data_layer) { | |||
| MS_EXCEPTION_IF_NULL(reorder_data_layer); | |||
| if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { | |||
| (void)std::copy(data_layer.begin(), data_layer.end(), std::back_inserter((*reorder_data_layer))); | |||
| } else { | |||
| if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { | |||
| reorder_data_layer->emplace_back(data_layer[2]); | |||
| reorder_data_layer->emplace_back(data_layer[0]); | |||
| reorder_data_layer->emplace_back(data_layer[1]); | |||
| for (size_t i = 3; i < data_layer.size(); ++i) { | |||
| reorder_data_layer->emplace_back(data_layer[i]); | |||
| } | |||
| } else { | |||
| reorder_data_layer->emplace_back(data_layer[1]); | |||
| reorder_data_layer->emplace_back(data_layer[0]); | |||
| for (size_t i = 2; i < data_layer.size(); ++i) { | |||
| reorder_data_layer->emplace_back(data_layer[i]); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| std::map<std::string, FAttrsPass> TbeAdapter::build_json_attr_pass_map_ = { | |||
| {"MaximumGrad", TbeAdapter::MaximumGradAttrJsonPass}, | |||
| {"MinimumGrad", TbeAdapter::MinimumGradAttrJsonPass}, | |||
| @@ -44,15 +44,12 @@ class TbeAdapter { | |||
| static void GenTopKV2IndicesTensorInfo(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, | |||
| std::vector<nlohmann::json> *input_list, kCreaterType creater_type); | |||
| static void FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list, | |||
| std::vector<nlohmann::json> *inputs_json); | |||
| static void FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer, | |||
| std::vector<AnfNodePtr> *reorder_data_layer); | |||
| private: | |||
| static void Conv2DAttrJsonPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||
| nlohmann::json *attrs_json); | |||
| static void Conv2DBackpropFilterAttrJsonPass(const AnfNodePtr &anf_node, | |||
| const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||
| nlohmann::json *attrs_json); | |||
| static void Conv2DBackpropInputAttrJsonPass(const AnfNodePtr &anf_node, | |||
| const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||
| nlohmann::json *attrs_json); | |||
| static void MaximumGradAttrJsonPass(const AnfNodePtr &anf_node, | |||
| const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||
| nlohmann::json *attrs_json); | |||
| @@ -375,20 +375,26 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| for (const auto &attr_ptr : attrs_ptr) { | |||
| std::string attr_name = attr_ptr->name(); | |||
| nlohmann::json attr_obj; | |||
| attr_obj["name"] = attr_name; | |||
| if (primitive->GetAttr(attr_name) != nullptr) { | |||
| nlohmann::json attr_obj; | |||
| auto value = primitive->GetAttr(attr_name); | |||
| std::string type = attr_ptr->type(); | |||
| ParseAttrValue(type, value, &attr_obj); | |||
| attr_obj["name"] = attr_name; | |||
| attr_obj["valid"] = true; | |||
| (*attrs_json).push_back(attr_obj); | |||
| } else { | |||
| if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD && op_info->impl_path() != "") { | |||
| MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name | |||
| << " is required, but not set."; | |||
| if (op_info->impl_path().empty()) { | |||
| attr_obj["valid"] = false; | |||
| } else { | |||
| if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD) { | |||
| MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name | |||
| << " is required, but not set."; | |||
| } else { | |||
| attr_obj["valid"] = false; | |||
| } | |||
| } | |||
| } | |||
| (*attrs_json).push_back(attr_obj); | |||
| } | |||
| return true; | |||
| } | |||
| @@ -484,7 +490,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp | |||
| MS_EXCEPTION_IF_NULL(fusion_kernel); | |||
| // get input layer info | |||
| std::vector<std::vector<mindspore::AnfNodePtr>> input_layers; | |||
| if (!GetInputLayers(input_nodes, compute_nodes, &input_layers)) { | |||
| std::map<const AnfNodePtr, FusionDataType> spec_data_input; | |||
| if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) { | |||
| return false; | |||
| } | |||
| // gen fusion scopre_op jsom | |||
| @@ -505,8 +512,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp | |||
| for (const auto &layer : input_layers) { | |||
| for (const auto &data_input : layer) { | |||
| nlohmann::json data_str; | |||
| if (!GenFusionDataInputJson(data_input, &data_str, &index)) { | |||
| MS_LOG(DEBUG) << "GenFusionDataInputJson faild."; | |||
| if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) { | |||
| MS_LOG(INFO) << "Fusion error: gen fusion datainput json faild."; | |||
| return false; | |||
| } | |||
| data_list.push_back(data_str); | |||
| @@ -519,7 +526,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp | |||
| } | |||
| void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | |||
| size_t desc_output_idx, nlohmann::json *output_desc) { | |||
| size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) { | |||
| std::string output_desc_name = anf_node->fullname_with_scope(); | |||
| if (node_out_idx > 0) { | |||
| output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx); | |||
| @@ -539,58 +546,109 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_ | |||
| (*output_desc)["shape"] = shape; | |||
| auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); | |||
| if (format == kOpFormat_DEFAULT) { | |||
| if (ori_shape.size() == 4) { | |||
| format = kOpFormat_NCHW; | |||
| } else { | |||
| format = kOpFormat_ND; | |||
| } | |||
| format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND; | |||
| } | |||
| (*output_desc)["format"] = format; | |||
| (*output_desc)["ori_format"] = kOpFormat_NCHW; | |||
| (*output_desc)["output_index"] = desc_output_idx; | |||
| if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) { | |||
| std::vector<size_t> spec_shape = {}; | |||
| spec_shape.emplace_back(shape[0]); | |||
| spec_shape.emplace_back(shape[1]); | |||
| spec_shape.emplace_back(shape[2] * shape[3]); | |||
| spec_shape.emplace_back(shape[4]); | |||
| (*output_desc)["shape"] = spec_shape; | |||
| } else if (fusion_data_type == kFusionReLUGradV2 && (*output_desc)["data_type"] == "uint8") { | |||
| std::vector<size_t> spec_shape = {}; | |||
| spec_shape.emplace_back(shape[0]); | |||
| spec_shape.emplace_back(shape[1]); | |||
| spec_shape.emplace_back(shape[2] * shape[3]); | |||
| spec_shape.emplace_back(16); | |||
| (*output_desc)["shape"] = spec_shape; | |||
| (*output_desc)["data_type"] = "bool"; | |||
| } | |||
| } | |||
| void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | |||
| size_t output_index, nlohmann::json *output_desc) { | |||
| std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); | |||
| (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name); | |||
| (*output_desc)["data_type"] = tbe::TypeIdToString(kNumberTypeFloat32); | |||
| (*output_desc)["output_index"] = output_index; | |||
| std::vector<size_t> shape; | |||
| (*output_desc)["shape"] = shape; | |||
| } | |||
| bool TbeKernelBuild::GetInputLayers(const vector<mindspore::AnfNodePtr> &input_nodes, | |||
| const vector<mindspore::AnfNodePtr> &compute_nodes, | |||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers) { | |||
| bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, | |||
| const std::vector<mindspore::AnfNodePtr> &reorder_layer, | |||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input) { | |||
| if ((op_name == kReluGradV2OpName || op_name == kAddNOpName) && reorder_layer.empty()) { | |||
| MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. "; | |||
| return false; | |||
| } | |||
| MS_LOG(INFO) << "Fusion info: op_name: " << op_name << "input layer size: " << reorder_layer.size(); | |||
| if (op_name == kReluGradV2OpName) { | |||
| (*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2; | |||
| } else if (op_name == kAddNOpName) { | |||
| for (const auto &it : reorder_layer) { | |||
| (*spec_data_input)[it] = kFusionAddN; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes, | |||
| const std::vector<mindspore::AnfNodePtr> &compute_nodes, | |||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers, | |||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input) { | |||
| auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) { | |||
| auto op_name = AnfAlgo::GetCNodeName(it); | |||
| return op_name == kConv2DBackpropInputOpName; | |||
| }); | |||
| bool need_spec = (result != compute_nodes.end()); | |||
| size_t input_size = 0; | |||
| for (const auto &compute_node : compute_nodes) { | |||
| std::vector<mindspore::AnfNodePtr> layer; | |||
| std::vector<mindspore::AnfNodePtr> layer = {}; | |||
| std::vector<mindspore::AnfNodePtr> reorder_layer = {}; | |||
| MS_EXCEPTION_IF_NULL(compute_node); | |||
| auto op_name = AnfAlgo::GetCNodeName(compute_node); | |||
| auto ccompute_node = compute_node->cast<CNodePtr>(); | |||
| if (ccompute_node == nullptr) { | |||
| MS_LOG(DEBUG) << "fusion compute node must be cnode"; | |||
| MS_LOG(INFO) << "Fusion error: fusion compute node must be cnode"; | |||
| return false; | |||
| } | |||
| MS_LOG(INFO) << "Fusion info: compute name: " << compute_node->fullname_with_scope(); | |||
| for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) { | |||
| auto input = ccompute_node->input(i); | |||
| auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input); | |||
| if (find_iter != input_nodes.end()) { | |||
| MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope(); | |||
| layer.emplace_back((*find_iter)); | |||
| } else { | |||
| MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() | |||
| << ") node's output."; | |||
| } | |||
| } | |||
| TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer); | |||
| if (need_spec) { | |||
| MS_LOG(INFO) << "Fusion info: match conv2d backprop input + ... patten."; | |||
| if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) { | |||
| return false; | |||
| } | |||
| } | |||
| input_size += layer.size(); | |||
| input_layers->emplace_back(layer); | |||
| input_size += reorder_layer.size(); | |||
| input_layers->emplace_back(reorder_layer); | |||
| } | |||
| if (input_nodes.size() != input_size) { | |||
| MS_LOG(DEBUG) << "fusion scope error, layer input:" << input_size << ", input_node:" << input_nodes.size(); | |||
| MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size | |||
| << ", input_node:" << input_nodes.size(); | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str, | |||
| size_t *index) { | |||
| bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, | |||
| const std::map<const AnfNodePtr, FusionDataType> &spec_data_input, | |||
| nlohmann::json *data_str, size_t *index) { | |||
| MS_EXCEPTION_IF_NULL(data_str); | |||
| MS_EXCEPTION_IF_NULL(index); | |||
| std::vector<nlohmann::json> output_desc_list; | |||
| @@ -604,13 +662,17 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode> | |||
| output_desc_list.push_back(output_desc); | |||
| (*index)++; | |||
| } else { | |||
| FusionDataType fusion_data_type = kFusionNormal; | |||
| if (spec_data_input.find(data_input) != spec_data_input.end()) { | |||
| fusion_data_type = spec_data_input.at(data_input); | |||
| } | |||
| auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0); | |||
| auto real_node = kernel_idx.first; | |||
| size_t real_idx = kernel_idx.second; | |||
| MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx; | |||
| // "output_desc" | |||
| nlohmann::json output_desc; | |||
| GenDescJson(real_node, real_idx, real_idx, &output_desc); | |||
| GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type); | |||
| output_desc_list.push_back(output_desc); | |||
| (*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||
| } | |||
| @@ -632,11 +694,12 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) { | |||
| auto real_input_size = cnode->inputs().size() - 1; | |||
| auto dyn_input_size = dyn_input_sizes.size(); | |||
| if (dyn_input_size != 1) { | |||
| MS_LOG(DEBUG) << "fusion build not support dyn_input_sizes > 1"; | |||
| MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1"; | |||
| return ret; | |||
| } | |||
| if (IntToSize(dyn_input_sizes[0]) != real_input_size) { | |||
| MS_LOG(DEBUG) << " dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" << real_input_size; | |||
| MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" | |||
| << real_input_size; | |||
| return ret; | |||
| } | |||
| ret = true; | |||
| @@ -663,6 +726,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||
| std::vector<nlohmann::json> *input_desc_list, size_t *index) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(input_desc_list); | |||
| std::vector<nlohmann::json> input_desc_list_tmp = {}; | |||
| bool is_dynamic_input = IsDynamicInput(cnode); | |||
| for (size_t i = 1; i < cnode->inputs().size(); ++i) { | |||
| auto input = cnode->input(i); | |||
| @@ -676,7 +740,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||
| MS_LOG(INFO) << "node has dynamic input."; | |||
| input_desc["dyn_index"] = (i - 1); | |||
| } | |||
| (*input_desc_list).emplace_back(input_desc); | |||
| input_desc_list_tmp.emplace_back(input_desc); | |||
| } | |||
| size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); | |||
| if (optional_num > 0) { | |||
| @@ -686,35 +750,24 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||
| optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index); | |||
| (*index)++; | |||
| (*layer_iter)->emplace_back(nullptr); | |||
| (*input_desc_list).emplace_back(optional_input_desc); | |||
| input_desc_list_tmp.emplace_back(optional_input_desc); | |||
| } | |||
| } | |||
| auto op_name = AnfAlgo::GetCNodeName(cnode); | |||
| TbeAdapter::FusionInputOrderPass(op_name, input_desc_list_tmp, input_desc_list); | |||
| return true; | |||
| } | |||
| std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) { | |||
| std::vector<size_t> desc_output_index = {}; | |||
| bool find_reused = false; | |||
| size_t reused_num = 0; | |||
| for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { | |||
| auto output_use_num_item = output_used_nums[idx]; | |||
| MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item; | |||
| if (output_use_num_item == 1 || output_use_num_item == 0) { | |||
| desc_output_index.emplace_back(idx); | |||
| if (output_use_num_item > 1) { | |||
| desc_output_index.emplace_back(idx); | |||
| } else { | |||
| if (!find_reused) { | |||
| desc_output_index.emplace_back(idx); | |||
| } else { | |||
| desc_output_index.emplace_back(desc_output_index[idx - 1]); | |||
| } | |||
| reused_num += (output_use_num_item - 1); | |||
| find_reused = true; | |||
| } | |||
| } | |||
| auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1; | |||
| for (size_t i = 0; i < reused_num; ++i) { | |||
| desc_output_index.emplace_back(pad_value); | |||
| } | |||
| return desc_output_index; | |||
| } | |||
| @@ -722,8 +775,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode | |||
| std::vector<nlohmann::json> *output_desc_list) { | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | |||
| if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { | |||
| // wait anther pr: auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum); | |||
| auto output_used_nums = {SizeToInt(AnfAlgo::GetNodeAttr<std::size_t>(cnode, kAttrOutputUsedNum))}; | |||
| auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum); | |||
| MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); | |||
| if (output_used_nums.size() != output_size) { | |||
| MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" | |||
| @@ -812,6 +864,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto | |||
| } | |||
| auto ret = GetIOSizeImpl(data_output); | |||
| input_size_list->push_back(ret); | |||
| MS_LOG(INFO) << "Fusion info: scope input name: " << op["name"] << ", size: " << ret; | |||
| } | |||
| } | |||
| } | |||
| @@ -820,26 +873,31 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto | |||
| auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); | |||
| auto real_node = kernel_idx.first; | |||
| size_t real_idx = kernel_idx.second; | |||
| auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||
| MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; | |||
| for (const auto &op : fusion_op_list) { | |||
| auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||
| if (op["name"] == normal_name) { | |||
| auto op_output_desces = op["output_desc"]; | |||
| if (output_node != real_node) { | |||
| // tuple_get item | |||
| MS_LOG(DEBUG) << "output is a tuple getitem node"; | |||
| MS_LOG(INFO) << "output is a tuple getitem node"; | |||
| auto output_desc = op_output_desces[real_idx]; | |||
| if (output_desc["shape"].empty()) { | |||
| continue; | |||
| MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; | |||
| return false; | |||
| } | |||
| auto ret = GetIOSizeImpl(output_desc); | |||
| output_size_list->push_back(ret); | |||
| MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; | |||
| } else { | |||
| for (const auto &output_desc : op_output_desces) { | |||
| if (output_desc["shape"].empty()) { | |||
| MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; | |||
| continue; | |||
| } | |||
| auto ret = GetIOSizeImpl(output_desc); | |||
| output_size_list->push_back(ret); | |||
| MS_LOG(INFO) << "Fusion info: scope output size: " << ret; | |||
| } | |||
| } | |||
| } | |||
| @@ -35,6 +35,8 @@ namespace kernel { | |||
| // kernel operate type used for generate json | |||
| class TbeKernelBuild { | |||
| enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2 }; | |||
| public: | |||
| static bool GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list, | |||
| std::vector<size_t> *output_size_list); | |||
| @@ -48,8 +50,9 @@ class TbeKernelBuild { | |||
| private: | |||
| TbeKernelBuild() = default; | |||
| ~TbeKernelBuild() = default; | |||
| static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str, | |||
| size_t *index); | |||
| static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, | |||
| const std::map<const AnfNodePtr, FusionDataType> &spec_data_input, | |||
| nlohmann::json *data_str, size_t *index); | |||
| static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node, | |||
| std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter, | |||
| nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index); | |||
| @@ -60,13 +63,17 @@ class TbeKernelBuild { | |||
| static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, | |||
| std::vector<nlohmann::json> *output_desc_list); | |||
| static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | |||
| size_t desc_output_idx, nlohmann::json *output_desc); | |||
| size_t desc_output_idx, nlohmann::json *output_desc, | |||
| FusionDataType fusion_data_type = kFusionNormal); | |||
| static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | |||
| size_t output_index, nlohmann::json *output_desc); | |||
| static size_t GetIOSizeImpl(const nlohmann::json &desc); | |||
| static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer, | |||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input); | |||
| static bool GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes, | |||
| const std::vector<mindspore::AnfNodePtr> &compute_nodes, | |||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers); | |||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers, | |||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input); | |||
| static bool IsDynamicInput(const CNodePtr &cnode); | |||
| static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); | |||
| }; | |||
| @@ -346,7 +346,8 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string | |||
| MS_LOG(ERROR) << "Error in select sql statement, sql:" << common::SafeCStr(sql) << ", error: " << errmsg; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "Get" << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index."; | |||
| MS_LOG(INFO) << "Get " << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index."; | |||
| std::lock_guard<std::mutex> lck(shard_locker_); | |||
| for (int i = 0; i < static_cast<int>(columns.size()); ++i) { | |||
| categories.emplace(columns[i][0]); | |||
| } | |||
| @@ -1084,6 +1084,7 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple, | |||
| std::vector<unsigned int> shrink; | |||
| auto slice_tuple_eles = slice_tuple->elements(); | |||
| size_t ellipsis_num = 0; | |||
| for (size_t index = 0; index < slice_tuple_size; index++) { | |||
| if (slice_tuple_eles[index]->isa<AbstractSlice>()) { | |||
| AbstractSlicePtr slice = dyn_cast<AbstractSlice>(slice_tuple_eles[index]); | |||
| @@ -1118,12 +1119,13 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple, | |||
| << slice_tuple_eles[index]->ToString(); | |||
| } | |||
| for (size_t index = slice_tuple_size; index < shape_size; index++) { | |||
| begin->push_back(0); | |||
| end->push_back(shape[index]); | |||
| strides->push_back(1); | |||
| if (ellipsis_num == 0) { | |||
| for (size_t index = slice_tuple_size; index < shape_size; index++) { | |||
| begin->push_back(0); | |||
| end->push_back(shape[index]); | |||
| strides->push_back(1); | |||
| } | |||
| } | |||
| return ConvertBinaryToDecimal(shrink); | |||
| } | |||
| @@ -1199,6 +1201,7 @@ FuncGraphPtr TensorSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec | |||
| if (scalar_ptr->BuildValue()->cast<BoolImmPtr>()->value()) { | |||
| return ExpandADim(ret_graph, tensor_node); | |||
| } | |||
| MS_LOG(EXCEPTION) << "TensorSlice not support the index is False."; | |||
| } | |||
| shrink_axis_mask = GenerateStridedSliceParametersFromNumber(scalar_ptr, shape, &begin, &end, &strides); | |||
| } else if (args_spec_list[1]->isa<AbstractEllipsis>()) { | |||
| @@ -35,7 +35,6 @@ | |||
| namespace mindspore { | |||
| // namespace to support composite operators definition | |||
| namespace prim { | |||
| // Expand the tuple and dict parameters generated when parsing the function call, | |||
| // and generate positional parameters and key-value pairs for function. | |||
| class UnpackCall : public MetaFuncGraph { | |||
| @@ -47,7 +46,6 @@ class UnpackCall : public MetaFuncGraph { | |||
| friend bool operator==(const UnpackCall &lhs, const UnpackCall &rhs) { return lhs.name_ == rhs.name_; } | |||
| }; | |||
| using UnpackCallPtr = std::shared_ptr<UnpackCall>; | |||
| } // namespace prim | |||
| } // namespace mindspore | |||
| @@ -133,7 +133,6 @@ ResolveIRPassLib::ResolveIRPassLib() { | |||
| InferenceOptPrepareLib::InferenceOptPrepareLib() { | |||
| grad_var_prepare_ = MakeSubstitution(GradVarPrepare(), "grad_var_prepare", IsCNode); | |||
| } | |||
| } // namespace irpass | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -159,7 +159,6 @@ inline bool IsCNodeDup(const AnfNodePtr &node) { | |||
| } | |||
| return false; | |||
| } | |||
| } // namespace irpass | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -31,7 +31,6 @@ | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace irpass { | |||
| static AnfNodePtr GenerateUnpackGraphNode(std::vector<AnfNodePtr> inputs_y, FuncGraphPtr func_graph, | |||
| AnfNodePtr func_node, bool is_unpack, bool sens_param) { | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| @@ -33,7 +33,6 @@ | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace irpass { | |||
| // {{GradOperation, g, w}, Ys} | |||
| // {UnPackCall, {GradOperation, g, w}, Ys} | |||
| class GradVarPrepare : public AnfVisitor { | |||
| @@ -28,13 +28,11 @@ | |||
| namespace mindspore { | |||
| namespace pipeline { | |||
| struct ExecutorInfo { | |||
| FuncGraphPtr func_graph; | |||
| ResourcePtr resource; | |||
| std::size_t arg_list_size; | |||
| }; | |||
| using ExecutorInfoPtr = std::shared_ptr<ExecutorInfo>; | |||
| inline std::string GetPhasePrefix(const std::string &phase) { | |||
| @@ -97,7 +97,7 @@ PYBIND11_MODULE(_c_expression, m) { | |||
| py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"), | |||
| py::arg("phase") = py::str("dataset"), "Init and exec dataset."); | |||
| (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode."); | |||
| (void)m.def("init_ge", &mindspore::pipeline::InitGe, "Init GE"); | |||
| (void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend."); | |||
| (void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph."); | |||
| @@ -101,7 +101,7 @@ py::tuple GenerateKey(const std::string &name, const std::unordered_map<std::str | |||
| MS_LOG(INFO) << "Start new args and compile key:" << key; | |||
| g_args_cache[args_spec] = key++; | |||
| } | |||
| py::tuple argSpec = py::tuple(2); | |||
| auto argSpec = py::tuple(2); | |||
| argSpec[0] = name; | |||
| argSpec[1] = g_args_cache[args_spec]; | |||
| return argSpec; | |||
| @@ -236,7 +236,7 @@ py::dict ExecutorPy::GetAllreduceFusion(const std::string &phase) { | |||
| void ExecutorPy::DelNetRes(const std::string &id) { | |||
| #ifdef ENABLE_GE | |||
| FinalizeGe(); | |||
| FinalizeBackend(); | |||
| #endif | |||
| if (executor_ != nullptr) { | |||
| bool flag = false; | |||
| @@ -668,6 +668,13 @@ bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t ba | |||
| const std::vector<TypePtr> &types, const std::vector<std::vector<int64_t>> &shapes, | |||
| const std::vector<int64_t> &input_indexes, const std::string &phase) { | |||
| std::string name = MsContext::GetInstance()->backend_policy(); | |||
| #ifndef NO_DLIB | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| if (!ms_context->IsTsdOpened() || !ms_context->IsGeInited()) { | |||
| (void)InitBackend(); | |||
| } | |||
| #endif | |||
| if (name == kMsConvert || name == kMsVm) { | |||
| return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes); | |||
| } | |||
| @@ -746,7 +753,7 @@ void ResetOpId() { mindspore::id_generator::reset_id(); } | |||
| void InitHccl() { | |||
| #ifdef ENABLE_GE | |||
| (void)InitGe(); | |||
| (void)InitBackend(); | |||
| #else | |||
| mindspore::parse::python_adapter::set_python_env_flag(true); | |||
| auto ms_context = MsContext::GetInstance(); | |||
| @@ -768,7 +775,7 @@ void InitHccl() { | |||
| void FinalizeHccl() { | |||
| #ifdef ENABLE_GE | |||
| (void)FinalizeGe(); | |||
| (void)FinalizeBackend(); | |||
| #else | |||
| device::KernelRuntimeManager::Instance().ClearRuntimeResource(); | |||
| #endif | |||
| @@ -789,7 +796,7 @@ void ReleaseGeTsd() { | |||
| } | |||
| } | |||
| void InitGe() { | |||
| void InitBackend() { | |||
| // set python env flag | |||
| mindspore::parse::python_adapter::set_python_env_flag(true); | |||
| // open tsd before ge initialize | |||
| @@ -801,7 +808,7 @@ void InitGe() { | |||
| (void)ms_context->InitGe(); | |||
| } | |||
| void FinalizeGe() { | |||
| void FinalizeBackend() { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| (void)context_ptr->FinalizeGe(); | |||
| @@ -115,8 +115,8 @@ bool InitDistribute(const std::map<std::string, std::string> &options); | |||
| void ResetOpId(); | |||
| void InitHccl(); | |||
| void FinalizeHccl(); | |||
| void InitGe(); | |||
| void FinalizeGe(); | |||
| void InitBackend(); | |||
| void FinalizeBackend(); | |||
| void ClearResAtexit(); | |||
| void ReleaseGeTsd(); | |||
| @@ -52,11 +52,11 @@ void DoExecNonInputGraph(const std::string &phase) { | |||
| transform::RunOptions run_options; | |||
| run_options.name = phase; | |||
| auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); | |||
| if (graph_runner == nullptr) { | |||
| MS_LOG(ERROR) << "Can not found GraphRunner"; | |||
| return; | |||
| } | |||
| { | |||
| // Release GIL before calling into (potentially long-running) C++ code | |||
| py::gil_scoped_release release; | |||
| @@ -181,7 +181,6 @@ bool AddDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::di | |||
| size_t pos = phase.find('.'); | |||
| std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1)); | |||
| std::string phase_prefix = phase.substr(0, pos); | |||
| if (phase_prefix == "export") { | |||
| MS_LOG(INFO) << "Set DfGraphConvertor training : false"; | |||
| convertor.set_training(false); | |||
| @@ -319,19 +318,24 @@ void RunGEInitGraph(const py::dict &init_params, const std::string &phase) { | |||
| py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::tuple &data, size_t *count) { | |||
| MS_EXCEPTION_IF_NULL(cnode_data); | |||
| if (*count >= data.size()) { | |||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||
| << " less than the number of elements required. "; | |||
| } | |||
| if (cnode_data->isa<AbstractTensor>()) { | |||
| if (*count >= data.size()) { | |||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||
| << " less than the number of elements required. "; | |||
| } | |||
| BaseShapePtr shape = cnode_data->BuildShape(); | |||
| auto shape_act = shape->cast<abstract::ShapePtr>()->shape(); | |||
| Tensor tensor_exp = py::cast<Tensor>(data[*count]); | |||
| if (shape_act != tensor_exp.shape()) { | |||
| MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as " | |||
| "the shape of the tensor derived from ME."; | |||
| if (!shape->isa<abstract::Shape>()) { | |||
| MS_LOG(EXCEPTION) << "The shape of the tensor derived is not Shape, is " << shape->ToString(); | |||
| } | |||
| auto shape_me = shape->cast<abstract::ShapePtr>()->shape(); | |||
| auto shape_ge = py::cast<Tensor>(data[*count]).shape(); | |||
| if (shape_ge != shape_me) { | |||
| MS_LOG(EXCEPTION) << "The shape of the " << *count << "th tensor returned: " << shape_ge | |||
| << " is not the same as the shape of the tensor derived: " << shape_me; | |||
| } | |||
| return data[(*count)++]; | |||
| } | |||
| @@ -343,7 +347,7 @@ py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::t | |||
| auto data_tp = cnode_data->cast<AbstractTuplePtr>(); | |||
| auto elements = data_tp->elements(); | |||
| size_t size = data_tp->size(); | |||
| py::tuple tp = py::tuple(size); | |||
| auto tp = py::tuple(size); | |||
| for (size_t i = 0; i < size; i++) { | |||
| tp[i] = ExtractGeneralCnodeRet(elements[i], data, count); | |||
| } | |||
| @@ -357,11 +361,11 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data, | |||
| return ValuePtrToPyData(GetValueNode(output_node)); | |||
| } | |||
| if (*count >= data.size()) { | |||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||
| << " less than the number of elements required. "; | |||
| } | |||
| if (output_node->isa<Parameter>()) { | |||
| if (*count >= data.size()) { | |||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||
| << " less than the number of elements required. "; | |||
| } | |||
| return data[(*count)++]; | |||
| } | |||
| @@ -374,7 +378,7 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data, | |||
| if (output_c->IsApply(prim::kPrimMakeTuple)) { | |||
| auto input_list = output_c->inputs(); | |||
| size_t size = input_list.size(); | |||
| py::tuple tp = py::tuple(size - 1); | |||
| auto tp = py::tuple(size - 1); | |||
| for (size_t i = 1; i < size; i++) { | |||
| tp[i - 1] = StructureOutput(input_list[i], data, count); | |||
| } | |||
| @@ -396,11 +400,8 @@ std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr &graph, const std::ve | |||
| std::vector<GeTensorPtr> ge_outputs; | |||
| transform::RunOptions run_options; | |||
| run_options.name = phase; | |||
| auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); | |||
| if (graph_runner == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Can not found GraphRunner."; | |||
| } | |||
| @@ -473,7 +474,6 @@ void ProcessGeArg(const std::map<std::string, ExecutorInfoPtr> &info, const py:: | |||
| py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::tuple &args, | |||
| const std::string &phase) { | |||
| std::string phase_prefix = GetPhasePrefix(phase); | |||
| if (phase_prefix == "save") { | |||
| DoExecNonInputGraph(phase); | |||
| ConfigManager::GetInstance().ResetConfig(); | |||
| @@ -483,7 +483,6 @@ py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const | |||
| if (info.count(phase) == 0) { | |||
| MS_LOG(EXCEPTION) << "There is no phase:" << phase; | |||
| } | |||
| FuncGraphPtr anf_graph = info.at(phase)->func_graph; | |||
| #ifdef ENABLE_INFER | |||
| @@ -31,7 +31,6 @@ | |||
| namespace mindspore { | |||
| namespace pipeline { | |||
| namespace py = pybind11; | |||
| void SetGeOption(const std::map<std::string, std::string> &options); | |||
| @@ -50,7 +49,6 @@ bool InitExecDatasetGe(const std::string &queue_name, int64_t size, int64_t batc | |||
| const std::vector<int64_t> &input_indexes, const std::string &phase); | |||
| void ExportDFGraph(const std::string &file_name, const std::string &phase); | |||
| } // namespace pipeline | |||
| } // namespace mindspore | |||
| @@ -41,7 +41,7 @@ class AbstractFuncAtom : public AbstractFunction { | |||
| AbstractFunctionPtr Join(const AbstractFunctionPtr &other) final; | |||
| void Visit(std::function<void(const AbstractFuncAtomPtr &)>) const final; | |||
| bool operator==(const AbstractFunction &other) const; | |||
| bool operator==(const AbstractFunction &other) const override; | |||
| std::size_t hash() const override { return tid(); } | |||
| }; | |||
| @@ -270,7 +270,7 @@ class TypedPrimitiveAbstractClosure : public AbstractFuncAtom { | |||
| class DummyAbstractClosure : public AbstractFuncAtom { | |||
| public: | |||
| DummyAbstractClosure() = default; | |||
| ~DummyAbstractClosure() = default; | |||
| ~DummyAbstractClosure() override = default; | |||
| MS_DECLARE_PARENT(DummyAbstractClosure, AbstractFuncAtom) | |||
| EvaluatorPtr GetEvaluator(AnalysisEnginePtr) override { MS_LOG(EXCEPTION) << "A dummy function cannot eval."; } | |||
| @@ -295,7 +295,6 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) { | |||
| dic["shape"] = shape; | |||
| dic["dtype"] = arg_slice->BuildType(); | |||
| dic["value"] = BuildValue(arg_slice->BuildValue()); | |||
| } else if (abs_base->isa<AbstractTuple>()) { | |||
| auto arg_tuple = dyn_cast<AbstractTuple>(abs_base); | |||
| size_t len = arg_tuple->size(); | |||
| @@ -38,6 +38,7 @@ | |||
| #include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" | |||
| #include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" | |||
| #include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" | |||
| #include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" | |||
| #include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" | |||
| #include "pre_activate/ascend/ir_fusion/transdata_split.h" | |||
| #include "pre_activate/ascend/ir_fission/topk_split.h" | |||
| @@ -46,7 +47,6 @@ | |||
| #include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h" | |||
| #include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" | |||
| #include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h" | |||
| #include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" | |||
| #include "pre_activate/ascend/ir_fusion/derelu_fusion.h" | |||
| #include "pre_activate/ascend/format_type/insert_trans_op.h" | |||
| #include "pre_activate/pass/getitem_tuple.h" | |||
| @@ -97,7 +97,6 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { | |||
| ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<AddnFission>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<DereluFusion>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<ConfusionMulGradFusion>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>()); | |||
| ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>()); | |||
| } | |||
| @@ -267,6 +266,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||
| other_pm->AddPass(std::make_shared<AllReduceFusion>()); | |||
| other_pm->AddPass(std::make_shared<AllGatherFusion>()); | |||
| other_pm->AddPass(std::make_shared<ParameterTransOpFusion>()); | |||
| other_pm->AddPass(std::make_shared<RefreshParameterFormat>()); | |||
| other_pm->AddPass(std::make_shared<BufferFusion>()); | |||
| other_pm->AddPass(std::make_shared<GetitemTuple>()); | |||
| other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | |||
| @@ -21,6 +21,7 @@ | |||
| #include <vector> | |||
| #include "device/ascend/kernel_select_ascend.h" | |||
| #include "kernel/kernel_query.h" | |||
| #include "kernel/tbe/tbe_kernel_select.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| @@ -36,6 +37,16 @@ class KernelSelect { | |||
| }; | |||
| using KernelSelectPtr = std::shared_ptr<KernelSelect>; | |||
| class SupportedChecker { | |||
| public: | |||
| SupportedChecker() = default; | |||
| virtual ~SupportedChecker() = default; | |||
| virtual bool CheckSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) { | |||
| return kernel::CheckSupported(anf_node, select_kernel_build_info); | |||
| } | |||
| }; | |||
| using SupportedCheckerPtr = std::shared_ptr<SupportedChecker>; | |||
| class KernelQuery { | |||
| public: | |||
| KernelQuery() = default; | |||
| @@ -17,12 +17,14 @@ | |||
| #include <vector> | |||
| #include <tuple> | |||
| #include <utility> | |||
| #include <unordered_set> | |||
| #include <unordered_map> | |||
| #include <deque> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include <iterator> | |||
| #include "kernel/kernel_fusion.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| @@ -260,33 +262,40 @@ CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::v | |||
| return buffer_fusion_kernel; | |||
| } | |||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list_in, | |||
| const std::vector<AnfNodePtr> &inputs_list, | |||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list, | |||
| const std::vector<AnfNodePtr> &outputs_list) { | |||
| MS_LOG(DEBUG) << "Start Create Kernel Info"; | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | |||
| // inputs format and data type | |||
| std::vector<std::string> inputs_format; | |||
| std::vector<TypeId> inputs_data_type; | |||
| for (auto node : inputs_list_in) { | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto &inputs = cnode->inputs(); | |||
| for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { | |||
| if (std::find(inputs_list.begin(), inputs_list.end(), inputs[input_index]) != inputs_list.end()) { | |||
| inputs_format.push_back(AnfAlgo::GetInputFormat(node, input_index - 1)); | |||
| inputs_data_type.push_back(AnfAlgo::GetInputDeviceDataType(node, input_index - 1)); | |||
| } | |||
| for (const auto &input : inputs_list) { | |||
| if (input->isa<CNode>() && AnfAlgo::GetCNodeName(input) == prim::kPrimTupleGetItem->name()) { | |||
| auto tuple_getitem = input->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||
| inputs_format.push_back(AnfAlgo::GetOutputFormat( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| } else { | |||
| inputs_format.push_back(AnfAlgo::GetOutputFormat(input, 0)); | |||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(input, 0)); | |||
| } | |||
| } | |||
| // outputs format and data type | |||
| std::vector<std::string> outputs_format; | |||
| std::vector<TypeId> outputs_data_type; | |||
| for (size_t index = 0; index < outputs_list.size(); ++index) { | |||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { | |||
| auto kernel_with_index = AnfAlgo::VisitKernel(outputs_list[index], idx); | |||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second)); | |||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second)); | |||
| for (const auto &output : outputs_list) { | |||
| if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||
| auto tuple_getitem = output->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||
| outputs_format.push_back(AnfAlgo::GetOutputFormat( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||
| } else { | |||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); | |||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); | |||
| } | |||
| } | |||
| builder.SetInputsFormat(inputs_format); | |||
| @@ -320,140 +329,235 @@ AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::K | |||
| return tuple_item; | |||
| } | |||
| void ReplaceOldNode(const std::vector<AnfNodePtr> &outputs_list, const AnfNodePtr &buffer_fusion_kernel, | |||
| session::KernelGraph *kernel_graph) { | |||
| void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||
| int32_t fusion_id, const AnfNodePtr &output_item, | |||
| const AnfNodePtr &replace_item) { | |||
| for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { | |||
| auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), | |||
| output_item); | |||
| if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { | |||
| MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; | |||
| *itr = replace_item; | |||
| } | |||
| } | |||
| } | |||
| void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||
| const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| if (outputs_list.size() == 1) { // single output | |||
| (void)manager->Replace(outputs_list[0], buffer_fusion_kernel); | |||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||
| if (buffer_fusion_info.outputs_list.size() == 1) { // single output | |||
| (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); | |||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], | |||
| buffer_fusion_kernel); | |||
| } else { // multiple output | |||
| size_t real_idx = 0; | |||
| for (size_t index = 0; index < outputs_list.size(); ++index) { | |||
| if (AnfAlgo::GetOutputTensorNum(outputs_list[index]) == 1) { | |||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); | |||
| (void)manager->Replace(outputs_list[index], tuple_item); | |||
| } else { | |||
| std::vector<AnfNodePtr> make_tuple_inputs; | |||
| AbstractBasePtrList abstract_list; | |||
| make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); | |||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { | |||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); | |||
| abstract_list.push_back(tuple_item->abstract()); | |||
| make_tuple_inputs.push_back(tuple_item); | |||
| for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { | |||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); | |||
| (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); | |||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], | |||
| tuple_item); | |||
| } | |||
| } | |||
| } | |||
| void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto nodes = TopoSort(kernel_graph->get_return()); | |||
| for (auto &node : nodes) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId); | |||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); | |||
| } | |||
| } | |||
| } | |||
| void GetFusionScopeInputNodeList(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| auto fusion_id = buffer_fusion_info.first; | |||
| auto fusion_info = buffer_fusion_info.second; | |||
| for (const auto &node : fusion_info.anf_nodes) { | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { | |||
| auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), | |||
| (*buffer_fusion_infos)[fusion_id].inputs_list.end(), | |||
| cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); | |||
| } | |||
| } | |||
| AnfNodePtr make_tuple = kernel_graph->NewCNode(make_tuple_inputs); | |||
| make_tuple->set_abstract(std::make_shared<abstract::AbstractTuple>(abstract_list)); | |||
| (void)manager->Replace(outputs_list[index], make_tuple); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void GetInputList(const CNodePtr &node, const int32_t cur_fusion_id, std::vector<AnfNodePtr> *inputs_list) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(inputs_list); | |||
| auto &inputs = node->inputs(); | |||
| for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { | |||
| auto input = inputs[input_index]; | |||
| if (AnfAlgo::IsRealCNodeKernel(input)) { | |||
| if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId); | |||
| if (fusion_id != cur_fusion_id) { | |||
| inputs_list->push_back(input); | |||
| bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { | |||
| MS_EXCEPTION_IF_NULL(node1); | |||
| MS_EXCEPTION_IF_NULL(node2); | |||
| auto getitem1 = node1->cast<CNodePtr>(); | |||
| auto getitem2 = node2->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(getitem1); | |||
| MS_EXCEPTION_IF_NULL(getitem2); | |||
| auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2))); | |||
| auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2))); | |||
| return output_idx1 < output_idx2; | |||
| } | |||
| void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| auto fusion_id = buffer_fusion_info.first; | |||
| auto fusion_info = buffer_fusion_info.second; | |||
| for (const auto &node : fusion_info.anf_nodes) { | |||
| if (AnfAlgo::GetOutputTensorNum(node) == 1) { | |||
| for (auto use_node : manager->node_users()[node]) { | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); | |||
| break; | |||
| } | |||
| } | |||
| } else { | |||
| inputs_list->push_back(input); | |||
| } | |||
| } else if (input->isa<CNode>()) { | |||
| for (auto &input_in : input->cast<CNodePtr>()->inputs()) { | |||
| if (AnfAlgo::IsRealCNodeKernel(input_in)) { | |||
| if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId); | |||
| if (fusion_id != cur_fusion_id) { | |||
| inputs_list->push_back(input); | |||
| int prev_idx = 0; | |||
| std::vector<AnfNodePtr> tuple_getitem_nodes; | |||
| std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), | |||
| std::back_inserter(tuple_getitem_nodes), | |||
| [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; }); | |||
| std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); | |||
| for (auto getitem : tuple_getitem_nodes) { | |||
| auto getitem_ptr = getitem->cast<CNodePtr>(); | |||
| auto input2 = getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { | |||
| auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); | |||
| } | |||
| prev_idx = output_idx + 1; | |||
| for (auto item_use_node : manager->node_users()[getitem]) { | |||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == | |||
| fusion_info.anf_nodes.end()) { | |||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); | |||
| break; | |||
| } | |||
| } else { | |||
| inputs_list->push_back(input); | |||
| } | |||
| } | |||
| } | |||
| } else { | |||
| inputs_list->push_back(input); | |||
| } | |||
| } | |||
| } | |||
| void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| if ((*buffer_fusion_infos).find(cur_fusion_id) == (*buffer_fusion_infos).end()) { | |||
| BufferFusionInfo_t buffer_fusion_info; | |||
| (*buffer_fusion_infos)[cur_fusion_id] = buffer_fusion_info; | |||
| } | |||
| std::vector<AnfNodePtr> inputs_list; | |||
| GetInputList(node, cur_fusion_id, &inputs_list); | |||
| if (!inputs_list.empty()) { | |||
| if (!(*buffer_fusion_infos)[cur_fusion_id].inputs_list.empty()) { | |||
| (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list.insert( | |||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list.end(), inputs_list.begin(), inputs_list.end()); | |||
| (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.insert( | |||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.end(), node); | |||
| void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list, | |||
| const AnfNodePtr &fusion_kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto manager = kernel_graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| for (size_t idx = 0; idx < outputs_list.size(); ++idx) { | |||
| auto output = outputs_list[idx]; | |||
| if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||
| auto real_output = AnfAlgo::VisitKernel(output, 0); | |||
| auto output_cnode = output->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(output_cnode); | |||
| auto input2 = output_cnode->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| session::AnfWithOutIndex out_pair(real_output.first, output_idx); | |||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||
| } | |||
| } else { | |||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list = inputs_list; | |||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.push_back(node); | |||
| session::AnfWithOutIndex out_pair(output, 0); | |||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| void InsertNode(const AnfNodePtr &node, std::vector<AnfNodePtr> *list) { | |||
| MS_EXCEPTION_IF_NULL(list); | |||
| if (std::find(list->begin(), list->end(), node) == list->end()) { | |||
| (void)list->insert(list->end(), node); | |||
| void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||
| std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(fused_set); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto conv = cnode->input(1); | |||
| if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { | |||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[conv].size())}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); | |||
| std::unordered_set<AnfNodePtr> record{cnode, conv}; | |||
| candidate_fusion->push_back(record); | |||
| fused_set->insert(record.begin(), record.end()); | |||
| } | |||
| } | |||
| void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| for (auto &input : node->inputs()) { | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| if (AnfAlgo::IsRealCNodeKernel(input) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId); | |||
| if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { | |||
| BufferFusionInfo_t buffer_fusion_info; | |||
| (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; | |||
| } | |||
| if (fusion_id != cur_fusion_id) { | |||
| InsertNode(input, &((*buffer_fusion_infos)[fusion_id].outputs_list)); | |||
| } | |||
| } else if (input->isa<CNode>()) { | |||
| for (auto &input_in : input->cast<CNodePtr>()->inputs()) { | |||
| if (AnfAlgo::IsRealCNodeKernel(input_in) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId); | |||
| if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { | |||
| BufferFusionInfo_t buffer_fusion_info; | |||
| (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; | |||
| } | |||
| if (fusion_id != cur_fusion_id) { | |||
| InsertNode(input_in, &((*buffer_fusion_infos)[fusion_id].outputs_list)); | |||
| } | |||
| } | |||
| } | |||
| void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||
| std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(fused_set); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto getitem = relu_input->cast<CNodePtr>(); | |||
| auto bnupdate = getitem->input(1); | |||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||
| auto input2 = out_getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||
| } | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||
| std::unordered_set<AnfNodePtr> record{cnode, bnupdate}; | |||
| candidate_fusion->push_back(record); | |||
| fused_set->insert(record.begin(), record.end()); | |||
| } | |||
| } | |||
| void GetFusionScopeNodeList(const session::KernelGraph &kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| auto nodes = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : nodes) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { | |||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId); | |||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); | |||
| void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||
| std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(fused_set); | |||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto add = relu_input->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(add); | |||
| auto tuple_getitem = add->input(1); | |||
| if (tuple_getitem->isa<CNode>() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { | |||
| auto getitem = tuple_getitem->cast<CNodePtr>(); | |||
| auto bnupdate = getitem->input(1); | |||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||
| auto input2 = out_getitem_ptr->input(2); | |||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||
| } | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||
| std::unordered_set<AnfNodePtr> record{cnode, relu_input, bnupdate}; | |||
| candidate_fusion->push_back(record); | |||
| fused_set->insert(record.begin(), record.end()); | |||
| } | |||
| } | |||
| } | |||
| @@ -470,15 +574,14 @@ void MatchOpNamePattern(const session::KernelGraph &kernel_graph, std::unordered | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { | |||
| auto conv = cnode->input(1); | |||
| if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { | |||
| auto manager = kernel_graph.manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| auto &users = manager->node_users(); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(users[conv].size()), conv); | |||
| std::unordered_set<AnfNodePtr> record({cnode, conv}); | |||
| candidate_fusion->push_back(record); | |||
| fused_set->insert(record.begin(), record.end()); | |||
| MatchConvBnreduce(cnode, kernel_graph, fused_set, candidate_fusion); | |||
| } else if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || | |||
| AnfAlgo::GetCNodeName(cnode) == prim::kPrimRelu->name()) { | |||
| auto relu_input = cnode->input(1); | |||
| if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTensorAdd->name()) { | |||
| MatchBnupdateAddRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); | |||
| } else if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTupleGetItem->name()) { | |||
| MatchBnupdateRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); | |||
| } | |||
| } | |||
| } | |||
| @@ -536,31 +639,15 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord | |||
| } | |||
| } // namespace | |||
| void BufferFusion::GetBufferFusionInfo(const session::KernelGraph &kernel_graph, | |||
| void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | |||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||
| for (auto &node : node_list) { | |||
| if (!AnfAlgo::IsRealCNodeKernel(node)) { | |||
| continue; | |||
| } | |||
| int32_t cur_fusion_id = -1; | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { | |||
| cur_fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId); | |||
| CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos); | |||
| } | |||
| // Check if current node is output | |||
| CheckCurrentNodeIsOutput(cnode, cur_fusion_id, buffer_fusion_infos); | |||
| } | |||
| GetFusionScopeNodeList(kernel_graph, buffer_fusion_infos); | |||
| GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | |||
| GetFusionScopeInputNodeList(kernel_graph, buffer_fusion_infos); | |||
| GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | |||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||
| buffer_fusion_info.second.kernel_build_info = | |||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list, | |||
| buffer_fusion_info.second.outputs_list); | |||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | |||
| } | |||
| } | |||
| @@ -569,7 +656,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c | |||
| bool change = false; | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos; | |||
| buffer_fusion_infos.clear(); | |||
| GetBufferFusionInfo(*kernel_graph, &buffer_fusion_infos); | |||
| GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); | |||
| std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos; | |||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | |||
| @@ -600,7 +687,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c | |||
| MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; | |||
| continue; | |||
| } | |||
| change = ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_mods[fusion_id], kernel_graph); | |||
| change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); | |||
| } | |||
| MS_LOG(DEBUG) << "End Buffer Fusion"; | |||
| return change; | |||
| @@ -630,8 +717,10 @@ bool BufferFusion::MatchBufferFusionPattern(const session::KernelGraph &kernel_g | |||
| return true; | |||
| } | |||
| bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, | |||
| bool BufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||
| int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, | |||
| session::KernelGraph *kernel_graph) const { | |||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||
| auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, | |||
| buffer_fusion_info.anf_nodes, kernel_graph); | |||
| AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); | |||
| @@ -650,8 +739,8 @@ bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, | |||
| } | |||
| AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); | |||
| AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); | |||
| // replace node | |||
| ReplaceOldNode(buffer_fusion_info.outputs_list, buffer_fusion, kernel_graph); | |||
| SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); | |||
| ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); | |||
| return true; | |||
| } | |||
| @@ -30,7 +30,6 @@ namespace opt { | |||
| struct BufferFusionInfo_t { | |||
| std::vector<AnfNodePtr> anf_nodes; | |||
| std::vector<AnfNodePtr> inputs_list; | |||
| std::vector<AnfNodePtr> inputs_list_in; | |||
| std::vector<AnfNodePtr> outputs_list; | |||
| kernel::KernelBuildInfoPtr kernel_build_info; | |||
| }; | |||
| @@ -44,10 +43,10 @@ class BufferFusion : public Pass { | |||
| bool Run(const FuncGraphPtr &graph) override; | |||
| private: | |||
| void GetBufferFusionInfo(const session::KernelGraph &kernel_graph, | |||
| void GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const; | |||
| bool ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, | |||
| session::KernelGraph *kernel_graph) const; | |||
| bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||
| const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; | |||
| bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph) const; | |||
| bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; | |||
| }; | |||
| @@ -16,6 +16,9 @@ | |||
| #include "pre_activate/ascend/ir_fission/topk_split.h" | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <unordered_set> | |||
| #include "pre_activate/common/helper.h" | |||
| #include "kernel/kernel_build_info.h" | |||
| #include "utils/utils.h" | |||
| #include "session/kernel_graph.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| @@ -25,6 +28,7 @@ | |||
| namespace mindspore { | |||
| namespace opt { | |||
| constexpr size_t kFloat16Len = 2; // size of float16; | |||
| constexpr size_t kTopkIndexK = 1; | |||
| namespace { | |||
| tensor::TensorPtr CreateTensor(const AnfNodePtr &node) { | |||
| // 1 create tensor | |||
| @@ -70,37 +74,68 @@ ValueNodePtr CreateValueNode(const AnfNodePtr &node) { | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), indices_const.get()); | |||
| return indices_const; | |||
| } | |||
| // Build the kernel-build info used to check TopK support: two default-format float16 inputs | |||
| // (values and the generated indices constant) and two default-format outputs — float16 values | |||
| // and int32 indices. | |||
| kernel::KernelBuildInfoPtr CreateKernelBuildInfo() { | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | |||
| builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); | |||
| builder.SetOutputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); | |||
| builder.SetInputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16}); | |||
| builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeInt32}); | |||
| return builder.Build(); | |||
| } | |||
| } // namespace | |||
| const BaseRef TopKSplit::DefinePattern() const { | |||
| VarPtr X = std::make_shared<Var>(); | |||
| MS_EXCEPTION_IF_NULL(X); | |||
| VarPtr X1 = std::make_shared<Var>(); | |||
| VarPtr X2 = std::make_shared<Var>(); | |||
| auto prim = std::make_shared<Primitive>(kTopKOpName); | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| return VectorRef({prim, X}); | |||
| return VectorRef({prim, X1, X2}); | |||
| } | |||
| const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto kernel_graph = func_graph->cast<KernelGraphPtr>(); | |||
| auto indices_const = CreateValueNode(node); | |||
| // set value node as topk's input | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_LOG(INFO) << "already has input size: " << cnode->inputs().size(); | |||
| cnode->add_input(indices_const); | |||
| // Copy a new node to check supported. | |||
| std::vector<AnfNodePtr> new_inputs{NewValueNode(std::make_shared<Primitive>(kTopKOpName))}; | |||
| new_inputs.insert(new_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); | |||
| CNodePtr new_cnode = func_graph->NewCNode(new_inputs); | |||
| MS_EXCEPTION_IF_NULL(new_cnode); | |||
| new_cnode->set_abstract(cnode->abstract()); | |||
| new_cnode->set_scope(cnode->scope()); | |||
| AnfAlgo::CopyNodeAttrs(cnode, new_cnode); | |||
| CheckCNodeInputSize(new_cnode, kTopkInputNum); | |||
| // Convert the tensor input to scalar and convert it to attr | |||
| auto input_k = new_cnode->input(kTopkIndexK + 1); | |||
| MS_EXCEPTION_IF_NULL(input_k); | |||
| if (!IsValueNode<tensor::Tensor>(input_k)) { | |||
| return nullptr; | |||
| } | |||
| ValuePtr value = GetValueNode(input_k); | |||
| MS_EXCEPTION_IF_NULL(value); | |||
| auto tensor = value->cast<tensor::TensorPtr>(); | |||
| MS_EXCEPTION_IF_NULL(tensor); | |||
| int32_t *data = reinterpret_cast<int32_t *>(tensor->data_c()); | |||
| MS_EXCEPTION_IF_NULL(data); | |||
| auto new_value_node = std::make_shared<ValueNode>(MakeValue(*data)); | |||
| new_cnode->set_input(kTopkIndexK + 1, new_value_node); | |||
| std::unordered_set<size_t> attr_index{kTopkIndexK}; | |||
| ConstInputToAttr(new_cnode, attr_index); | |||
| auto indices_const = CreateValueNode(new_cnode); | |||
| new_cnode->add_input(indices_const); | |||
| MS_EXCEPTION_IF_NULL(supported_checker_); | |||
| if (!supported_checker_->CheckSupported(new_cnode, CreateKernelBuildInfo())) { | |||
| return nullptr; | |||
| } | |||
| if (kernel_graph != nullptr) { | |||
| kernel_graph->AddValueNodeToGraph(indices_const); | |||
| } | |||
| CNodePtr new_cnode = nullptr; | |||
| if (kernel_graph == nullptr) { | |||
| new_cnode = std::make_shared<CNode>(*cnode); | |||
| } else { | |||
| new_cnode = kernel_graph->NewCNode(cnode); | |||
| } | |||
| MS_EXCEPTION_IF_NULL(new_cnode); | |||
| return new_cnode; | |||
| } | |||
| } // namespace opt | |||
| @@ -16,15 +16,22 @@ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ | |||
| #include <memory> | |||
| #include "pre_activate/common/optimizer.h" | |||
| #include "pre_activate/ascend/ascend_helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class TopKSplit : public PatternProcessPass { | |||
| public: | |||
| explicit TopKSplit(bool multigraph = true) : PatternProcessPass("topk_split", multigraph) {} | |||
| explicit TopKSplit(bool multigraph = true) | |||
| : PatternProcessPass("topk_split", multigraph), supported_checker_(std::make_shared<SupportedChecker>()) {} | |||
| ~TopKSplit() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| SupportedCheckerPtr supported_checker_; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -72,6 +72,38 @@ AnfNodePtr GetMul0(const FuncGraphPtr &graph, const AnfNodePtr &input2, const An | |||
| } | |||
| return mul0; | |||
| } | |||
| // Decide whether the ConfusionMulGrad fusion must be abandoned for this candidate. | |||
| // Returns true (quit) when mul0 is not a CNode, when the scope indicates a | |||
| // _VirtualDatasetCell network, or when reduce_sum's output feeds mul0 (or mul0's | |||
| // first input) — fusing in that case would introduce a cycle into the graph. | |||
| bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const AnfNodePtr &reduce_sum) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(mul0_anf); | |||
| MS_EXCEPTION_IF_NULL(reduce_sum); | |||
| if (!mul0_anf->isa<CNode>()) { | |||
| return true; | |||
| } | |||
| auto mul0 = mul0_anf->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(mul0); | |||
| // when network is _VirtualDatasetCell, quit fusion | |||
| if (mul0->fullname_with_scope().find("network-_VirtualDatasetCell") != std::string::npos) { | |||
| return true; | |||
| } | |||
| auto manager = graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| if (manager->node_users().find(reduce_sum) == manager->node_users().end()) { | |||
| MS_LOG(EXCEPTION) << "node has no output in manager"; | |||
| } | |||
| // Scan reduce_sum's users: a user that is mul0 itself or mul0's first input means a cycle. | |||
| const AnfNodeIndexSet &outputs_set = manager->node_users()[reduce_sum]; | |||
| auto it = std::find_if(outputs_set.begin(), outputs_set.end(), [&mul0](const std::pair<AnfNodePtr, int> &node_index) { | |||
| return node_index.first == mul0->input(1) || node_index.first == mul0; | |||
| }); | |||
| if (it != outputs_set.end()) { | |||
| MS_LOG(INFO) << "ReduceSum's output node is mul0's input or mul0! If do fusion, graph will exist a circle"; | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| } // namespace | |||
| const BaseRef ConfusionMulGradFusion::DefinePattern() const { | |||
| @@ -90,9 +122,6 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons | |||
| auto reduce_sum = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(reduce_sum); | |||
| auto mul1 = reduce_sum->input(1); | |||
| if (mul1->fullname_with_scope().find("bert/encoder") == std::string::npos) { | |||
| return nullptr; | |||
| } | |||
| if (IsUsedByOthers(graph, mul1)) { | |||
| MS_LOG(INFO) << "Mul1 is used by others, quit fusion!"; | |||
| return nullptr; | |||
| @@ -102,6 +131,9 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons | |||
| MS_LOG(INFO) << "Mul0 do not exist, quit fusion"; | |||
| return nullptr; | |||
| } | |||
| if (QuitFusion(graph, mul0, node)) { | |||
| return nullptr; | |||
| } | |||
| auto fusion_node = CreateFusionNode(graph, reduce_sum, mul0, input3); | |||
| std::vector<AnfNodePtr> fusion_node_outputs; | |||
| @@ -0,0 +1,71 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "utils/utils.h" | |||
| #include "operator/ops.h" | |||
| #include "device/kernel_info.h" | |||
| #include "pre_activate/common/helper.h" | |||
| #include "pre_activate/common/optimizer.h" | |||
| #include "pre_activate/ascend/ascend_helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Refresh the kernel-build info of every Parameter input of `cnode` whose device format | |||
| // disagrees with the format this cnode expects at that input slot. Only the output format | |||
| // and device type of the parameter are rewritten; other inputs are left untouched. | |||
| void DoRefresh(const CNodePtr &cnode) { | |||
| if (cnode == nullptr) { | |||
| MS_LOG(EXCEPTION) << "node is nullptr"; | |||
| } | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(cnode); | |||
| for (size_t index = 0; index < input_num; ++index) { | |||
| auto input_node = AnfAlgo::GetInputNode(cnode, index); | |||
| if (!input_node->isa<Parameter>()) { | |||
| continue; | |||
| } | |||
| auto expected_format = AnfAlgo::GetInputFormat(cnode, index); | |||
| auto current_format = AnfAlgo::GetOutputFormat(input_node, 0); | |||
| if (current_format == expected_format) { | |||
| continue; | |||
| } | |||
| // Formats disagree: rebuild the parameter's build info with the consumer's format. | |||
| auto device_type = AnfAlgo::GetOutputDeviceDataType(input_node, 0); | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| builder->SetOutputsFormat({expected_format}); | |||
| builder->SetOutputsDeviceType({device_type}); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_node.get()); | |||
| } | |||
| } | |||
| // Walk the whole graph in topological order and refresh parameter formats for every | |||
| // BNTrainingUpdate node found. Returns false only when func_graph is null. | |||
| bool RefreshParameterFormat::Run(const FuncGraphPtr &func_graph) { | |||
| if (func_graph == nullptr) { | |||
| MS_LOG(ERROR) << "func_graph is nullptr."; | |||
| return false; | |||
| } | |||
| for (const auto &node : TopoSort(func_graph->get_return())) { | |||
| if (node == nullptr || !node->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| if (cnode == nullptr) { | |||
| continue; | |||
| } | |||
| // Only BNTrainingUpdate consumers need their parameter formats refreshed. | |||
| if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingUpdateOpName) { | |||
| DoRefresh(cnode); | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <utility> | |||
| #include "ir/anf.h" | |||
| #include "pre_activate/common/pass.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| // Graph pass that rewrites the kernel-build info (format/device type) of Parameter | |||
| // inputs feeding BNTrainingUpdate nodes so they match the consumer's expected format. | |||
| class RefreshParameterFormat : public Pass { | |||
| public: | |||
| explicit RefreshParameterFormat(size_t groups = 1) : Pass("refresh_parameter_format"), groups_(groups) {} | |||
| ~RefreshParameterFormat() override = default; | |||
| bool Run(const FuncGraphPtr &graph) override; | |||
| private: | |||
| // NOTE(review): groups_ is stored but not referenced by the visible Run/DoRefresh code — confirm it is intended for future use. | |||
| size_t groups_ = 1; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ | |||
| @@ -299,6 +299,10 @@ tensor::TensorPtr CreateTensorWithValueTuple(const ValueTuplePtr &value_tuple_pt | |||
| tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) { | |||
| MS_EXCEPTION_IF_NULL(value_tuple); | |||
| tensor::TensorPtr tensor = nullptr; | |||
| if (value_tuple->value().empty()) { | |||
| MS_LOG(WARNING) << "The value tuple is empty."; | |||
| return nullptr; | |||
| } | |||
| ValuePtr v = *(value_tuple->value().begin()); | |||
| MS_EXCEPTION_IF_NULL(v); | |||
| // Currently we only deal with the scalar tuple | |||
| @@ -422,5 +426,47 @@ AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePt | |||
| AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get()); | |||
| return tuple_getitem; | |||
| } | |||
| // Convert the constant (ValueNode) inputs of `cnode` selected by `input_attrs` into | |||
| // attributes on the node's primitive. Each converted input is removed from the input | |||
| // list and stored under its name from the primitive's kAttrInputNames attribute; the | |||
| // remaining inputs and the input_names attr are rewritten accordingly. Indices in | |||
| // input_attrs are 0-based over the real inputs (input(0) is the primitive itself). | |||
| void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| std::vector<AnfNodePtr> new_inputs; | |||
| std::vector<std::string> new_input_names; | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(cnode); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| auto input_names = primitive->GetAttr(kAttrInputNames); | |||
| if (input_names == nullptr) { | |||
| // Without input_names there is no attr name to store a converted input under; bail out unchanged. | |||
| MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]"; | |||
| return; | |||
| } | |||
| auto input_names_vec = GetValue<std::vector<std::string>>(input_names); | |||
| auto inputs = cnode->inputs(); | |||
| // Keep input 0 (the primitive) in place. | |||
| new_inputs.push_back(inputs[0]); | |||
| bool need_update = false; | |||
| for (size_t i = 0; i < inputs.size() - 1; ++i) { | |||
| auto input_node = inputs[i + 1]; | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) { | |||
| // Selected constant input: move its value onto the primitive as an attribute. | |||
| auto value_node = input_node->cast<ValueNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]"; | |||
| if (i >= input_names_vec.size()) { | |||
| MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]"; | |||
| } | |||
| primitive->set_attr(input_names_vec[i], value_node->value()); | |||
| need_update = true; | |||
| } else { | |||
| // Non-converted input: keep it and (when known) its name. | |||
| new_inputs.push_back(input_node); | |||
| if (i < input_names_vec.size()) { | |||
| new_input_names.push_back(input_names_vec[i]); | |||
| } | |||
| } | |||
| } | |||
| if (need_update) { | |||
| // Update cnode's inputs | |||
| cnode->set_inputs(new_inputs); | |||
| // Update cnode's input_names attr | |||
| primitive->set_attr(kAttrInputNames, MakeValue(new_input_names)); | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -19,6 +19,7 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <unordered_set> | |||
| #include "ir/func_graph.h" | |||
| #include "session/kernel_graph.h" | |||
| #include "common/utils.h" | |||
| @@ -86,6 +87,7 @@ constexpr size_t kAdamApplyOneOutputNum = 3; | |||
| constexpr size_t kBackendTransDataInputNum = 2; | |||
| constexpr size_t kApplyMomentumInputNum = 6; | |||
| constexpr size_t kBiasAddInputNum = 3; | |||
| constexpr size_t kTopkInputNum = 3; | |||
| enum FusedBatchNormInput { | |||
| kX = 1, | |||
| @@ -150,6 +152,8 @@ void RemoveNopNode(session::KernelGraph *const graph); | |||
| AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx); | |||
| bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node); | |||
| void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs); | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_HELPER_H_ | |||
| @@ -52,7 +52,6 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { | |||
| Register(kFlattenGradOpName, {1}); | |||
| Register(kExpandDimsOpName, {1}); | |||
| Register(kSplitOpName, {0}); | |||
| Register(kTopKOpName, {1}); | |||
| Register(kErfOpName, {1}); | |||
| Register(kSparseApplyAdagradOpName, {2}); | |||
| Register(kResizeNearestNeighborGrad, {1}); | |||
| @@ -18,10 +18,10 @@ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include <memory> | |||
| #include "pre_activate/pass/const_input_to_attr_registry.h" | |||
| #include "pre_activate/common/helper.h" | |||
| #include "utils/utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "operator/ops.h" | |||
| @@ -29,50 +29,6 @@ | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
| void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| std::vector<AnfNodePtr> new_inputs; | |||
| std::vector<std::string> new_input_names; | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(cnode); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| auto input_names = primitive->GetAttr(kAttrInputNames); | |||
| if (input_names == nullptr) { | |||
| MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]"; | |||
| return; | |||
| } | |||
| auto input_names_vec = GetValue<std::vector<std::string>>(input_names); | |||
| auto inputs = cnode->inputs(); | |||
| new_inputs.push_back(inputs[0]); | |||
| bool need_update = false; | |||
| for (size_t i = 0; i < inputs.size() - 1; ++i) { | |||
| auto input_node = inputs[i + 1]; | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) { | |||
| auto value_node = input_node->cast<ValueNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]"; | |||
| if (i >= input_names_vec.size()) { | |||
| MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]"; | |||
| } | |||
| primitive->set_attr(input_names_vec[i], value_node->value()); | |||
| need_update = true; | |||
| } else { | |||
| new_inputs.push_back(input_node); | |||
| if (i < input_names_vec.size()) { | |||
| new_input_names.push_back(input_names_vec[i]); | |||
| } | |||
| } | |||
| } | |||
| if (need_update) { | |||
| // Update cnode's inputs | |||
| cnode->set_inputs(new_inputs); | |||
| // Update cnode's input_names attr | |||
| primitive->set_attr(kAttrInputNames, MakeValue(new_input_names)); | |||
| } | |||
| } | |||
| } // namespace | |||
| const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node, | |||
| const EquivPtr &) const { | |||
| if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { | |||
| @@ -825,6 +825,8 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n | |||
| static std::map<std::string, std::map<size_t, size_t>> spec_node_list = { | |||
| {prim::kPrimConv2DBackpropInput->name(), {{0, 1}, {1, 0}}}, | |||
| {prim::kPrimConv2DBackpropFilter->name(), {{0, 1}, {1, 0}}}, | |||
| {kFusionOpConv2DBackpropInputReluGradV2Name, {{0, 1}, {1, 0}}}, | |||
| {kFusionOpConv2DBackpropInputAddNReluGradV2Name, {{0, 1}, {1, 0}}}, | |||
| {prim::kPrimLogSoftmaxGrad->name(), {{0, 1}, {1, 0}}}, | |||
| {prim::kPrimLayerNormGrad->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}, {4, 4}}}, | |||
| {prim::kPrimLayerNormBetaGammaBackprop->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}}}, | |||
| @@ -835,7 +837,7 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n | |||
| auto node_name = AnfAlgo::GetCNodeName(anf_node); | |||
| if (AnfAlgo::GetKernelType(anf_node) == TBE_KERNEL) { | |||
| auto find = spec_node_list.find(node_name); | |||
| if (find != spec_node_list.end()) { | |||
| if (find != spec_node_list.end() && cur_index < find->second.size()) { | |||
| ret = find->second[cur_index]; | |||
| MS_LOG(INFO) << "Real input index change to" << ret << ", node name:" << node_name; | |||
| } | |||
| @@ -171,20 +171,17 @@ GeTensorPtr TransformUtil::ConvertTensor(const MeTensorPtr &tensor, const std::s | |||
| MS_LOG(ERROR) << "The Me Tensor data type size is wrong, type size is: " << type_size; | |||
| return nullptr; | |||
| } | |||
| // get tensor buff size | |||
| size_t data_buff_size = 0; | |||
| size_t elements_num = IntToSize(tensor->ElementsNum()); | |||
| if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size >= elements_num) { | |||
| data_buff_size = elements_num * type_size; | |||
| if (UINT_MAX / type_size < elements_num) { | |||
| MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size | |||
| << " overflowed UINT_MAX: " << UINT_MAX << "."; | |||
| return nullptr; | |||
| } | |||
| // get tensor buff size | |||
| size_t data_buff_size = elements_num * type_size; | |||
| if (data_buff_size == 0) { | |||
| if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size < elements_num) { | |||
| MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size | |||
| << " overflowed UINT_MAX: " << UINT_MAX << "."; | |||
| } else { | |||
| MS_LOG(ERROR) << "The Me Tensor data buff size is 0."; | |||
| } | |||
| return nullptr; | |||
| MS_LOG(INFO) << "The Me Tensor data buff size is 0."; | |||
| } | |||
| // create ge tensor | |||
| auto desc = GetGeTensorDesc(tensor->shape_c(), tensor->data_type(), format); | |||
| @@ -359,7 +359,12 @@ void MsContext::GetGeOptions(std::map<std::string, std::string> *ge_options) con | |||
| } | |||
| // Enable auto mixed precision according to the context options | |||
| (*ge_options)["ge.exec.auto_mix_precision"] = std::to_string(auto_mixed_precision_flag_); | |||
| if (auto_mixed_precision_flag_) { | |||
| (*ge_options)["ge.exec.precision_mode"] = "allow_mix_precision"; | |||
| } else { | |||
| (*ge_options)["ge.exec.precision_mode"] = "must_keep_origin_dtype"; | |||
| } | |||
| // Disable the global variable acc, only enable it whlie adding training graph in pipeline | |||
| (*ge_options)["ge.exec.variable_acc"] = "0"; | |||
| #endif | |||
| @@ -438,4 +443,18 @@ bool MsContext::PynativeInitGe() { | |||
| is_pynative_ge_init_ = true; | |||
| return true; | |||
| } | |||
| // TSD is considered open while at least one reference to it is held. | |||
| bool MsContext::IsTsdOpened() { return tsd_ref_ > 0; } | |||
| // GE is considered initialized while at least one reference to it is held. | |||
| bool MsContext::IsGeInited() { return ge_ref_ > 0; } | |||
| } // namespace mindspore | |||
| @@ -82,8 +82,10 @@ class MsContext { | |||
| bool OpenTsd(); | |||
| bool CloseTsd(bool force = false); | |||
| bool IsTsdOpened(); | |||
| bool InitGe(); | |||
| bool FinalizeGe(bool force = false); | |||
| bool IsGeInited(); | |||
| void set_enable_hccl(bool enable_hccl) { enable_hccl_ = enable_hccl; } | |||
| bool enable_hccl() const { return enable_hccl_; } | |||
| bool PynativeInitGe(); | |||
| @@ -122,6 +122,10 @@ constexpr auto kSendOpName = "Send"; | |||
| constexpr auto kRecvOpName = "Recv"; | |||
| constexpr auto kReluV2OpName = "ReLUV2"; | |||
| constexpr auto kReluGradV2OpName = "ReluGradV2"; | |||
| constexpr auto kAddNOpName = "AddN"; | |||
| constexpr auto kConv2DBackpropInputOpName = "Conv2DBackpropInput"; | |||
| constexpr auto kFusionOpConv2DBackpropInputReluGradV2Name = "FusionOp_Conv2DBackpropInput_ReluGradV2"; | |||
| constexpr auto kFusionOpConv2DBackpropInputAddNReluGradV2Name = "FusionOp_Conv2DBackpropInput_AddN_ReluGradV2"; | |||
| // attr key name | |||
| constexpr auto kAttrInputNames = "input_names"; | |||
| @@ -22,7 +22,7 @@ from mindspore import context | |||
| from mindspore import log as logger | |||
| from mindspore.parallel._utils import _get_parallel_mode | |||
| from .._c_expression import generate_key, Executor_, Tensor, MetaTensor | |||
| from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_ge | |||
| from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend | |||
| from .tensor import Tensor as MsTensor | |||
| # store ms_function class compiled pipeline cache | |||
| @@ -184,7 +184,7 @@ class _MindSporeFunction: | |||
| @_wrap_func | |||
| def __call__(self, *args): | |||
| init_ge() | |||
| init_backend() | |||
| converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args) | |||
| if not converted: | |||
| raise RuntimeError('Process function parameter is failure') | |||
| @@ -15,6 +15,7 @@ | |||
| """Alexnet.""" | |||
| import mindspore.nn as nn | |||
| from mindspore.common.initializer import TruncatedNormal | |||
| from mindspore.ops import operations as P | |||
| def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"): | |||
| weight = weight_variable() | |||
| @@ -44,7 +45,7 @@ class AlexNet(nn.Cell): | |||
| self.conv4 = conv(384, 384, 3, pad_mode="same") | |||
| self.conv5 = conv(384, 256, 3, pad_mode="same") | |||
| self.relu = nn.ReLU() | |||
| self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2) | |||
| self.max_pool2d = P.MaxPool(ksize=3, strides=2) | |||
| self.flatten = nn.Flatten() | |||
| self.fc1 = fc_with_initialize(6*6*256, 4096) | |||
| self.fc2 = fc_with_initialize(4096, 4096) | |||
| @@ -0,0 +1,284 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """MobileNetV2 model define""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.operations import TensorAdd | |||
| from mindspore import Parameter, Tensor | |||
| from mindspore.common.initializer import initializer | |||
| __all__ = ['MobileNetV2', 'mobilenet_v2'] | |||
| def _make_divisible(v, divisor, min_value=None): | |||
| """ | |||
| This function is taken from the original tf repo. | |||
| It ensures that all layers have a channel number that is divisible by 8 | |||
| It can be seen here: | |||
| https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py | |||
| :param v: | |||
| :param divisor: | |||
| :param min_value: | |||
| :return: | |||
| """ | |||
| if min_value is None: | |||
| min_value = divisor | |||
| new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) | |||
| # Make sure that round down does not go down by more than 10%. | |||
| if new_v < 0.9 * v: | |||
| new_v += divisor | |||
| return new_v | |||
class GlobalAvgPooling(nn.Cell):
    """
    Global average pooling over the spatial axes (H, W).

    Returns:
        Tensor, output tensor with the spatial axes reduced away.

    Examples:
        >>> GlobalAvgPooling()
    """
    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        # keep_dims=False drops the pooled axes from the output shape.
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        # Average over axes 2 and 3 (height and width) of an NCHW tensor.
        return self.mean(x, (2, 3))
class DepthwiseConv(nn.Cell):
    """
    Depthwise convolution wrapper around ``P.DepthwiseConv2dNative``.

    Args:
        in_planes (int): Input channel.
        kernel_size (int): Square kernel size.
        stride (int): Stride size.
        pad_mode (str): pad mode in (pad, same, valid).
        pad (int): Implicit padding on both sides of the input.
        channel_multiplier (int): Output channels per input channel. Default: 1.
        has_bias (bool): Whether a bias term is added. Default: False.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1)
    """
    def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False):
        super(DepthwiseConv, self).__init__()
        self.has_bias = has_bias
        self.in_channels = in_planes
        self.channel_multiplier = channel_multiplier
        self.out_channels = in_planes * channel_multiplier
        self.kernel_size = (kernel_size, kernel_size)
        self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=channel_multiplier, kernel_size=kernel_size,
                                                      stride=stride, pad_mode=pad_mode, pad=pad)
        self.bias_add = P.BiasAdd()
        # Weight layout expected by DepthwiseConv2dNative: (K, Cin, kH, kW).
        self.weight = Parameter(initializer('ones', [channel_multiplier, in_planes, *self.kernel_size]),
                                name='weight')
        self.bias = (Parameter(initializer('zeros', [channel_multiplier * in_planes]), name='bias')
                     if has_bias else None)

    def construct(self, x):
        out = self.depthwise_conv(x, self.weight)
        if self.has_bias:
            out = self.bias_add(out, self.bias)
        return out
class ConvBNReLU(nn.Cell):
    """
    Convolution or depthwise convolution fused with BatchNorm and ReLU6.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Square kernel size. Default: 3.
        stride (int): Stride size for the convolutional layer. Default: 1.
        groups (int): Channel group; 1 selects a plain convolution while
            a value equal to the input channel selects depthwise. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        # "Same"-style explicit padding for odd kernel sizes.
        padding = (kernel_size - 1) // 2
        if groups == 1:
            conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride,
                             pad_mode='pad', padding=padding)
        else:
            conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding)
        self.features = nn.SequentialCell([conv, nn.BatchNorm2d(out_planes), nn.ReLU6()])

    def construct(self, x):
        return self.features(x)
class InvertedResidual(nn.Cell):
    """
    MobileNetV2 inverted residual block.

    Expands the input channels by ``expand_ratio``, applies a depthwise
    convolution, then projects back with a linear pointwise convolution.
    A residual shortcut is added when the block is stride-1 and the input
    and output channel counts match.

    Args:
        inp (int): Input channel.
        oup (int): Output channel.
        stride (int): Stride of the depthwise convolution; must be 1 or 2.
        expand_ratio (int): Expand ratio of the hidden channels.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> InvertedResidual(3, 256, 1, 1)
    """
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]
        hidden_dim = int(round(inp * expand_ratio))
        # Shortcut is only valid when shapes are preserved end-to-end.
        self.use_res_connect = stride == 1 and inp == oup
        layers = []
        if expand_ratio != 1:
            # pw: expand channels before the depthwise stage
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # pw-linear: projection with no activation
            nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False),
            nn.BatchNorm2d(oup),
        ])
        self.conv = nn.SequentialCell(layers)
        self.add = TensorAdd()
        # NOTE: removed unused `self.cast = P.Cast()` — it was never invoked.

    def construct(self, x):
        identity = x
        x = self.conv(x)
        if self.use_res_connect:
            return self.add(identity, x)
        return x
class MobileNetV2(nn.Cell):
    """
    MobileNetV2 architecture.

    Args:
        num_classes (int): Number of output classes. Default: 1000.
        width_mult (float): Channel-width multiplier; channel counts are
            rounded via ``_make_divisible``. Default: 1.
        has_dropout (bool): Whether dropout is used in the head. Default: False.
        inverted_residual_setting (list): Inverted residual settings as
            ``[t, c, n, s]`` rows. Default: None (uses the paper's config).
        round_nearest (int): Channels are rounded to a multiple of this. Default: 8.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> MobileNetV2(num_classes=1000)
    """
    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # setting of inverted residual blocks:
        # t = expand ratio, c = output channels, n = repeats, s = first-block stride
        self.cfgs = inverted_residual_setting
        if inverted_residual_setting is None:
            self.cfgs = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]
        # building first layer; width_mult < 1 never shrinks the final width
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]
        # building inverted residual blocks; only the first block of each
        # group downsamples, the repeats are stride-1
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head: global pool -> (optional dropout) -> classifier
        # NOTE(review): nn.Dropout's first argument is keep_prob in this API,
        # so 0.2 keeps only 20% of activations — confirm the intended rate.
        head = ([GlobalAvgPooling(), nn.Dense(self.out_channels, num_classes, has_bias=True)] if not has_dropout else
                [GlobalAvgPooling(), nn.Dropout(0.2), nn.Dense(self.out_channels, num_classes, has_bias=True)])
        self.head = nn.SequentialCell(head)
        self._initialize_weights()

    def construct(self, x):
        x = self.features(x)
        x = self.head(x)
        return x

    def _initialize_weights(self):
        """
        Initialize weights in place: He-normal for conv weights, ones/zeros
        for BatchNorm gamma/beta, N(0, 0.01) for Dense weights, zeros for
        all biases.

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """
        for _, m in self.cells_and_names():
            if isinstance(m, (nn.Conv2d, DepthwiseConv)):
                # He initialization scaled by fan-out (k*k*out_channels).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
                                                                    m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape(), dtype="float32")))
                m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape(), dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
def mobilenet_v2(**kwargs):
    """
    Construct a MobileNetV2 model.

    Args:
        **kwargs: keyword arguments forwarded to ``MobileNetV2``
            (e.g. ``num_classes``, ``width_mult``).

    Returns:
        Cell, a ``MobileNetV2`` instance.
    """
    net = MobileNetV2(**kwargs)
    return net
| @@ -260,3 +260,23 @@ def resnet50(class_num=10): | |||
| [256, 512, 1024, 2048], | |||
| [1, 2, 2, 2], | |||
| class_num) | |||
def resnet101(class_num=1001):
    """
    Get ResNet101 neural network.

    Args:
        class_num (int): Class number. Default: 1001.

    Returns:
        Cell, cell instance of ResNet101 neural network.

    Examples:
        >>> net = resnet101(1001)
    """
    # Stage depths and channel widths for the 101-layer configuration.
    layer_nums = [3, 4, 23, 3]
    in_channels = [64, 256, 512, 1024]
    out_channels = [256, 512, 1024, 2048]
    strides = [1, 2, 2, 2]
    return ResNet(ResidualBlock, layer_nums, in_channels, out_channels, strides, class_num)
| @@ -22,7 +22,7 @@ from ..common import dtype as mstype | |||
| from ..common.api import _executor | |||
| from .._checkparam import _check_str_by_regular | |||
| from ..common.parameter import Parameter, ParameterTuple | |||
| from .._c_expression import init_ge | |||
| from .._c_expression import init_backend | |||
| from ..ops.primitive import Primitive | |||
| from ..parallel._tensor import _load_tensor_by_layout | |||
| from ..parallel._utils import _get_parallel_mode | |||
| @@ -66,7 +66,7 @@ class Cell: | |||
| self._phase = 'train' | |||
| self._parameter_layout_dict = {} | |||
| self._create_time = int(time.time() * 1e9) | |||
| init_ge() | |||
| init_backend() | |||
| # call gc to release GE session resources used by non-used cell objects | |||
| gc.collect() | |||
| self._construct_inputs_num = 0 | |||
| @@ -32,6 +32,7 @@ def piecewise_constant_lr(milestone, learning_rates): | |||
| Args: | |||
| milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list. | |||
| Every element is a milestone step, and must be greater than 0. | |||
| learning_rates (Union[list[float], tuple[float]]): A list of learning rates. | |||
| Returns: | |||
| @@ -40,7 +41,7 @@ def piecewise_constant_lr(milestone, learning_rates): | |||
| Examples: | |||
| >>> milestone = [2, 5, 10] | |||
| >>> learning_rates = [0.1, 0.05, 0.01] | |||
| >>> lr = piecewise_constant_lr(milestone, learning_rates) | |||
| >>> piecewise_constant_lr(milestone, learning_rates) | |||
| [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01] | |||
| """ | |||
| validator.check_value_type('milestone', milestone, (tuple, list), None) | |||
| @@ -100,7 +101,7 @@ def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, | |||
| >>> total_step = 6 | |||
| >>> step_per_epoch = 2 | |||
| >>> decay_epoch = 1 | |||
| >>> lr = exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch) | |||
| >>> exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch) | |||
| [0.1, 0.1, 0.09000000000000001, 0.09000000000000001, 0.08100000000000002, 0.08100000000000002] | |||
| """ | |||
| _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | |||
| @@ -142,7 +143,7 @@ def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, | |||
| >>> total_step = 6 | |||
| >>> step_per_epoch = 2 | |||
| >>> decay_epoch = 2 | |||
| >>> lr = natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||
| >>> natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||
| [0.1, 0.1, 0.1, 0.1, 0.016529888822158657, 0.016529888822158657] | |||
| """ | |||
| _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | |||
| @@ -185,7 +186,7 @@ def inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, deca | |||
| >>> total_step = 6 | |||
| >>> step_per_epoch = 1 | |||
| >>> decay_epoch = 1 | |||
| >>> lr = inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||
| >>> inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||
| [0.1, 0.06666666666666667, 0.05, 0.04, 0.03333333333333333, 0.028571428571428574] | |||
| """ | |||
| _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | |||
| @@ -227,7 +228,7 @@ def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch): | |||
| >>> total_step = 6 | |||
| >>> step_per_epoch = 2 | |||
| >>> decay_epoch = 2 | |||
| >>> lr = cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch) | |||
| >>> cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch) | |||
| [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01] | |||
| """ | |||
| validator.check_float_positive('min_lr', min_lr, None) | |||
| @@ -282,7 +283,7 @@ def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_e | |||
| >>> step_per_epoch = 2 | |||
| >>> decay_epoch = 2 | |||
| >>> power = 0.5 | |||
| >>> lr = polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power) | |||
| >>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power) | |||
| [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01] | |||
| """ | |||
| validator.check_float_positive('learning_rate', learning_rate, None) | |||
| @@ -104,7 +104,7 @@ class FTRL(Optimizer): | |||
| self.lr_power = lr_power | |||
| self.reciprocal_scale = 1.0 / loss_scale | |||
| self.weight_decay = weight_decay | |||
| self.decay_tf = tuple((lambda:True)() for x in self.parameters) | |||
| self.decay_tf = tuple((lambda: True)() for x in self.parameters) | |||
| self.hyper_map = C.HyperMap() | |||
| self.opt = P.ApplyFtrl(use_locking=use_locking) | |||
| self.one = Tensor(1, mstype.int32) | |||
| @@ -118,5 +118,6 @@ class FTRL(Optimizer): | |||
| if self.reciprocal_scale != 1.0: | |||
| grads = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), grads) | |||
| lr = self.learning_rate | |||
| success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads, params, moments) | |||
| success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads, | |||
| params, moments) | |||
| return success | |||