| @@ -1,7 +1,7 @@ | |||||
|  |  | ||||
| ============================================================ | ============================================================ | ||||
| - [What is MindSpore?](#what-is-mindspore) | |||||
| - [What Is MindSpore?](#what-is-mindspore) | |||||
| - [Automatic Differentiation](#automatic-differentiation) | - [Automatic Differentiation](#automatic-differentiation) | ||||
| - [Automatic Parallel](#automatic-parallel) | - [Automatic Parallel](#automatic-parallel) | ||||
| - [Installation](#installation) | - [Installation](#installation) | ||||
| @@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem. | |||||
| <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/> | <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/> | ||||
| For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html). | |||||
| For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html). | |||||
| ### Automatic Differentiation | ### Automatic Differentiation | ||||
| @@ -76,13 +76,36 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex | |||||
| 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package. | 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package. | ||||
| ``` | ``` | ||||
| pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl | |||||
| pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||||
| ``` | ``` | ||||
| 2. Run the following command to verify the install. | 2. Run the following command to verify the install. | ||||
| ```python | |||||
| import numpy as np | |||||
| import mindspore.context as context | |||||
| import mindspore.nn as nn | |||||
| from mindspore import Tensor | |||||
| from mindspore.ops import operations as P | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target="CPU") | |||||
| class Mul(nn.Cell): | |||||
| def __init__(self): | |||||
| super(Mul, self).__init__() | |||||
| self.mul = P.Mul() | |||||
| def construct(self, x, y): | |||||
| return self.mul(x, y) | |||||
| x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32)) | |||||
| y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32)) | |||||
| mul = Mul() | |||||
| print(mul(x, y)) | |||||
| ``` | ``` | ||||
| python -c 'import mindspore' | |||||
| ``` | |||||
| [ 4. 10. 18.] | |||||
| ``` | ``` | ||||
| ### From Source | ### From Source | ||||
| @@ -96,20 +119,22 @@ currently the containerized build options are supported as follows: | |||||
| | Hardware Platform | Docker Image Repository | Tag | Description | | | Hardware Platform | Docker Image Repository | Tag | Description | | ||||
| | :---------------- | :---------------------- | :-- | :---------- | | | :---------------- | :---------------------- | :-- | :---------- | | ||||
| | CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. | | |||||
| | CPU | `mindspore/mindspore-cpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` CPU release. | | |||||
| | | | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | | | | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | ||||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. | | | | | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. | | ||||
| | GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. | | |||||
| | GPU | `mindspore/mindspore-gpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` GPU release. | | |||||
| | | | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | | | | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. | | ||||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. | | |||||
| | | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU CUDA10.1` backend. | | |||||
| | Ascend | <center>—</center> | <center>—</center> | Coming soon. | | | Ascend | <center>—</center> | <center>—</center> | Coming soon. | | ||||
| > **NOTICE:** For GPU `devel` docker image, it's NOT suggested to directly install the whl package after building from the source, instead we strongly RECOMMEND you transfer and install the whl package inside GPU `runtime` docker image. | |||||
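A hedged sketch of that recommended workflow follows; the host paths and the location of the built whl (`output/`) are assumptions for illustration, not part of this repository:

```
# Build MindSpore from source inside the GPU devel image (host path is illustrative)
docker run -it --runtime=nvidia -v /local/path/mindspore:/mindspore -w /mindspore \
    mindspore/mindspore-gpu:devel /bin/bash

# Then install the generated whl inside the GPU runtime image instead of the devel one
docker run -it --runtime=nvidia -v /local/path/mindspore/output:/output \
    mindspore/mindspore-gpu:runtime \
    pip install /output/mindspore_gpu-0.2.0-cp37-cp37m-linux_x86_64.whl
```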
| * CPU | * CPU | ||||
| For `CPU` backend, you can directly pull and run the image using the below command: | |||||
| For `CPU` backend, you can directly pull and run the latest stable image using the below command: | |||||
| ``` | ``` | ||||
| docker pull mindspore/mindspore-cpu:0.1.0-alpha | |||||
| docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore' | |||||
| docker pull mindspore/mindspore-cpu:0.2.0-alpha | |||||
| docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash | |||||
| ``` | ``` | ||||
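A minimal import check inside the container (in the same spirit as the pip verification above) could be:

```
docker run -it mindspore/mindspore-cpu:0.2.0-alpha python -c 'import mindspore'
```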
| * GPU | * GPU | ||||
| @@ -124,20 +149,21 @@ currently the containerized build options are supported as follows: | |||||
| sudo systemctl restart docker | sudo systemctl restart docker | ||||
| ``` | ``` | ||||
| Then you can pull and run the image using the below command: | |||||
| Then you can pull and run the latest stable image using the below command: | |||||
| ``` | ``` | ||||
| docker pull mindspore/mindspore-gpu:0.1.0-alpha | |||||
| docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash | |||||
| docker pull mindspore/mindspore-gpu:0.2.0-alpha | |||||
| docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash | |||||
| ``` | ``` | ||||
| To test if the docker image works, please execute the python code below and check the output: | To test if the docker image works, please execute the python code below and check the output: | ||||
| ```python | ```python | ||||
| import numpy as np | import numpy as np | ||||
| import mindspore.context as context | |||||
| from mindspore import Tensor | from mindspore import Tensor | ||||
| from mindspore.ops import functional as F | from mindspore.ops import functional as F | ||||
| import mindspore.context as context | |||||
| context.set_context(device_target="GPU") | context.set_context(device_target="GPU") | ||||
| x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) | x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) | ||||
| y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) | y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) | ||||
| print(F.tensor_add(x, y)) | print(F.tensor_add(x, y)) | ||||
| @@ -157,11 +183,11 @@ currently the containerized build options are supported as follows: | |||||
| ``` | ``` | ||||
| If you want to learn more about the building process of MindSpore docker images, | If you want to learn more about the building process of MindSpore docker images, | ||||
| please check out `docker` folder for the details. | |||||
| please check out the [docker](docker/README.md) directory for details. | |||||
| ## Quickstart | ## Quickstart | ||||
| See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html) | |||||
| See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html) | |||||
| to implement the image classification. | to implement the image classification. | ||||
| ## Docs | ## Docs | ||||
| @@ -1,3 +1,75 @@ | |||||
| # Release 0.2.0-alpha | |||||
| ## Major Features and Improvements | |||||
| ### Ascend 910 Training and Inference Framework | |||||
| * New models | |||||
| * MobileNetV2: Inverted Residuals and Linear Bottlenecks. | |||||
| * ResNet101: Deep Residual Learning for Image Recognition. | |||||
| * Frontend and User Interface | |||||
| * Support for all python comparison operators. | |||||
| * Support for math operators `**`, `//` and `%`. Support for other Python operators such as `and`/`or`/`not`/`is`/`is not`/`in`/`not in`. | |||||
| * Support for the gradients of functions with variable arguments. | |||||
| * Support for tensor indexing assignment for certain indexing types. | |||||
| * Support for dynamic learning rate. | |||||
| * User interfaces change log | |||||
| * DepthwiseConv2dNative, DepthwiseConv2dNativeBackpropFilter, DepthwiseConv2dNativeBackpropInput([!424](https://gitee.com/mindspore/mindspore/pulls/424)) | |||||
| * ReLU6, ReLU6Grad([!224](https://gitee.com/mindspore/mindspore/pulls/224)) | |||||
| * GeneratorDataset([!183](https://gitee.com/mindspore/mindspore/pulls/183)) | |||||
| * VOCDataset([!477](https://gitee.com/mindspore/mindspore/pulls/477)) | |||||
| * MindDataset, PKSampler([!514](https://gitee.com/mindspore/mindspore/pulls/514)) | |||||
| * map([!506](https://gitee.com/mindspore/mindspore/pulls/506)) | |||||
| * Conv([!226](https://gitee.com/mindspore/mindspore/pulls/226)) | |||||
| * Adam([!253](https://gitee.com/mindspore/mindspore/pulls/253)) | |||||
| * _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size([!189](https://gitee.com/mindspore/mindspore/pulls/189)) | |||||
| * CheckpointConfig([!122](https://gitee.com/mindspore/mindspore/pulls/122)) | |||||
| * Constant([!54](https://gitee.com/mindspore/mindspore/pulls/54)) | |||||
| * Executor and Performance Optimization | |||||
| * Support parallel execution of data prefetching and forward/backward computing. | |||||
| * Support parallel execution of gradient aggregation and forward/backward computing in distributed training scenarios. | |||||
| * Support operator fusion optimization. | |||||
| * Optimize compilation process and improve the performance. | |||||
| * Data processing, augmentation, and save format | |||||
| * Support multi-processing for GeneratorDataset/PyFunc for higher performance | |||||
| * Support variable batch size | |||||
| * Support new dataset operators such as filter, skip, take and TextLineDataset (see the sketch after this list) | |||||
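As a rough illustration of the new dataset operators mentioned in the last bullet, the sketch below reads a plain-text file and chains `skip`/`take`; the file path and the assumption that the output column is named "text" are illustrative only:

```python
import mindspore.dataset as ds

# Read a plain-text file line by line (the path is illustrative).
data = ds.TextLineDataset("corpus.txt", shuffle=False)

# Drop the first 10 lines, then keep the next 100.
data = data.skip(10)
data = data.take(100)

# Iterate over rows; the "text" column name is an assumption for illustration.
for row in data.create_dict_iterator():
    print(row["text"])
```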
| ### Other Hardware Support | |||||
| * GPU platform | |||||
| * Use dynamic memory pool by default on GPU. | |||||
| * Support parallel execution of computation and communication. | |||||
| * Support continuous address allocation by memory pool. | |||||
| * CPU platform | |||||
| * Support for the Windows 10 OS. | |||||
| ## Bugfixes | |||||
| * Models | |||||
| * Fix mixed precision bug for VGG16 model ([!629](https://gitee.com/mindspore/mindspore/pulls/629)). | |||||
| * Python API | |||||
| * Fix ControlDepend operator bugs on CPU and GPU ([!396](https://gitee.com/mindspore/mindspore/pulls/396)). | |||||
| * Fix ArgMinWithValue operator bugs ([!338](https://gitee.com/mindspore/mindspore/pulls/338)). | |||||
| * Fix Dense operator bugs on PyNative mode ([!276](https://gitee.com/mindspore/mindspore/pulls/276)). | |||||
| * Fix MatMul operator bugs on PyNative mode ([!288](https://gitee.com/mindspore/mindspore/pulls/288)). | |||||
| * Executor | |||||
| * Fix operator selection bugs and make it general ([!300](https://gitee.com/mindspore/mindspore/pulls/300)). | |||||
| * Fix memory reuse bug for GetNext op ([!291](https://gitee.com/mindspore/mindspore/pulls/291)). | |||||
| * GPU platform | |||||
| * Fix memory allocation in multi-graph scenarios ([!444](https://gitee.com/mindspore/mindspore/pulls/444)). | |||||
| * Fix bias_add_grad under fp16 precision ([!598](https://gitee.com/mindspore/mindspore/pulls/598)). | |||||
| * Fix support for fp16 kernels on nvidia 1080Ti([!571](https://gitee.com/mindspore/mindspore/pulls/571)). | |||||
| * Fix parsing of tuple type parameters ([!316](https://gitee.com/mindspore/mindspore/pulls/316)). | |||||
| * Data processing | |||||
| * Fix "TypeError: can't pickle mindspore._c_dataengine.DEPipeline objects" errors ([!434](https://gitee.com/mindspore/mindspore/pulls/434)). | |||||
| * Add TFRecord file verification ([!406](https://gitee.com/mindspore/mindspore/pulls/406)). | |||||
| ## Contributors | |||||
| Thanks goes to these wonderful people: | |||||
| Alexey_Shevlyakov, Cathy, Chong, Hoai, Jonathan, Junhan, JunhanHu, Peilin, SanjayChan, StrawNoBerry, VectorSL, Wei, WeibiaoYu, Xiaoda, Yanjun, YuJianfeng, ZPaC, Zhang, ZhangQinghua, ZiruiWu, amongo, anthonyaje, anzhengqi, biffex, caifubi, candanzg, caojian05, casgj, cathwong, ch-l, chang, changzherui, chenfei, chengang, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, dengwentao, dinghao, fanglei, fary86, flywind, gaojing, geekun, gengdongjie, ghzl, gong, gongchen, gukecai, guohongzilong, guozhijian, gziyan, h.farahat, hesham, huangdongrun, huanghui, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, jonathan_yan, jonyguo, jzw, kingfo, kisnwang, laiyongqiang, leonwanghui, lianliguang, lichen, lichenever, limingqi107, liubuyu, liuxiao, liyong, liyong126, lizhenyu, lupengcheng, lvliang, maoweiyong, ms_yan, mxm, ougongchang, panfengfeng, panyifeng, pengyanjun, penn, qianlong, seatea, simson, suteng, thlinh, vlne-v1, wangchengke, wanghua, wangnan39, wangqiuliang, wenchunjiang, wenkai, wukesong, xiefangqi, xulei, yanghaitao, yanghaoran, yangjie159, yangzhenzhang, yankai10, yanzhenxiang2020, yao_yf, yoonlee666, zhangbuxue, zhangz0911gm, zhangzheng, zhaojichen, zhaoting, zhaozhenlong, zhongligeng, zhoufeng, zhousiyi, zjun, zyli2020, yuhuijun, limingqi107, lizhenyu, chenweifeng. | |||||
| Contributions of any kind are welcome! | |||||
| # Release 0.1.0-alpha | # Release 0.1.0-alpha | ||||
| ## Main Features | ## Main Features | ||||
| @@ -3042,6 +3042,60 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", AND | |||||
| Why Three Licenses? | Why Three Licenses? | ||||
| The zlib License could have been used instead of the Modified (3-clause) BSD License, and since the IJG License effectively subsumes the distribution conditions of the zlib License, this would have effectively placed libjpeg-turbo binary distributions under the IJG License. However, the IJG License specifically refers to the Independent JPEG Group and does not extend attribution and endorsement protections to other entities. Thus, it was desirable to choose a license that granted us the same protections for new code that were granted to the IJG for code derived from their software. | The zlib License could have been used instead of the Modified (3-clause) BSD License, and since the IJG License effectively subsumes the distribution conditions of the zlib License, this would have effectively placed libjpeg-turbo binary distributions under the IJG License. However, the IJG License specifically refers to the Independent JPEG Group and does not extend attribution and endorsement protections to other entities. Thus, it was desirable to choose a license that granted us the same protections for new code that were granted to the IJG for code derived from their software. | ||||
| Software: libtiff 4.1.0 | |||||
| Copyright notice: | |||||
| Copyright © 2015 Open Microscopy Environment / University of Dundee | |||||
| Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu> | |||||
| Copyright (c) 1990-1997 Sam Leffler | |||||
| Copyright (c) 1991-1997 Silicon Graphics, Inc. | |||||
| Copyright (c) 1988-1997 Sam Leffler | |||||
| Copyright (c) 1991-1997 Sam Leffler | |||||
| Use and Copyright | |||||
| Copyright (C) 1990, 1995 Frank D. Cringle. | |||||
| Copyright (c) 1994-1997 Sam Leffler | |||||
| Copyright (c) 1994-1997 Silicon Graphics, Inc. | |||||
| Copyright (c) 1997 Greg Ward Larson | |||||
| Copyright (c) 1997 Silicon Graphics, Inc. | |||||
| Copyright (c) 2010, Andrey Kiselev <dron@ak4719.spb.edu> | |||||
| Copyright (c) Joris Van Damme <info@awaresystems.be> | |||||
| Copyright (c) AWare Systems <http:www.awaresystems.be/> | |||||
| Copyright (c) 1996-1997 Sam Leffler | |||||
| Copyright (c) 1996 Pixar | |||||
| Copyright (c) 1995-1997 Sam Leffler | |||||
| Copyright (c) 1995-1997 Silicon Graphics, Inc. | |||||
| Copyright (c) 1988-1996 Sam Leffler | |||||
| Copyright (c) 1991-1996 Silicon Graphics, Inc. | |||||
| Copyright (c) 1992-1997 Sam Leffler | |||||
| Copyright (c) 1992-1997 Silicon Graphics, Inc. | |||||
| Copyright (c) 2018, Mapbox | |||||
| Copyright (c) 2017, Planet Labs | |||||
| Copyright (c) 1990 by Sun Microsystems, Inc. | |||||
| Copyright 1990 by Digital Equipment Corporation, Maynard, Massachusetts. | |||||
| Copyright 1991 by Digital Equipment Corporation, Maynard, Massachusetts. | |||||
| Copyright (c) 2002, Andrey Kiselev <dron@ak4719.spb.edu> | |||||
| Copyright (c) 2003 Ross Finlayson | |||||
| Additions (c) Richard Nolde 2006-2010 | |||||
| Copyright (c) 2003, Andrey Kiselev <dron@ak4719.spb.edu> | |||||
| Copyright (c) 2000, Frank Warmerdam | |||||
| Copyright (c) 1987, 1993, 1994 | |||||
| Copyright (c) 1989, 1993 | |||||
| Copyright (c) 2009 Frank Warmerdam | |||||
| Copyright (c) 1987, 1993 | |||||
| Copyright (c) 2005 The DragonFly Project. All rights reserved. | |||||
| Copyright (c) 2003 Citrus Project, | |||||
| All rights reserved. | |||||
| Copyright (c) 1990, 1993 | |||||
| Copyright (c) 1996 Mike Johnson | |||||
| Copyright (c) 1996 BancTec AB | |||||
| Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu> | |||||
| Copyright (c) 2012, Frank Warmerdam <warmerdam@pobox.com> | |||||
| Copyright (c) 2019, Even Rouault <even.rouault at spatialys.com> | |||||
| Copyright (c) 2007, Frank Warmerdam <warmerdam@pobox.com> | |||||
| Copyright (c) 2019, Thomas Bernard <miniupnp@free.fr> | |||||
| Copyright (c) 2008, Andrey Kiselev <dron@ak4719.spb.edu> | |||||
| Copyright (c) 1999, Frank Warmerdam | |||||
| Copyright (c) 1991-1996 Sam Leffler | |||||
| Copyright (c) 1996 USAF Phillips Laboratory | |||||
| Software: opencv 4.2.0 | Software: opencv 4.2.0 | ||||
| Copyright notice: | Copyright notice: | ||||
| @@ -14,27 +14,27 @@ | |||||
| @rem ============================================================================ | @rem ============================================================================ | ||||
| @echo off | @echo off | ||||
| @title mindspore_build | @title mindspore_build | ||||
| SET BASEPATH=%CD% | SET BASEPATH=%CD% | ||||
| IF NOT EXIST %BASEPATH%/build ( | IF NOT EXIST %BASEPATH%/build ( | ||||
| md "build" | md "build" | ||||
| ) | ) | ||||
| cd %BASEPATH%/build | cd %BASEPATH%/build | ||||
| SET BUILD_PATH=%CD% | SET BUILD_PATH=%CD% | ||||
| IF NOT EXIST %BUILD_PATH%/mindspore ( | IF NOT EXIST %BUILD_PATH%/mindspore ( | ||||
| md "mindspore" | md "mindspore" | ||||
| ) | ) | ||||
| cd %CD%/mindspore | cd %CD%/mindspore | ||||
| cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../.. | cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../.. | ||||
| IF NOT %errorlevel% == 0 ( | IF NOT %errorlevel% == 0 ( | ||||
| echo "cmake fail." | echo "cmake fail." | ||||
| goto run_fail | goto run_fail | ||||
| ) | ) | ||||
| IF "%1%" == "" ( | IF "%1%" == "" ( | ||||
| cmake --build . --target package -- -j6 | cmake --build . --target package -- -j6 | ||||
| ) ELSE ( | ) ELSE ( | ||||
| @@ -433,9 +433,9 @@ build_predict() | |||||
| cd "${BASEPATH}/predict/output/" | cd "${BASEPATH}/predict/output/" | ||||
| if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then | if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then | ||||
| tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed | |||||
| tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed | |||||
| elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then | elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then | ||||
| tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed | |||||
| tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed | |||||
| fi | fi | ||||
| echo "success to build predict project!" | echo "success to build predict project!" | ||||
| } | } | ||||
| @@ -4,14 +4,13 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with | |||||
| ### MindSpore docker build command | ### MindSpore docker build command | ||||
| * CPU | |||||
| | Hardware Platform | Version | Build Command | | |||||
| | :---------------- | :------ | :------------ | | |||||
| | CPU | `x.y.z` | cd mindspore-cpu/x.y.z && docker build . -t mindspore/mindspore-cpu:x.y.z | | |||||
| | | `devel` | cd mindspore-cpu/devel && docker build . -t mindspore/mindspore-cpu:devel | | |||||
| | | `runtime` | cd mindspore-cpu/runtime && docker build . -t mindspore/mindspore-cpu:runtime | | |||||
| | GPU | `x.y.z` | cd mindspore-gpu/x.y.z && docker build . -t mindspore/mindspore-gpu:x.y.z | | |||||
| | | `devel` | cd mindspore-gpu/devel && docker build . -t mindspore/mindspore-gpu:devel | | |||||
| | | `runtime` | cd mindspore-gpu/runtime && docker build . -t mindspore/mindspore-gpu:runtime | | |||||
| ``` | |||||
| cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha | |||||
| ``` | |||||
| * GPU | |||||
| ``` | |||||
| cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha | |||||
| ``` | |||||
| > **NOTICE:** The `x.y.z` version shown above should be replaced with the real version number. | |||||
| @@ -64,4 +64,4 @@ RUN mkdir -pv /root/.pip \ | |||||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | ||||
| # Install MindSpore cpu whl package | # Install MindSpore cpu whl package | ||||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl | |||||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||||
| @@ -0,0 +1,67 @@ | |||||
| FROM ubuntu:18.04 | |||||
| MAINTAINER leonwanghui <leon.wanghui@huawei.com> | |||||
| # Set env | |||||
| ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 | |||||
| ENV PATH /usr/local/bin:$PATH | |||||
| # Install base tools | |||||
| RUN apt update \ | |||||
| && DEBIAN_FRONTEND=noninteractive apt install -y \ | |||||
| vim \ | |||||
| wget \ | |||||
| curl \ | |||||
| xz-utils \ | |||||
| net-tools \ | |||||
| openssh-client \ | |||||
| git \ | |||||
| ntpdate \ | |||||
| tzdata \ | |||||
| tcl \ | |||||
| sudo \ | |||||
| bash-completion | |||||
| # Install compile tools | |||||
| RUN DEBIAN_FRONTEND=noninteractive apt install -y \ | |||||
| gcc \ | |||||
| g++ \ | |||||
| zlibc \ | |||||
| make \ | |||||
| libgmp-dev \ | |||||
| patch \ | |||||
| autoconf \ | |||||
| libtool \ | |||||
| automake \ | |||||
| flex | |||||
| # Set bash | |||||
| RUN echo "dash dash/sh boolean false" | debconf-set-selections | |||||
| RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash | |||||
| # Install python (v3.7.5) | |||||
| RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ | |||||
| libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ | |||||
| && cd /tmp \ | |||||
| && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ | |||||
| && tar -xvf v3.7.5.tar.gz \ | |||||
| && cd /tmp/cpython-3.7.5 \ | |||||
| && mkdir -p ${PYTHON_ROOT_PATH} \ | |||||
| && ./configure --prefix=${PYTHON_ROOT_PATH} \ | |||||
| && make -j4 \ | |||||
| && make install -j4 \ | |||||
| && rm -f /usr/local/bin/python \ | |||||
| && rm -f /usr/local/bin/pip \ | |||||
| && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ | |||||
| && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ | |||||
| && rm -rf /tmp/cpython-3.7.5 \ | |||||
| && rm -f /tmp/v3.7.5.tar.gz | |||||
| # Set pip source | |||||
| RUN mkdir -pv /root/.pip \ | |||||
| && echo "[global]" > /root/.pip/pip.conf \ | |||||
| && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ | |||||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | |||||
| # Install MindSpore cpu whl package | |||||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||||
| @@ -80,4 +80,4 @@ RUN cd /tmp \ | |||||
| && rm -f /tmp/openmpi-3.1.5.tar.gz | && rm -f /tmp/openmpi-3.1.5.tar.gz | ||||
| # Install MindSpore cuda-10.1 whl package | # Install MindSpore cuda-10.1 whl package | ||||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl | |||||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl | |||||
| @@ -0,0 +1,83 @@ | |||||
| FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 | |||||
| MAINTAINER leonwanghui <leon.wanghui@huawei.com> | |||||
| # Set env | |||||
| ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5 | |||||
| ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5 | |||||
| ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH | |||||
| ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH | |||||
| # Install base tools | |||||
| RUN apt update \ | |||||
| && DEBIAN_FRONTEND=noninteractive apt install -y \ | |||||
| vim \ | |||||
| wget \ | |||||
| curl \ | |||||
| xz-utils \ | |||||
| net-tools \ | |||||
| openssh-client \ | |||||
| git \ | |||||
| ntpdate \ | |||||
| tzdata \ | |||||
| tcl \ | |||||
| sudo \ | |||||
| bash-completion | |||||
| # Install compile tools | |||||
| RUN DEBIAN_FRONTEND=noninteractive apt install -y \ | |||||
| gcc \ | |||||
| g++ \ | |||||
| zlibc \ | |||||
| make \ | |||||
| libgmp-dev \ | |||||
| patch \ | |||||
| autoconf \ | |||||
| libtool \ | |||||
| automake \ | |||||
| flex \ | |||||
| libnccl2=2.4.8-1+cuda10.1 \ | |||||
| libnccl-dev=2.4.8-1+cuda10.1 | |||||
| # Set bash | |||||
| RUN echo "dash dash/sh boolean false" | debconf-set-selections | |||||
| RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash | |||||
| # Install python (v3.7.5) | |||||
| RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ | |||||
| libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \ | |||||
| && cd /tmp \ | |||||
| && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \ | |||||
| && tar -xvf v3.7.5.tar.gz \ | |||||
| && cd /tmp/cpython-3.7.5 \ | |||||
| && mkdir -p ${PYTHON_ROOT_PATH} \ | |||||
| && ./configure --prefix=${PYTHON_ROOT_PATH} \ | |||||
| && make -j4 \ | |||||
| && make install -j4 \ | |||||
| && rm -f /usr/local/bin/python \ | |||||
| && rm -f /usr/local/bin/pip \ | |||||
| && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ | |||||
| && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ | |||||
| && rm -rf /tmp/cpython-3.7.5 \ | |||||
| && rm -f /tmp/v3.7.5.tar.gz | |||||
| # Set pip source | |||||
| RUN mkdir -pv /root/.pip \ | |||||
| && echo "[global]" > /root/.pip/pip.conf \ | |||||
| && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \ | |||||
| && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf | |||||
| # Install openmpi (v3.1.5) | |||||
| RUN cd /tmp \ | |||||
| && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \ | |||||
| && tar -xvf openmpi-3.1.5.tar.gz \ | |||||
| && cd /tmp/openmpi-3.1.5 \ | |||||
| && mkdir -p ${OMPI_ROOT_PATH} \ | |||||
| && ./configure --prefix=${OMPI_ROOT_PATH} \ | |||||
| && make -j4 \ | |||||
| && make install -j4 \ | |||||
| && rm -rf /tmp/openmpi-3.1.5 \ | |||||
| && rm -f /tmp/openmpi-3.1.5.tar.gz | |||||
| # Install MindSpore cuda-10.1 whl package | |||||
| RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore_gpu-0.2.0-cp37-cp37m-linux_x86_64.whl | |||||
| @@ -4,8 +4,8 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base]( | |||||
| ## Requirements | ## Requirements | ||||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | - Install [MindSpore](https://www.mindspore.cn/install/en). | ||||
| - Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wiliextractor). Convert the dataset to TFRecord format and move the files to a specified path. | |||||
| - Download the CLUE dataset from <https://www.cluebenchmarks.com> for fine-tuning and evaluation. | |||||
| - Download the zhwiki dataset for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). Convert the dataset to TFRecord format and move the files to a specified path. | |||||
| - Download the CLUE dataset for fine-tuning and evaluation. | |||||
| > Notes: | > Notes: | ||||
| If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. | If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. | ||||
| @@ -10,7 +10,7 @@ This is the simple tutorial for training AlexNet in MindSpore. | |||||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | - Install [MindSpore](https://www.mindspore.cn/install/en). | ||||
| - Download the CIFAR-10 dataset at <http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz>. The directory structure is as follows: | |||||
| - Download the CIFAR-10 dataset. The directory structure is as follows: | |||||
| ``` | ``` | ||||
| ├─cifar-10-batches-bin | ├─cifar-10-batches-bin | ||||
| @@ -10,7 +10,7 @@ This is the simple and basic tutorial for constructing a network in MindSpore. | |||||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | - Install [MindSpore](https://www.mindspore.cn/install/en). | ||||
| - Download the MNIST dataset at <http://yann.lecun.com/exdb/mnist/>. The directory structure is as follows: | |||||
| - Download the MNIST dataset. The directory structure is as follows: | |||||
| ``` | ``` | ||||
| └─MNIST_Data | └─MNIST_Data | ||||
| @@ -0,0 +1,101 @@ | |||||
| # MobileNetV2 Example | |||||
| ## Description | |||||
| This is an example of training MobileNetV2 on the ImageNet2012 dataset in MindSpore. | |||||
| ## Requirements | |||||
| * Install [MindSpore](https://www.mindspore.cn/install/en). | |||||
| * Download the dataset [ImageNet2012]. | |||||
| > Unzip the ImageNet2012 dataset to any path you want; the folder structure should be as follows: | |||||
| > ``` | |||||
| > . | |||||
| > ├── train # train dataset | |||||
| > └── val # infer dataset | |||||
| > ``` | |||||
| ## Example structure | |||||
| ``` shell | |||||
| . | |||||
| ├── config.py # parameter configuration | |||||
| ├── dataset.py # data preprocessing | |||||
| ├── eval.py # infer script | |||||
| ├── launch.py # launcher for distributed training | |||||
| ├── lr_generator.py # generate learning rate for each step | |||||
| ├── run_infer.sh # launch inference | |||||
| ├── run_train.sh # launch training | |||||
| └── train.py # train script | |||||
| ``` | |||||
| ## Parameter configuration | |||||
| Parameters for both training and inference can be set in 'config.py'. | |||||
| ``` | |||||
| "num_classes": 1000, # dataset class num | |||||
| "image_height": 224, # image height | |||||
| "image_width": 224, # image width | |||||
| "batch_size": 256, # training or infering batch size | |||||
| "epoch_size": 200, # total training epochs, including warmup_epochs | |||||
| "warmup_epochs": 4, # warmup epochs | |||||
| "lr": 0.4, # base learning rate | |||||
| "momentum": 0.9, # momentum | |||||
| "weight_decay": 4e-5, # weight decay | |||||
| "loss_scale": 1024, # loss scale | |||||
| "save_checkpoint": True, # whether save checkpoint | |||||
| "save_checkpoint_epochs": 1, # the epoch interval between two checkpoints | |||||
| "keep_checkpoint_max": 200, # only keep the last keep_checkpoint_max checkpoint | |||||
| "save_checkpoint_path": "./checkpoint" # path to save checkpoint | |||||
| ``` | |||||
| ## Running the example | |||||
| ### Train | |||||
| #### Usage | |||||
| Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] | |||||
| #### Launch | |||||
| ``` | |||||
| # training example | |||||
| sh run_train.sh 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet | |||||
| ``` | |||||
| #### Result | |||||
| Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`, as shown below. | |||||
| ``` | |||||
| epoch: [ 0/200], step:[ 624/ 625], loss:[5.258/5.258], time:[140412.236], lr:[0.100] | |||||
| epoch time: 140522.500, per step time: 224.836, avg loss: 5.258 | |||||
| epoch: [ 1/200], step:[ 624/ 625], loss:[3.917/3.917], time:[138221.250], lr:[0.200] | |||||
| epoch time: 138331.250, per step time: 221.330, avg loss: 3.917 | |||||
| ``` | |||||
| ### Infer | |||||
| #### Usage | |||||
| Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH] | |||||
| #### Launch | |||||
| ``` | |||||
| # infer example | |||||
| sh run_infer.sh ~/imagenet ~/train/mobilenet-200_625.ckpt | |||||
| ``` | |||||
| > The checkpoint can be produced during the training process. | |||||
| #### Result | |||||
| Inference results will be stored in the example path; you can find results like the following in `val.log`. | |||||
| ``` | |||||
| result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt | |||||
| ``` | |||||
| @@ -0,0 +1,35 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| network config setting, will be used in train.py and eval.py | |||||
| """ | |||||
| from easydict import EasyDict as ed | |||||
| config = ed({ | |||||
| "num_classes": 1000, | |||||
| "image_height": 224, | |||||
| "image_width": 224, | |||||
| "batch_size": 256, | |||||
| "epoch_size": 200, | |||||
| "warmup_epochs": 4, | |||||
| "lr": 0.4, | |||||
| "momentum": 0.9, | |||||
| "weight_decay": 4e-5, | |||||
| "loss_scale": 1024, | |||||
| "save_checkpoint": True, | |||||
| "save_checkpoint_epochs": 1, | |||||
| "keep_checkpoint_max": 200, | |||||
| "save_checkpoint_path": "./checkpoint", | |||||
| }) | |||||
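Since `config` is an `EasyDict`, the training and evaluation scripts read these fields with attribute access; for example:

```python
from config import config

# Fields defined above are available as attributes (EasyDict behavior).
print(config.batch_size)            # 256
print(config.save_checkpoint_path)  # ./checkpoint
```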
| @@ -0,0 +1,84 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| create train or eval dataset. | |||||
| """ | |||||
| import os | |||||
| import mindspore.common.dtype as mstype | |||||
| import mindspore.dataset.engine as de | |||||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||||
| import mindspore.dataset.transforms.c_transforms as C2 | |||||
| from config import config | |||||
| def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||||
| """ | |||||
| create a train or eval dataset | |||||
| Args: | |||||
| dataset_path(string): the path of dataset. | |||||
| do_train(bool): whether dataset is used for train or eval. | |||||
| repeat_num(int): the repeat times of dataset. Default: 1 | |||||
| batch_size(int): the batch size of dataset. Default: 32 | |||||
| Returns: | |||||
| dataset | |||||
| """ | |||||
| rank_size = int(os.getenv("RANK_SIZE")) | |||||
| rank_id = int(os.getenv("RANK_ID")) | |||||
| if rank_size == 1: | |||||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True) | |||||
| else: | |||||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True, | |||||
| num_shards=rank_size, shard_id=rank_id) | |||||
| resize_height = config.image_height | |||||
| resize_width = config.image_width | |||||
| rescale = 1.0 / 255.0 | |||||
| shift = 0.0 | |||||
| buffer_size = 1000 | |||||
| # define map operations | |||||
| decode_op = C.Decode() | |||||
| resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.2, 1.0)) | |||||
| horizontal_flip_op = C.RandomHorizontalFlip() | |||||
| resize_op = C.Resize((256, 256)) | |||||
| center_crop = C.CenterCrop(resize_width) | |||||
| rescale_op = C.Rescale(rescale, shift) | |||||
| normalize_op = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) | |||||
| change_swap_op = C.HWC2CHW() | |||||
| if do_train: | |||||
| trans = [decode_op, resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op] | |||||
| else: | |||||
| trans = [decode_op, resize_op, center_crop, rescale_op, normalize_op, change_swap_op] | |||||
| type_cast_op = C2.TypeCast(mstype.int32) | |||||
| ds = ds.map(input_columns="image", operations=trans) | |||||
| ds = ds.map(input_columns="label", operations=type_cast_op) | |||||
| # apply shuffle operations | |||||
| ds = ds.shuffle(buffer_size=buffer_size) | |||||
| # apply batch operations | |||||
| ds = ds.batch(batch_size, drop_remainder=True) | |||||
| # apply dataset repeat operation | |||||
| ds = ds.repeat(repeat_num) | |||||
| return ds | |||||
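`create_dataset` reads the shard layout from the `RANK_SIZE` and `RANK_ID` environment variables, so a minimal single-device call (paths and values illustrative) might look like:

```python
import os

# Single-device setup: one shard, rank 0 (values illustrative).
os.environ["RANK_SIZE"] = "1"
os.environ["RANK_ID"] = "0"

from dataset import create_dataset
from config import config

train_ds = create_dataset("/path/to/imagenet/train", do_train=True,
                          batch_size=config.batch_size)
print("steps per epoch:", train_ds.get_dataset_size())
```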
| @@ -0,0 +1,56 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| eval. | |||||
| """ | |||||
| import os | |||||
| import argparse | |||||
| from dataset import create_dataset | |||||
| from config import config | |||||
| from mindspore import context | |||||
| from mindspore.model_zoo.mobilenet import mobilenet_v2 | |||||
| from mindspore.train.model import Model | |||||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||||
| parser = argparse.ArgumentParser(description='Image classification') | |||||
| parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') | |||||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||||
| args_opt = parser.parse_args() | |||||
| device_id = int(os.getenv('DEVICE_ID')) | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False) | |||||
| context.set_context(enable_task_sink=True) | |||||
| context.set_context(enable_loop_sink=True) | |||||
| context.set_context(enable_mem_reuse=True) | |||||
| if __name__ == '__main__': | |||||
| context.set_context(enable_hccl=False) | |||||
| loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') | |||||
| net = mobilenet_v2() | |||||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size) | |||||
| step_size = dataset.get_dataset_size() | |||||
| if args_opt.checkpoint_path: | |||||
| param_dict = load_checkpoint(args_opt.checkpoint_path) | |||||
| load_param_into_net(net, param_dict) | |||||
| net.set_train(False) | |||||
| model = Model(net, loss_fn=loss, metrics={'acc'}) | |||||
| res = model.eval(dataset) | |||||
| print("result:", res, "ckpt=", args_opt.checkpoint_path) | |||||
| @@ -0,0 +1,143 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """launch train script""" | |||||
| import os | |||||
| import sys | |||||
| import json | |||||
| from argparse import ArgumentParser | |||||
| def parse_args(): | |||||
| """ | |||||
| parse args . | |||||
| Args: | |||||
| Returns: | |||||
| args. | |||||
| Examples: | |||||
| >>> parse_args() | |||||
| """ | |||||
| parser = ArgumentParser(description="mindspore distributed training launch " | |||||
| "helper utilty that will spawn up " | |||||
| "multiple distributed processes") | |||||
| parser.add_argument("--nproc_per_node", type=int, default=1, | |||||
| help="The number of processes to launch on each node, " | |||||
| "for D training, this is recommended to be set " | |||||
| "to the number of D in your system so that " | |||||
| "each process can be bound to a single D.") | |||||
| parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7", | |||||
| help="will use the visible devices sequentially") | |||||
| parser.add_argument("--server_id", type=str, default="", | |||||
| help="server ip") | |||||
| parser.add_argument("--training_script", type=str, | |||||
| help="The full path to the single D training " | |||||
| "program/script to be launched in parallel, " | |||||
| "followed by all the arguments for the " | |||||
| "training script") | |||||
| # rest from the training program | |||||
| args, unknown = parser.parse_known_args() | |||||
| args.training_script_args = unknown | |||||
| return args | |||||
| def main(): | |||||
| print("start", __file__) | |||||
| args = parse_args() | |||||
| print(args) | |||||
| visible_devices = args.visible_devices.split(',') | |||||
| assert os.path.isfile(args.training_script) | |||||
| assert len(visible_devices) >= args.nproc_per_node | |||||
| print('visible_devices:{}'.format(visible_devices)) | |||||
| if not args.server_id: | |||||
| print('please input server ip!!!') | |||||
| exit(0) | |||||
| print('server_id:{}'.format(args.server_id)) | |||||
| # construct hccn_table | |||||
| hccn_configs = open('/etc/hccn.conf', 'r').readlines() | |||||
| device_ips = {} | |||||
| for hccn_item in hccn_configs: | |||||
| hccn_item = hccn_item.strip() | |||||
| if hccn_item.startswith('address_'): | |||||
| device_id, device_ip = hccn_item.split('=') | |||||
| device_id = device_id.split('_')[1] | |||||
| device_ips[device_id] = device_ip | |||||
| print('device_id:{}, device_ip:{}'.format(device_id, device_ip)) | |||||
| hccn_table = {} | |||||
| hccn_table['board_id'] = '0x0000' | |||||
| hccn_table['chip_info'] = '910' | |||||
| hccn_table['deploy_mode'] = 'lab' | |||||
| hccn_table['group_count'] = '1' | |||||
| hccn_table['group_list'] = [] | |||||
| instance_list = [] | |||||
| usable_dev = '' | |||||
| for instance_id in range(args.nproc_per_node): | |||||
| instance = {} | |||||
| instance['devices'] = [] | |||||
| device_id = visible_devices[instance_id] | |||||
| device_ip = device_ips[device_id] | |||||
| usable_dev += str(device_id) | |||||
| instance['devices'].append({ | |||||
| 'device_id': device_id, | |||||
| 'device_ip': device_ip, | |||||
| }) | |||||
| instance['rank_id'] = str(instance_id) | |||||
| instance['server_id'] = args.server_id | |||||
| instance_list.append(instance) | |||||
| hccn_table['group_list'].append({ | |||||
| 'device_num': str(args.nproc_per_node), | |||||
| 'server_num': '1', | |||||
| 'group_name': '', | |||||
| 'instance_count': str(args.nproc_per_node), | |||||
| 'instance_list': instance_list, | |||||
| }) | |||||
| hccn_table['para_plane_nic_location'] = 'device' | |||||
| hccn_table['para_plane_nic_name'] = [] | |||||
| for instance_id in range(args.nproc_per_node): | |||||
| eth_id = visible_devices[instance_id] | |||||
| hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id)) | |||||
| hccn_table['para_plane_nic_num'] = str(args.nproc_per_node) | |||||
| hccn_table['status'] = 'completed' | |||||
| # save hccn_table to file | |||||
| table_path = os.getcwd() | |||||
| if not os.path.exists(table_path): | |||||
| os.mkdir(table_path) | |||||
| table_fn = os.path.join(table_path, | |||||
| 'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id)) | |||||
| with open(table_fn, 'w') as table_fp: | |||||
| json.dump(hccn_table, table_fp, indent=4) | |||||
| sys.stdout.flush() | |||||
| # spawn the processes | |||||
| for rank_id in range(0, args.nproc_per_node): | |||||
| device_id = visible_devices[rank_id] | |||||
| device_dir = os.path.join(os.getcwd(), 'device{}'.format(rank_id)) | |||||
| rank_process = 'export RANK_SIZE={} && export RANK_ID={} && export DEVICE_ID={} && '.format(args.nproc_per_node, | |||||
| rank_id, device_id) | |||||
| if args.nproc_per_node > 1: | |||||
| rank_process += 'export MINDSPORE_HCCL_CONFIG_PATH={} && '.format(table_fn) | |||||
| rank_process += 'export RANK_TABLE_FILE={} && '.format(table_fn) | |||||
| rank_process += 'rm -rf {dir} && mkdir {dir} && cd {dir} && python {script} '.format(dir=device_dir, | |||||
| script=args.training_script | |||||
| ) | |||||
| rank_process += ' '.join(args.training_script_args) + ' > log{}.log 2>&1 &'.format(rank_id) | |||||
| os.system(rank_process) | |||||
| if __name__ == "__main__": | |||||
| main() | |||||
| @@ -0,0 +1,54 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """learning rate generator""" | |||||
| import math | |||||
| import numpy as np | |||||
| def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): | |||||
| """ | |||||
| generate learning rate array | |||||
| Args: | |||||
| global_step(int): total steps of the training | |||||
| lr_init(float): init learning rate | |||||
| lr_end(float): end learning rate | |||||
| lr_max(float): max learning rate | |||||
| warmup_epochs(int): number of warmup epochs | |||||
| total_epochs(int): total epoch of training | |||||
| steps_per_epoch(int): steps of one epoch | |||||
| Returns: | |||||
| np.array, learning rate array | |||||
| """ | |||||
| lr_each_step = [] | |||||
| total_steps = steps_per_epoch * total_epochs | |||||
| warmup_steps = steps_per_epoch * warmup_epochs | |||||
| for i in range(total_steps): | |||||
| if i < warmup_steps: | |||||
| lr = lr_init + (lr_max - lr_init) * i / warmup_steps | |||||
| else: | |||||
| lr = lr_end + \ | |||||
| (lr_max - lr_end) * \ | |||||
| (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2. | |||||
| if lr < 0.0: | |||||
| lr = 0.0 | |||||
| lr_each_step.append(lr) | |||||
| current_step = global_step | |||||
| lr_each_step = np.array(lr_each_step).astype(np.float32) | |||||
| learning_rate = lr_each_step[current_step:] | |||||
| return learning_rate | |||||
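As the training script below does, the resulting array is typically wrapped in a `Tensor` and passed to the optimizer as a per-step schedule. A quick standalone check using the values from `config.py` (625 steps per epoch, as in the training log shown earlier, is an assumption):

```python
from mindspore import Tensor
from lr_generator import get_lr

# Warmup for 4 epochs, then cosine decay over the remaining epochs.
lr_array = get_lr(global_step=0, lr_init=0.0, lr_end=0.0, lr_max=0.4,
                  warmup_epochs=4, total_epochs=200, steps_per_epoch=625)
lr = Tensor(lr_array)               # per-step schedule handed to the Momentum optimizer in train.py
print(lr_array[:3], lr_array[-3:])  # rises during warmup, decays toward 0 at the end
```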
| @@ -0,0 +1,33 @@ | |||||
| #!/usr/bin/env bash | |||||
| if [ $# != 2 ] | |||||
| then | |||||
| echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]" | |||||
| exit 1 | |||||
| fi | |||||
| if [ ! -d $1 ] | |||||
| then | |||||
| echo "error: DATASET_PATH=$1 is not a directory" | |||||
| exit 1 | |||||
| fi | |||||
| if [ ! -f $2 ] | |||||
| then | |||||
| echo "error: CHECKPOINT_PATH=$2 is not a file" | |||||
| exit 1 | |||||
| fi | |||||
| BASEPATH=$(cd "`dirname $0`" || exit; pwd) | |||||
| export PYTHONPATH=${BASEPATH}:$PYTHONPATH | |||||
| export DEVICE_ID=0 | |||||
| export RANK_ID=0 | |||||
| export RANK_SIZE=1 | |||||
| if [ -d "eval" ]; | |||||
| then | |||||
| rm -rf ./eval | |||||
| fi | |||||
| mkdir ./eval | |||||
| cd ./eval || exit | |||||
| python ${BASEPATH}/eval.py \ | |||||
| --checkpoint_path=$2 \ | |||||
| --dataset_path=$1 &> infer.log & # dataset val folder path | |||||
| @@ -0,0 +1,33 @@ | |||||
| #!/usr/bin/env bash | |||||
| if [ $# != 4 ] | |||||
| then | |||||
| echo "Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]" | |||||
| exit 1 | |||||
| fi | |||||
| if [ $1 -lt 1 ] || [ $1 -gt 8 ] | |||||
| then | |||||
| echo "error: DEVICE_NUM=$1 is not in (1-8)" | |||||
| exit 1 | |||||
| fi | |||||
| if [ ! -d $4 ] | |||||
| then | |||||
| echo "error: DATASET_PATH=$4 is not a directory" | |||||
| exit 1 | |||||
| fi | |||||
| BASEPATH=$(cd "`dirname $0`" || exit; pwd) | |||||
| export PYTHONPATH=${BASEPATH}:$PYTHONPATH | |||||
| if [ -d "train" ]; | |||||
| then | |||||
| rm -rf ./train | |||||
| fi | |||||
| mkdir ./train | |||||
| cd ./train || exit | |||||
| python ${BASEPATH}/launch.py \ | |||||
| --nproc_per_node=$1 \ | |||||
| --visible_devices=$3 \ | |||||
| --server_id=$2 \ | |||||
| --training_script=${BASEPATH}/train.py \ | |||||
| --dataset_path=$4 &> train.log & # dataset train folder | |||||
| @@ -0,0 +1,148 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """train_imagenet.""" | |||||
| import os | |||||
| import time | |||||
| import argparse | |||||
| import random | |||||
| import numpy as np | |||||
| from dataset import create_dataset | |||||
| from lr_generator import get_lr | |||||
| from config import config | |||||
| from mindspore import context | |||||
| from mindspore import Tensor | |||||
| from mindspore.model_zoo.mobilenet import mobilenet_v2 | |||||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||||
| from mindspore.nn.optim.momentum import Momentum | |||||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||||
| from mindspore.train.model import Model, ParallelMode | |||||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback | |||||
| from mindspore.train.loss_scale_manager import FixedLossScaleManager | |||||
| import mindspore.dataset.engine as de | |||||
| from mindspore.communication.management import init | |||||
| random.seed(1) | |||||
| np.random.seed(1) | |||||
| de.config.set_seed(1) | |||||
| parser = argparse.ArgumentParser(description='Image classification') | |||||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||||
| args_opt = parser.parse_args() | |||||
| device_id = int(os.getenv('DEVICE_ID')) | |||||
| rank_id = int(os.getenv('RANK_ID')) | |||||
| rank_size = int(os.getenv('RANK_SIZE')) | |||||
| run_distribute = rank_size > 1 | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False) | |||||
| context.set_context(enable_task_sink=True) | |||||
| context.set_context(enable_loop_sink=True) | |||||
| context.set_context(enable_mem_reuse=True) | |||||
| class Monitor(Callback): | |||||
| """ | |||||
| Monitor loss and time. | |||||
| Args: | |||||
| lr_init (numpy array): train lr | |||||
| Returns: | |||||
| None. | |||||
| Examples: | |||||
| >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy()) | |||||
| """ | |||||
| def __init__(self, lr_init=None): | |||||
| super(Monitor, self).__init__() | |||||
| self.lr_init = lr_init | |||||
| self.lr_init_len = len(lr_init) | |||||
| def epoch_begin(self, run_context): | |||||
| self.losses = [] | |||||
| self.epoch_time = time.time() | |||||
| def epoch_end(self, run_context): | |||||
| cb_params = run_context.original_args() | |||||
| epoch_mseconds = (time.time() - self.epoch_time) * 1000 | |||||
| per_step_mseconds = epoch_mseconds / cb_params.batch_num | |||||
| print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds, | |||||
| per_step_mseconds, | |||||
| np.mean(self.losses) | |||||
| ), flush=True) | |||||
| def step_begin(self, run_context): | |||||
| self.step_time = time.time() | |||||
| def step_end(self, run_context): | |||||
| cb_params = run_context.original_args() | |||||
| step_mseconds = (time.time() - self.step_time) * 1000 | |||||
| step_loss = cb_params.net_outputs | |||||
| if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): | |||||
| step_loss = step_loss[0] | |||||
| if isinstance(step_loss, Tensor): | |||||
| step_loss = np.mean(step_loss.asnumpy()) | |||||
| self.losses.append(step_loss) | |||||
| cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num | |||||
| print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format( | |||||
| cb_params.cur_epoch_num - 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss, | |||||
| np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]), flush=True) | |||||
| if __name__ == '__main__': | |||||
| if run_distribute: | |||||
| context.set_context(enable_hccl=True) | |||||
| context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL, | |||||
| parameter_broadcast=True, mirror_mean=True) | |||||
| auto_parallel_context().set_all_reduce_fusion_split_indices([140]) | |||||
| init() | |||||
| else: | |||||
| context.set_context(enable_hccl=False) | |||||
| epoch_size = config.epoch_size | |||||
| net = mobilenet_v2(num_classes=config.num_classes) | |||||
| loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') | |||||
| print("train args: ", args_opt, "\ncfg: ", config, | |||||
| "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size)) | |||||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, | |||||
| repeat_num=epoch_size, batch_size=config.batch_size) | |||||
| step_size = dataset.get_dataset_size() | |||||
| loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) | |||||
| lr = Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config.lr, | |||||
| warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size)) | |||||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, | |||||
| config.weight_decay, config.loss_scale) | |||||
| model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale) | |||||
| cb = None | |||||
| if rank_id == 0: | |||||
| cb = [Monitor(lr_init=lr.asnumpy())] | |||||
| if config.save_checkpoint: | |||||
| config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size, | |||||
| keep_checkpoint_max=config.keep_checkpoint_max) | |||||
| ckpt_cb = ModelCheckpoint(prefix="mobilenet", directory=config.save_checkpoint_path, config=config_ck) | |||||
| cb += [ckpt_cb] | |||||
| model.train(epoch_size, dataset, callbacks=cb) | |||||
| @@ -0,0 +1,135 @@ | |||||
| # ResNet101 Example | |||||
| ## Description | |||||
| This is an example of training ResNet101 with ImageNet dataset in MindSpore. | |||||
| ## Requirements | |||||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||||
| - Download the dataset ImageNet2012. | |||||
| > Unzip the ImageNet2012 dataset to any path you want; the folder should include the train and eval datasets as follows: | |||||
| ``` | |||||
| . | |||||
| └─dataset | |||||
| ├─ilsvrc                 # train dataset | |||||
| │ | |||||
| └─validation_preprocess  # eval dataset | |||||
| ``` | |||||
| ## Example structure | |||||
| ```shell | |||||
| . | |||||
| ├── crossentropy.py # CrossEntropy loss function | |||||
| ├── config.py # parameter configuration | |||||
| ├── dataset.py # data preprocessing | |||||
| ├── eval.py # eval net | |||||
| ├── lr_generator.py # generate learning rate | |||||
| ├── run_distribute_train.sh # launch distributed training(8p) | |||||
| ├── run_infer.sh # launch evaluating | |||||
| ├── run_standalone_train.sh # launch standalone training(1p) | |||||
| └── train.py # train net | |||||
| ``` | |||||
| ## Parameter configuration | |||||
| Parameters for both training and evaluation can be set in config.py. A sketch of overriding these values at runtime follows the listing below. | |||||
| ``` | |||||
| "class_num": 1001, # dataset class number | |||||
| "batch_size": 32, # batch size of input tensor | |||||
| "loss_scale": 1024, # loss scale | |||||
| "momentum": 0.9, # momentum optimizer | |||||
| "weight_decay": 1e-4, # weight decay | |||||
| "epoch_size": 120, # epoch sizes for training | |||||
| "buffer_size": 1000, # number of queue size in data preprocessing | |||||
| "image_height": 224, # image height | |||||
| "image_width": 224, # image width | |||||
| "save_checkpoint": True, # whether save checkpoint or not | |||||
| "save_checkpoint_steps": 500, # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step | |||||
| "keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint | |||||
| "save_checkpoint_path": "./", # path to save checkpoint relative to the executed path | |||||
| "warmup_epochs": 0, # number of warmup epoch | |||||
| "lr_decay_mode": "cosine" # decay mode for generating learning rate | |||||
| "label_smooth": 1, # label_smooth | |||||
| "label_smooth_factor": 0.1, # label_smooth_factor | |||||
| "lr": 0.1 # base learning rate | |||||
| ``` | |||||
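Since the configuration is a plain `EasyDict` (see `config.py` later in this example), individual fields can be overridden from a driver script before training. A minimal sketch, assuming `config.py` is importable from the working directory:

```python
# Hypothetical override of a few config.py values for a quick dry run.
from config import config

config.batch_size = 16          # smaller batch for a single-device test
config.epoch_size = 1           # train for a single epoch only
config.save_checkpoint = False  # skip checkpointing for the dry run

print(config.batch_size, config.epoch_size, config.save_checkpoint)
```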
| ## Running the example | |||||
| ### Train | |||||
| #### Usage | |||||
| ``` | |||||
| # distributed training | |||||
| sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH] | |||||
| # standalone training | |||||
| sh run_standalone_train.sh [DATASET_PATH] | |||||
| ``` | |||||
| #### Launch | |||||
| ```bash | |||||
| # distributed training example(8p) | |||||
| sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc | |||||
| # standalone training example(1p) | |||||
| sh run_standalone_train.sh dataset/ilsvrc | |||||
| ``` | |||||
| > For details about rank_table.json, refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||||
| #### Result | |||||
| Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". You can find checkpoint files together with results like the following in the log. | |||||
| ``` | |||||
| # distribute training result(8p) | |||||
| epoch: 1 step: 5004, loss is 4.805483 | |||||
| epoch: 2 step: 5004, loss is 3.2121816 | |||||
| epoch: 3 step: 5004, loss is 3.429647 | |||||
| epoch: 4 step: 5004, loss is 3.3667371 | |||||
| epoch: 5 step: 5004, loss is 3.1718972 | |||||
| ... | |||||
| epoch: 67 step: 5004, loss is 2.2768745 | |||||
| epoch: 68 step: 5004, loss is 1.7223864 | |||||
| epoch: 69 step: 5004, loss is 2.0665488 | |||||
| epoch: 70 step: 5004, loss is 1.8717369 | |||||
| ... | |||||
| ``` | |||||
| ### Infer | |||||
| #### Usage | |||||
| ``` | |||||
| # infer | |||||
| sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH] | |||||
| ``` | |||||
| #### Launch | |||||
| ```bash | |||||
| # infer with checkpoint | |||||
| sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt | |||||
| ``` | |||||
| > The checkpoint is produced during the training process. | |||||
| #### Result | |||||
| Inference results will be stored in the example path, in a folder named "infer". There you can find results like the following in the log. | |||||
| ``` | |||||
| result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt | |||||
| ``` | |||||
| @@ -0,0 +1,39 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| network config setting, will be used in train.py and eval.py | |||||
| """ | |||||
| from easydict import EasyDict as ed | |||||
| config = ed({ | |||||
| "class_num": 1001, | |||||
| "batch_size": 32, | |||||
| "loss_scale": 1024, | |||||
| "momentum": 0.9, | |||||
| "weight_decay": 1e-4, | |||||
| "epoch_size": 120, | |||||
| "buffer_size": 1000, | |||||
| "image_height": 224, | |||||
| "image_width": 224, | |||||
| "save_checkpoint": True, | |||||
| "save_checkpoint_steps": 500, | |||||
| "keep_checkpoint_max": 10, | |||||
| "save_checkpoint_path": "./", | |||||
| "warmup_epochs": 0, | |||||
| "lr_decay_mode": "cosine", | |||||
| "label_smooth": 1, | |||||
| "label_smooth_factor": 0.1, | |||||
| "lr": 0.1 | |||||
| }) | |||||
| @@ -0,0 +1,36 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """define loss function for network""" | |||||
| from mindspore.nn.loss.loss import _Loss | |||||
| from mindspore.ops import operations as P | |||||
| from mindspore.ops import functional as F | |||||
| from mindspore import Tensor | |||||
| from mindspore.common import dtype as mstype | |||||
| import mindspore.nn as nn | |||||
| class CrossEntropy(_Loss): | |||||
| """the redefined loss function with SoftmaxCrossEntropyWithLogits""" | |||||
| def __init__(self, smooth_factor=0., num_classes=1001): | |||||
| super(CrossEntropy, self).__init__() | |||||
| self.onehot = P.OneHot() | |||||
| self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) | |||||
| self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) | |||||
| self.ce = nn.SoftmaxCrossEntropyWithLogits() | |||||
| self.mean = P.ReduceMean(False) | |||||
| def construct(self, logit, label): | |||||
| one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) | |||||
| loss = self.ce(logit, one_hot_label) | |||||
| loss = self.mean(loss, 0) | |||||
| return loss | |||||
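A quick sanity check of the loss defined above. This is an illustrative sketch only (random logits, made-up labels), assuming the module is importable as `crossentropy` and an Ascend device is available as in the rest of this example:

```python
# Illustrative usage of the CrossEntropy loss defined in crossentropy.py above.
import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.common import dtype as mstype
from crossentropy import CrossEntropy

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

loss_fn = CrossEntropy(smooth_factor=0.1, num_classes=1001)
logits = Tensor(np.random.randn(2, 1001).astype(np.float32))  # fake network output
labels = Tensor(np.array([3, 7]), mstype.int32)               # fake sparse labels
print(loss_fn(logits, labels))                                # scalar loss value
```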
| @@ -0,0 +1,89 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| create train or eval dataset. | |||||
| """ | |||||
| import os | |||||
| import mindspore.common.dtype as mstype | |||||
| import mindspore.dataset.engine as de | |||||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||||
| import mindspore.dataset.transforms.c_transforms as C2 | |||||
| from config import config | |||||
| def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||||
| """ | |||||
| create a train or evaluate dataset | |||||
| Args: | |||||
| dataset_path(string): the path of dataset. | |||||
| do_train(bool): whether dataset is used for train or eval. | |||||
| repeat_num(int): the repeat times of dataset. Default: 1 | |||||
| batch_size(int): the batch size of dataset. Default: 32 | |||||
| Returns: | |||||
| dataset | |||||
| """ | |||||
| device_num = int(os.getenv("RANK_SIZE")) | |||||
| rank_id = int(os.getenv("RANK_ID")) | |||||
| if device_num == 1: | |||||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) | |||||
| else: | |||||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||||
| num_shards=device_num, shard_id=rank_id) | |||||
| resize_height = 224 | |||||
| rescale = 1.0 / 255.0 | |||||
| shift = 0.0 | |||||
| # define map operations | |||||
| decode_op = C.Decode() | |||||
| random_resize_crop_op = C.RandomResizedCrop(resize_height, (0.08, 1.0), (0.75, 1.33), max_attempts=100) | |||||
| horizontal_flip_op = C.RandomHorizontalFlip(rank_id / (rank_id + 1)) | |||||
| resize_op_256 = C.Resize((256, 256)) | |||||
| center_crop = C.CenterCrop(224) | |||||
| rescale_op = C.Rescale(rescale, shift) | |||||
| normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278)) | |||||
| changeswap_op = C.HWC2CHW() | |||||
| trans = [] | |||||
| if do_train: | |||||
| trans = [decode_op, | |||||
| random_resize_crop_op, | |||||
| horizontal_flip_op, | |||||
| rescale_op, | |||||
| normalize_op, | |||||
| changeswap_op] | |||||
| else: | |||||
| trans = [decode_op, | |||||
| resize_op_256, | |||||
| center_crop, | |||||
| rescale_op, | |||||
| normalize_op, | |||||
| changeswap_op] | |||||
| type_cast_op = C2.TypeCast(mstype.int32) | |||||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8) | |||||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) | |||||
| # apply shuffle operations | |||||
| ds = ds.shuffle(buffer_size=config.buffer_size) | |||||
| # apply batch operations | |||||
| ds = ds.batch(batch_size, drop_remainder=True) | |||||
| # apply dataset repeat operation | |||||
| ds = ds.repeat(repeat_num) | |||||
| return ds | |||||
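A hypothetical single-device call of `create_dataset` above; the dataset path is a placeholder, and `RANK_SIZE`/`RANK_ID` are set explicitly because the function reads them from the environment:

```python
# Illustrative single-device usage of create_dataset from dataset.py above.
import os

os.environ["RANK_SIZE"] = "1"   # single device, so no sharding
os.environ["RANK_ID"] = "0"

from dataset import create_dataset

train_ds = create_dataset("/path/to/dataset/ilsvrc", do_train=True,
                          repeat_num=1, batch_size=32)
print("steps per epoch:", train_ds.get_dataset_size())
```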
| @@ -0,0 +1,84 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| eval. | |||||
| """ | |||||
| import os | |||||
| import argparse | |||||
| import random | |||||
| import numpy as np | |||||
| from dataset import create_dataset | |||||
| from config import config | |||||
| from mindspore import context | |||||
| from mindspore.model_zoo.resnet import resnet101 | |||||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||||
| from mindspore.train.model import Model, ParallelMode | |||||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||||
| import mindspore.dataset.engine as de | |||||
| from mindspore.communication.management import init | |||||
| from crossentropy import CrossEntropy | |||||
| random.seed(1) | |||||
| np.random.seed(1) | |||||
| de.config.set_seed(1) | |||||
| parser = argparse.ArgumentParser(description='Image classification') | |||||
| parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') | |||||
| parser.add_argument('--device_num', type=int, default=1, help='Device num.') | |||||
| parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.') | |||||
| parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.') | |||||
| parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') | |||||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||||
| args_opt = parser.parse_args() | |||||
| device_id = int(os.getenv('DEVICE_ID')) | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id) | |||||
| context.set_context(enable_task_sink=True) | |||||
| context.set_context(enable_loop_sink=True) | |||||
| context.set_context(enable_mem_reuse=True) | |||||
| if __name__ == '__main__': | |||||
| if args_opt.do_eval: | |||||
| context.set_context(enable_hccl=False) | |||||
| else: | |||||
| if args_opt.run_distribute: | |||||
| context.set_context(enable_hccl=True) | |||||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||||
| mirror_mean=True, parameter_broadcast=True) | |||||
| auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313]) | |||||
| init() | |||||
| else: | |||||
| context.set_context(enable_hccl=False) | |||||
| epoch_size = config.epoch_size | |||||
| net = resnet101(class_num=config.class_num) | |||||
| if not config.label_smooth: | |||||
| config.label_smooth_factor = 0.0 | |||||
| loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) | |||||
| if args_opt.do_eval: | |||||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size) | |||||
| step_size = dataset.get_dataset_size() | |||||
| if args_opt.checkpoint_path: | |||||
| param_dict = load_checkpoint(args_opt.checkpoint_path) | |||||
| load_param_into_net(net, param_dict) | |||||
| net.set_train(False) | |||||
| model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) | |||||
| res = model.eval(dataset) | |||||
| print("result:", res, "ckpt=", args_opt.checkpoint_path) | |||||
| @@ -0,0 +1,52 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """learning rate generator""" | |||||
| import math | |||||
| import numpy as np | |||||
| def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr): | |||||
| lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps) | |||||
| lr = float(init_lr) + lr_inc * current_step | |||||
| return lr | |||||
| def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch): | |||||
| """ | |||||
| generate learning rate array with cosine | |||||
| Args: | |||||
| lr(float): base learning rate | |||||
| steps_per_epoch(int): steps size of one epoch | |||||
| warmup_epochs(int): number of warmup epochs | |||||
| max_epoch(int): total epochs of training | |||||
| Returns: | |||||
| np.array, learning rate array | |||||
| """ | |||||
| base_lr = lr | |||||
| warmup_init_lr = 0 | |||||
| total_steps = int(max_epoch * steps_per_epoch) | |||||
| warmup_steps = int(warmup_epochs * steps_per_epoch) | |||||
| decay_steps = total_steps - warmup_steps | |||||
| lr_each_step = [] | |||||
| for i in range(total_steps): | |||||
| if i < warmup_steps: | |||||
| lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr) | |||||
| else: | |||||
| linear_decay = (total_steps - i) / decay_steps | |||||
| cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps)) | |||||
| decayed = linear_decay * cosine_decay + 0.00001 | |||||
| lr = base_lr * decayed | |||||
| lr_each_step.append(lr) | |||||
| return np.array(lr_each_step).astype(np.float32) | |||||
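The schedule produced by `warmup_cosine_annealing_lr` can be inspected offline; a small sketch using the defaults from `config.py` (5004 steps per epoch matches the sample training logs shown earlier):

```python
# Illustrative inspection of the cosine learning-rate schedule defined above.
from lr_generator import warmup_cosine_annealing_lr

lr_each_step = warmup_cosine_annealing_lr(lr=0.1, steps_per_epoch=5004,
                                          warmup_epochs=0, max_epoch=120)
print(lr_each_step.shape)                 # (600480,): one value per training step
print(lr_each_step[0], lr_each_step[-1])  # first and last learning rate
```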
| @@ -0,0 +1,66 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| if [ $# != 2 ] | |||||
| then | |||||
| echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]" | |||||
| exit 1 | |||||
| fi | |||||
| get_real_path(){ | |||||
| if [ "${1:0:1}" == "/" ]; then | |||||
| echo "$1" | |||||
| else | |||||
| echo "$(realpath -m $PWD/$1)" | |||||
| fi | |||||
| } | |||||
| PATH1=$(get_real_path $1) | |||||
| PATH2=$(get_real_path $2) | |||||
| echo $PATH1 | |||||
| echo $PATH2 | |||||
| if [ ! -f $PATH1 ] | |||||
| then | |||||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" | |||||
| exit 1 | |||||
| fi | |||||
| if [ ! -d $PATH2 ] | |||||
| then | |||||
| echo "error: DATASET_PATH=$PATH2 is not a directory" | |||||
| exit 1 | |||||
| fi | |||||
| ulimit -u unlimited | |||||
| export DEVICE_NUM=8 | |||||
| export RANK_SIZE=8 | |||||
| export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 | |||||
| export RANK_TABLE_FILE=$PATH1 | |||||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||||
| do | |||||
| export DEVICE_ID=$i | |||||
| export RANK_ID=$i | |||||
| rm -rf ./train_parallel$i | |||||
| mkdir ./train_parallel$i | |||||
| cp *.py ./train_parallel$i | |||||
| cp *.sh ./train_parallel$i | |||||
| cd ./train_parallel$i || exit | |||||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||||
| env > env.log | |||||
| python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & | |||||
| cd .. | |||||
| done | |||||
| @@ -0,0 +1,64 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| if [ $# != 2 ] | |||||
| then | |||||
| echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]" | |||||
| exit 1 | |||||
| fi | |||||
| get_real_path(){ | |||||
| if [ "${1:0:1}" == "/" ]; then | |||||
| echo "$1" | |||||
| else | |||||
| echo "$(realpath -m $PWD/$1)" | |||||
| fi | |||||
| } | |||||
| PATH1=$(get_real_path $1) | |||||
| PATH2=$(get_real_path $2) | |||||
| echo $PATH1 | |||||
| echo $PATH2 | |||||
| if [ ! -d $PATH1 ] | |||||
| then | |||||
| echo "error: DATASET_PATH=$PATH1 is not a directory" | |||||
| exit 1 | |||||
| fi | |||||
| if [ ! -f $PATH2 ] | |||||
| then | |||||
| echo "error: CHECKPOINT_PATH=$PATH2 is not a file" | |||||
| exit 1 | |||||
| fi | |||||
| ulimit -u unlimited | |||||
| export DEVICE_NUM=1 | |||||
| export DEVICE_ID=0 | |||||
| export RANK_SIZE=$DEVICE_NUM | |||||
| export RANK_ID=0 | |||||
| if [ -d "infer" ]; | |||||
| then | |||||
| rm -rf ./infer | |||||
| fi | |||||
| mkdir ./infer | |||||
| cp *.py ./infer | |||||
| cp *.sh ./infer | |||||
| cd ./infer || exit | |||||
| env > env.log | |||||
| echo "start infering for device $DEVICE_ID" | |||||
| python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & | |||||
| cd .. | |||||
| @@ -0,0 +1,56 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| if [ $# != 1 ] | |||||
| then | |||||
| echo "Usage: sh run_standalone_train.sh [DATASET_PATH]" | |||||
| exit 1 | |||||
| fi | |||||
| get_real_path(){ | |||||
| if [ "${1:0:1}" == "/" ]; then | |||||
| echo "$1" | |||||
| else | |||||
| echo "$(realpath -m $PWD/$1)" | |||||
| fi | |||||
| } | |||||
| PATH1=$(get_real_path $1) | |||||
| echo $PATH1 | |||||
| if [ ! -d $PATH1 ] | |||||
| then | |||||
| echo "error: DATASET_PATH=$PATH1 is not a directory" | |||||
| exit 1 | |||||
| fi | |||||
| ulimit -u unlimited | |||||
| export DEVICE_NUM=1 | |||||
| export DEVICE_ID=0 | |||||
| export RANK_ID=0 | |||||
| export RANK_SIZE=1 | |||||
| if [ -d "train" ]; | |||||
| then | |||||
| rm -rf ./train | |||||
| fi | |||||
| mkdir ./train | |||||
| cp *.py ./train | |||||
| cp *.sh ./train | |||||
| cd ./train || exit | |||||
| echo "start training for device $DEVICE_ID" | |||||
| env > env.log | |||||
| python train.py --do_train=True --dataset_path=$PATH1 &> log & | |||||
| cd .. | |||||
| @@ -0,0 +1,103 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """train_imagenet.""" | |||||
| import os | |||||
| import argparse | |||||
| import random | |||||
| import numpy as np | |||||
| from dataset import create_dataset | |||||
| from lr_generator import warmup_cosine_annealing_lr | |||||
| from config import config | |||||
| from mindspore import context | |||||
| from mindspore import Tensor | |||||
| from mindspore.model_zoo.resnet import resnet101 | |||||
| from mindspore.parallel._auto_parallel_context import auto_parallel_context | |||||
| from mindspore.nn.optim.momentum import Momentum | |||||
| from mindspore.train.model import Model, ParallelMode | |||||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||||
| from mindspore.train.loss_scale_manager import FixedLossScaleManager | |||||
| import mindspore.dataset.engine as de | |||||
| from mindspore.communication.management import init | |||||
| import mindspore.nn as nn | |||||
| import mindspore.common.initializer as weight_init | |||||
| from crossentropy import CrossEntropy | |||||
| random.seed(1) | |||||
| np.random.seed(1) | |||||
| de.config.set_seed(1) | |||||
| parser = argparse.ArgumentParser(description='Image classification') | |||||
| parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') | |||||
| parser.add_argument('--device_num', type=int, default=1, help='Device num.') | |||||
| parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.') | |||||
| parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.') | |||||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||||
| args_opt = parser.parse_args() | |||||
| device_id = int(os.getenv('DEVICE_ID')) | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id) | |||||
| context.set_context(enable_task_sink=True) | |||||
| context.set_context(enable_loop_sink=True) | |||||
| context.set_context(enable_mem_reuse=True) | |||||
| if __name__ == '__main__': | |||||
| if args_opt.do_eval: | |||||
| context.set_context(enable_hccl=False) | |||||
| else: | |||||
| if args_opt.run_distribute: | |||||
| context.set_context(enable_hccl=True) | |||||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||||
| mirror_mean=True, parameter_broadcast=True) | |||||
| auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313]) | |||||
| init() | |||||
| else: | |||||
| context.set_context(enable_hccl=False) | |||||
| epoch_size = config.epoch_size | |||||
| net = resnet101(class_num=config.class_num) | |||||
| # weight init | |||||
| for _, cell in net.cells_and_names(): | |||||
| if isinstance(cell, nn.Conv2d): | |||||
| cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), | |||||
| cell.weight.default_input.shape(), | |||||
| cell.weight.default_input.dtype()) | |||||
| if isinstance(cell, nn.Dense): | |||||
| cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), | |||||
| cell.weight.default_input.shape(), | |||||
| cell.weight.default_input.dtype()) | |||||
| if not config.label_smooth: | |||||
| config.label_smooth_factor = 0.0 | |||||
| loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) | |||||
| if args_opt.do_train: | |||||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, | |||||
| repeat_num=epoch_size, batch_size=config.batch_size) | |||||
| step_size = dataset.get_dataset_size() | |||||
| loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) | |||||
| # learning rate strategy with cosine | |||||
| lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size)) | |||||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, | |||||
| config.weight_decay, config.loss_scale) | |||||
| model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}) | |||||
| time_cb = TimeMonitor(data_size=step_size) | |||||
| loss_cb = LossMonitor() | |||||
| cb = [time_cb, loss_cb] | |||||
| if config.save_checkpoint: | |||||
| config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps, | |||||
| keep_checkpoint_max=config.keep_checkpoint_max) | |||||
| ckpt_cb = ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck) | |||||
| cb += [ckpt_cb] | |||||
| model.train(epoch_size, dataset, callbacks=cb) | |||||
| @@ -8,7 +8,7 @@ This is an example of training ResNet-50 with CIFAR-10 dataset in MindSpore. | |||||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | - Install [MindSpore](https://www.mindspore.cn/install/en). | ||||
| - Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). | |||||
| - Download the dataset CIFAR-10. | |||||
| > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | ||||
| > ``` | > ``` | ||||
| @@ -40,9 +40,9 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||||
| rank_id = int(os.getenv("RANK_ID")) | rank_id = int(os.getenv("RANK_ID")) | ||||
| if device_num == 1: | if device_num == 1: | ||||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=True) | |||||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True) | |||||
| else: | else: | ||||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=True, | |||||
| ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True, | |||||
| num_shards=device_num, shard_id=rank_id) | num_shards=device_num, shard_id=rank_id) | ||||
| resize_height = config.image_height | resize_height = config.image_height | ||||
| @@ -68,11 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||||
| type_cast_op = C2.TypeCast(mstype.int32) | type_cast_op = C2.TypeCast(mstype.int32) | ||||
| ds = ds.map(input_columns="label", operations=type_cast_op) | |||||
| ds = ds.map(input_columns="image", operations=trans) | |||||
| # apply shuffle operations | |||||
| ds = ds.shuffle(buffer_size=config.buffer_size) | |||||
| ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) | |||||
| ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) | |||||
| # apply batch operations | # apply batch operations | ||||
| ds = ds.batch(batch_size, drop_remainder=True) | ds = ds.batch(batch_size, drop_remainder=True) | ||||
| @@ -22,7 +22,7 @@ fi | |||||
| if [ ! -f $1 ] | if [ ! -f $1 ] | ||||
| then | then | ||||
| echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||||
| exit 1 | exit 1 | ||||
| fi | fi | ||||
| @@ -36,6 +36,7 @@ ulimit -u unlimited | |||||
| export DEVICE_NUM=8 | export DEVICE_NUM=8 | ||||
| export RANK_SIZE=8 | export RANK_SIZE=8 | ||||
| export MINDSPORE_HCCL_CONFIG_PATH=$1 | export MINDSPORE_HCCL_CONFIG_PATH=$1 | ||||
| export RANK_TABLE_FILE=$1 | |||||
| for((i=0; i<${DEVICE_NUM}; i++)) | for((i=0; i<${DEVICE_NUM}; i++)) | ||||
| do | do | ||||
| @@ -61,14 +61,14 @@ if __name__ == '__main__': | |||||
| context.set_context(enable_hccl=True) | context.set_context(enable_hccl=True) | ||||
| context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | ||||
| mirror_mean=True) | mirror_mean=True) | ||||
| auto_parallel_context().set_all_reduce_fusion_split_indices([140]) | |||||
| auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160]) | |||||
| init() | init() | ||||
| else: | else: | ||||
| context.set_context(enable_hccl=False) | context.set_context(enable_hccl=False) | ||||
| epoch_size = config.epoch_size | epoch_size = config.epoch_size | ||||
| net = resnet50(class_num=config.class_num) | net = resnet50(class_num=config.class_num) | ||||
| loss = SoftmaxCrossEntropyWithLogits(sparse=True) | |||||
| loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") | |||||
| if args_opt.do_train: | if args_opt.do_train: | ||||
| @@ -8,7 +8,7 @@ This example is for VGG16 model training and evaluation. | |||||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | - Install [MindSpore](https://www.mindspore.cn/install/en). | ||||
| - Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz). | |||||
| - Download the CIFAR-10 binary version dataset. | |||||
| > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | > Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows: | ||||
| > ``` | > ``` | ||||
| @@ -49,6 +49,24 @@ You will get the accuracy as following: | |||||
| result: {'acc': 0.92} | result: {'acc': 0.92} | ||||
| ``` | ``` | ||||
| ### Distribute Training | |||||
| ``` | |||||
| sh run_distribute_train.sh rank_table.json your_data_path | |||||
| ``` | |||||
| The above shell script runs distributed training in the background; you can view the results in the file `train_parallel[X]/log`. | |||||
| You will get loss values like the following: | |||||
| ``` | |||||
| # grep "result: " train_parallel*/log | |||||
| train_parallel0/log:epoch: 1 step: 97, loss is 1.9060308 | |||||
| train_parallel0/log:epoch: 2 step: 97, loss is 1.6003821 | |||||
| ... | |||||
| train_parallel1/log:epoch: 1 step: 97, loss is 1.7095519 | |||||
| train_parallel1/log:epoch: 2 step: 97, loss is 1.7133579 | |||||
| ... | |||||
| ... | |||||
| ``` | |||||
| > For details about rank_table.json, refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||||
| ## Usage: | ## Usage: | ||||
| @@ -75,4 +93,14 @@ parameters/options: | |||||
| --data_path the storage path of dataset | --data_path the storage path of dataset | ||||
| --device_id the device which used to evaluate model. | --device_id the device which used to evaluate model. | ||||
| --checkpoint_path the checkpoint file path used to evaluate model. | --checkpoint_path the checkpoint file path used to evaluate model. | ||||
| ``` | |||||
| ``` | |||||
| ### Distribute Training | |||||
| ``` | |||||
| Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH] | |||||
| parameters/options: | |||||
| MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path. | |||||
| DATA_PATH the storage path of dataset. | |||||
| ``` | |||||
| @@ -28,7 +28,11 @@ def create_dataset(data_home, repeat_num=1, training=True): | |||||
| data_dir = os.path.join(data_home, "cifar-10-batches-bin") | data_dir = os.path.join(data_home, "cifar-10-batches-bin") | ||||
| if not training: | if not training: | ||||
| data_dir = os.path.join(data_home, "cifar-10-verify-bin") | data_dir = os.path.join(data_home, "cifar-10-verify-bin") | ||||
| data_set = ds.Cifar10Dataset(data_dir) | |||||
| rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None | |||||
| rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None | |||||
| data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id) | |||||
| resize_height = cfg.image_height | resize_height = cfg.image_height | ||||
| resize_width = cfg.image_width | resize_width = cfg.image_width | ||||
| rescale = 1.0 / 255.0 | rescale = 1.0 / 255.0 | ||||
| @@ -0,0 +1,54 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| if [ $# != 2 ] | |||||
| then | |||||
| echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]" | |||||
| exit 1 | |||||
| fi | |||||
| if [ ! -f $1 ] | |||||
| then | |||||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file" | |||||
| exit 1 | |||||
| fi | |||||
| if [ ! -d $2 ] | |||||
| then | |||||
| echo "error: DATA_PATH=$2 is not a directory" | |||||
| exit 1 | |||||
| fi | |||||
| ulimit -u unlimited | |||||
| export DEVICE_NUM=8 | |||||
| export RANK_SIZE=8 | |||||
| export MINDSPORE_HCCL_CONFIG_PATH=$1 | |||||
| export RANK_TABLE_FILE=$1 | |||||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||||
| do | |||||
| export DEVICE_ID=$i | |||||
| export RANK_ID=$i | |||||
| rm -rf ./train_parallel$i | |||||
| mkdir ./train_parallel$i | |||||
| cp *.py ./train_parallel$i | |||||
| cp *.sh ./train_parallel$i | |||||
| cd ./train_parallel$i || exit | |||||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||||
| env > env.log | |||||
| python train.py --data_path=$2 --device_id=$i &> log & | |||||
| cd .. | |||||
| done | |||||
| @@ -17,16 +17,18 @@ | |||||
| python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID | python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID | ||||
| """ | """ | ||||
| import argparse | import argparse | ||||
| import os | |||||
| import random | import random | ||||
| import numpy as np | import numpy as np | ||||
| import mindspore.nn as nn | import mindspore.nn as nn | ||||
| from mindspore import Tensor | from mindspore import Tensor | ||||
| from mindspore.communication.management import init | |||||
| from mindspore.nn.optim.momentum import Momentum | from mindspore.nn.optim.momentum import Momentum | ||||
| from mindspore.train.model import Model | |||||
| from mindspore.train.model import Model, ParallelMode | |||||
| from mindspore import context | from mindspore import context | ||||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor | |||||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||||
| from mindspore.model_zoo.vgg import vgg16 | from mindspore.model_zoo.vgg import vgg16 | ||||
| import dataset | |||||
| from dataset import create_dataset | |||||
| from config import cifar_cfg as cfg | from config import cifar_cfg as cfg | ||||
| random.seed(1) | random.seed(1) | ||||
| np.random.seed(1) | np.random.seed(1) | ||||
| @@ -62,17 +64,30 @@ if __name__ == '__main__': | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) | context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) | ||||
| context.set_context(device_id=args_opt.device_id) | context.set_context(device_id=args_opt.device_id) | ||||
| context.set_context(enable_task_sink=True) | |||||
| context.set_context(enable_loop_sink=True) | |||||
| context.set_context(enable_mem_reuse=True, enable_hccl=False) | context.set_context(enable_mem_reuse=True, enable_hccl=False) | ||||
| device_num = int(os.environ.get("DEVICE_NUM", 1)) | |||||
| if device_num > 1: | |||||
| context.reset_auto_parallel_context() | |||||
| context.set_context(enable_hccl=True) | |||||
| context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||||
| mirror_mean=True) | |||||
| init() | |||||
| dataset = create_dataset(args_opt.data_path, cfg.epoch_size) | |||||
| batch_num = dataset.get_dataset_size() | |||||
| net = vgg16(num_classes=cfg.num_classes) | net = vgg16(num_classes=cfg.num_classes) | ||||
| lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size) | |||||
| lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num) | |||||
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) | opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) | ||||
| loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) | loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) | ||||
| model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) | model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) | ||||
| dataset = dataset.create_dataset(args_opt.data_path, cfg.epoch_size) | |||||
| batch_num = dataset.get_dataset_size() | |||||
| config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=cfg.keep_checkpoint_max) | config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=cfg.keep_checkpoint_max) | ||||
| time_cb = TimeMonitor(data_size=batch_num) | |||||
| ckpoint_cb = ModelCheckpoint(prefix="train_vgg_cifar10", directory="./", config=config_ck) | ckpoint_cb = ModelCheckpoint(prefix="train_vgg_cifar10", directory="./", config=config_ck) | ||||
| loss_cb = LossMonitor() | loss_cb = LossMonitor() | ||||
| model.train(cfg.epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb]) | |||||
| model.train(cfg.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, loss_cb]) | |||||
| print("train success") | |||||
| @@ -0,0 +1,94 @@ | |||||
| # YOLOv3 Example | |||||
| ## Description | |||||
| YOLOv3 network based on ResNet-18, with support for training and evaluation. | |||||
| ## Requirements | |||||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||||
| - Dataset | |||||
| We use coco2017 as the training dataset. | |||||
| 1. The directory structure is as follows: | |||||
| > ``` | |||||
| > . | |||||
| > ├── annotations # annotation jsons | |||||
| > ├── train2017 # train dataset | |||||
| > └── val2017 # infer dataset | |||||
| > ``` | |||||
| 2. Organize the dataset information into a TXT file; each row in the file is as follows: | |||||
| ``` | |||||
| train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 | |||||
| ``` | |||||
| Each row is a space-separated image annotation: the first column is the relative path of an image, and the remaining columns are box and class information in the format [xmin,ymin,xmax,ymax,class]. `dataset.py` is the parsing script; it reads each image from the path formed by joining `image_dir` (the dataset directory) with the relative path in `anno_path` (the TXT file path). Both `image_dir` and `anno_path` are external inputs. | |||||
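A minimal, hypothetical sketch of parsing one annotation row (the function name and paths below are illustrative only, not part of `dataset.py`):

```python
# Hypothetical parser for one annotation row of the TXT file described above.
import os

def parse_annotation_line(line, image_dir):
    parts = line.strip().split(" ")
    image_path = os.path.join(image_dir, parts[0])   # absolute image path
    boxes = []
    for box in parts[1:]:
        xmin, ymin, xmax, ymax, cls = (int(v) for v in box.split(","))
        boxes.append((xmin, ymin, xmax, ymax, cls))
    return image_path, boxes

print(parse_annotation_line(
    "train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2", "/data"))
```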
| ## Running the Example | |||||
| ### Training | |||||
| To train the model, run `train.py` with the dataset `image_dir`, `anno_path` and `mindrecord_dir`. If `mindrecord_dir` is empty, it will generate a [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) file from `image_dir` and `anno_path` (the absolute image path is formed by joining `image_dir` with the relative path in `anno_path`). **Note that if `mindrecord_dir` isn't empty, it will use `mindrecord_dir` rather than `image_dir` and `anno_path`.** | |||||
| - Standalone mode | |||||
| ``` | |||||
| sh run_standalone_train.sh 0 50 ./Mindrecord_train ./dataset ./dataset/train.txt | |||||
| ``` | |||||
| The input variables are device id, epoch size, mindrecord directory path, dataset directory path and train TXT file path. | |||||
| - Distributed mode | |||||
| ``` | |||||
| sh run_distribute_train.sh 8 150 /data/Mindrecord_train /data /data/train.txt /data/hccl.json | |||||
| ``` | |||||
| The input variables are the number of devices, epoch size, mindrecord directory path, dataset directory path, train TXT file path and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute paths.** | |||||
| You will get the loss value and time of each step like the following: | |||||
| ``` | |||||
| epoch: 145 step: 156, loss is 12.202981 | |||||
| epoch time: 25599.22742843628, per step time: 164.0976117207454 | |||||
| epoch: 146 step: 156, loss is 16.91706 | |||||
| epoch time: 23199.971675872803, per step time: 148.7177671530308 | |||||
| epoch: 147 step: 156, loss is 13.04007 | |||||
| epoch time: 23801.95164680481, per step time: 152.57661312054364 | |||||
| epoch: 148 step: 156, loss is 10.431475 | |||||
| epoch time: 23634.241580963135, per step time: 151.50154859591754 | |||||
| epoch: 149 step: 156, loss is 14.665991 | |||||
| epoch time: 24118.8325881958, per step time: 154.60790120638333 | |||||
| epoch: 150 step: 156, loss is 10.779521 | |||||
| epoch time: 25319.57221031189, per step time: 162.30495006610187 | |||||
| ``` | |||||
| Note that these results are for two-class detection (person and face) using our own annotations with coco2017; you can change `num_classes` in `config.py` to train on your own dataset. Support for the 80 coco2017 classes will be added in the near future. | |||||
| ### Evaluation | |||||
| To evaluate, run `eval.py` with the dataset `image_dir`, `anno_path` (eval TXT), `mindrecord_dir` and `ckpt_path`. `ckpt_path` is the path of the [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file. | |||||
| ``` | |||||
| sh run_eval.sh 0 yolo.ckpt ./Mindrecord_eval ./dataset ./dataset/eval.txt | |||||
| ``` | |||||
| The input variables are device id, checkpoint path, mindrecord directory path, dataset directory path and eval TXT file path. | |||||
| You will get the precision and recall value of each class: | |||||
| ``` | |||||
| class 0 precision is 88.18%, recall is 66.00% | |||||
| class 1 precision is 85.34%, recall is 79.13% | |||||
| ``` | |||||
| Note that the precision and recall values are results of two-class detection (person and face) using our own annotations with coco2017. | |||||
| @@ -13,51 +13,6 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| """__init__""" | """__init__""" | ||||
| from __future__ import absolute_import as _abs | |||||
| import sys | |||||
| import os | |||||
| def AKGAddPath(): | |||||
| """_akg add path.""" | |||||
| pwd = os.path.dirname(os.path.realpath(__file__)) | |||||
| tvm_path = os.path.realpath(pwd) | |||||
| if tvm_path not in sys.path: | |||||
| sys.path.insert(0, tvm_path) | |||||
| else: | |||||
| sys.path.remove(tvm_path) | |||||
| sys.path.insert(0, tvm_path) | |||||
| class AKGMetaPathFinder: | |||||
| """class AKGMetaPath finder.""" | |||||
| def find_module(self, fullname, path=None): | |||||
| """method _akg find module.""" | |||||
| if fullname.startswith("_akg.tvm"): | |||||
| rname = fullname[5:] | |||||
| return AKGMetaPathLoader(rname) | |||||
| if fullname.startswith("_akg.topi"): | |||||
| rname = fullname[5:] | |||||
| return AKGMetaPathLoader(rname) | |||||
| return None | |||||
| class AKGMetaPathLoader: | |||||
| """class AKGMetaPathLoader loader.""" | |||||
| def __init__(self, rname): | |||||
| self.__rname = rname | |||||
| def load_module(self, fullname): | |||||
| if self.__rname in sys.modules: | |||||
| sys.modules.pop(self.__rname) | |||||
| AKGAddPath() | |||||
| __import__(self.__rname, globals(), locals()) | |||||
| self.__target_module = sys.modules[self.__rname] | |||||
| sys.modules[fullname] = self.__target_module | |||||
| return self.__target_module | |||||
| sys.meta_path.insert(0, AKGMetaPathFinder()) | |||||
| from . import add_path | |||||
| from .op_build import op_build | from .op_build import op_build | ||||
| from .message import compilewithjson | from .message import compilewithjson | ||||
| @@ -0,0 +1,61 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| """add tvm path""" | |||||
| import sys | |||||
| import os | |||||
| def AKGAddPath(): | |||||
| """_akg add path.""" | |||||
| pwd = os.path.dirname(os.path.realpath(__file__)) | |||||
| tvm_path = os.path.realpath(pwd) | |||||
| if tvm_path not in sys.path: | |||||
| sys.path.insert(0, tvm_path) | |||||
| else: | |||||
| sys.path.remove(tvm_path) | |||||
| sys.path.insert(0, tvm_path) | |||||
| class AKGMetaPathFinder: | |||||
| """class AKGMetaPath finder.""" | |||||
| def find_module(self, fullname, path=None): | |||||
| """method _akg find module.""" | |||||
| if fullname.startswith("_akg.tvm"): | |||||
| rname = fullname[5:] | |||||
| return AKGMetaPathLoader(rname) | |||||
| if fullname.startswith("_akg.topi"): | |||||
| rname = fullname[5:] | |||||
| return AKGMetaPathLoader(rname) | |||||
| return None | |||||
| class AKGMetaPathLoader: | |||||
| """class AKGMetaPathLoader loader.""" | |||||
| def __init__(self, rname): | |||||
| self.__rname = rname | |||||
| def load_module(self, fullname): | |||||
| if self.__rname in sys.modules: | |||||
| sys.modules.pop(self.__rname) | |||||
| AKGAddPath() | |||||
| __import__(self.__rname, globals(), locals()) | |||||
| self.__target_module = sys.modules[self.__rname] | |||||
| sys.modules[fullname] = self.__target_module | |||||
| return self.__target_module | |||||
| sys.meta_path.insert(0, AKGMetaPathFinder()) | |||||
| @@ -122,10 +122,12 @@ def get_args(op_info, arg_type): | |||||
| elif arg_type == 'attrs': | elif arg_type == 'attrs': | ||||
| for item in op_info[arg_type]: | for item in op_info[arg_type]: | ||||
| if 'value' not in item: | |||||
| raise ValueError("Json string Errors, attr key:value not found.") | |||||
| if item["name"] != "isRef": | |||||
| args.append(item['value']) | |||||
| if item["valid"]: | |||||
| if 'value' not in item: | |||||
| raise ValueError("Json string Errors, attr key:value not found.") | |||||
| if item["name"] != "isRef": | |||||
| args.append(item['value']) | |||||
| return args | return args | ||||
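The effect of the new `valid` check can be illustrated with a standalone sketch that mirrors the attrs branch above; the attribute list here is made up for illustration only:

```python
# Standalone illustration of the attrs filtering logic shown above.
attrs = [
    {"name": "axis", "valid": True, "value": 1},
    {"name": "isRef", "valid": True, "value": False},  # skipped: name is isRef
    {"name": "keep_dims", "valid": False},             # skipped: not valid
]

args = []
for item in attrs:
    if item["valid"]:
        if 'value' not in item:
            raise ValueError("Json string Errors, attr key:value not found.")
        if item["name"] != "isRef":
            args.append(item['value'])

print(args)  # [1]
```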
| @@ -91,6 +91,14 @@ void PrintNodeInputType(std::ostringstream &buffer, const AnfNodePtr &nd) { | |||||
| } | } | ||||
| } | } | ||||
| void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd) { | |||||
| buffer << " : ("; | |||||
| PrintNodeInputType(buffer, nd); | |||||
| buffer << ") -> ("; | |||||
| PrintNodeOutputType(buffer, nd); | |||||
| buffer << ")"; | |||||
| } | |||||
| struct SubGraphIRInfo { | struct SubGraphIRInfo { | ||||
| int32_t local_var; | int32_t local_var; | ||||
| std::ostringstream buffer; | std::ostringstream buffer; | ||||
| @@ -18,12 +18,14 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include "ir/dtype/type.h" | |||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| constexpr char PARALLEL_STRATEGY[] = "strategy"; | constexpr char PARALLEL_STRATEGY[] = "strategy"; | ||||
| void DumpIR(const std::string &filename, const FuncGraphPtr &func_graph, bool dump_full_name = false); | void DumpIR(const std::string &filename, const FuncGraphPtr &func_graph, bool dump_full_name = false); | ||||
| void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd); | |||||
| const std::string ToShortString(const TypeId &typeId); | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CCSRC_DEBUG_ANF_IR_DUMP_H_ | #endif // MINDSPORE_CCSRC_DEBUG_ANF_IR_DUMP_H_ | ||||
| @@ -134,7 +134,7 @@ class DebugInfo : public Base { | |||||
| explicit DebugInfo(const LocationPtr &loc); | explicit DebugInfo(const LocationPtr &loc); | ||||
| virtual ~DebugInfo() = default; | |||||
| ~DebugInfo() override = default; | |||||
| MS_DECLARE_PARENT(DebugInfo, Base); | MS_DECLARE_PARENT(DebugInfo, Base); | ||||
| int64_t debug_id(); | int64_t debug_id(); | ||||
| int64_t unique_id() const { return unique_id_; } | int64_t unique_id() const { return unique_id_; } | ||||
| @@ -231,10 +231,10 @@ std::string AnalyzedFuncGraphExporter::GetNodeType(const AnfNodePtr &node) { | |||||
| auto engine = node_cfg_->engine(); | auto engine = node_cfg_->engine(); | ||||
| auto cfg = engine->MakeConfig(node, ctx); | auto cfg = engine->MakeConfig(node, ctx); | ||||
| auto abs = engine->cache().GetValue(cfg); | auto abs = engine->cache().GetValue(cfg); | ||||
| if (abs == nullptr) { | if (abs == nullptr) { | ||||
| return "Undefined"; | return "Undefined"; | ||||
| } | } | ||||
| auto dtype = abs->BuildType(); | auto dtype = abs->BuildType(); | ||||
| auto shape = abs->BuildShape(); | auto shape = abs->BuildShape(); | ||||
| std::ostringstream oss; | std::ostringstream oss; | ||||
| @@ -321,7 +321,7 @@ class TraceTransform : public TraceInfo { | |||||
| std::string full_name() override { return full_name_ + transform_name_; } | std::string full_name() override { return full_name_ + transform_name_; } | ||||
| MS_DECLARE_PARENT(TraceTransform, TraceInfo); | MS_DECLARE_PARENT(TraceTransform, TraceInfo); | ||||
| virtual std::string symbol() { | |||||
| std::string symbol() override { | |||||
| if (transform_name_.empty()) { | if (transform_name_.empty()) { | ||||
| return ""; | return ""; | ||||
| } | } | ||||
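This hunk replaces `virtual std::string symbol()` with `std::string symbol() override`, so the compiler now checks that the method really overrides a base-class virtual. A small self-contained illustration of what `override` buys (hypothetical class names, not the repository's `TraceInfo` hierarchy):

```cpp
#include <iostream>
#include <string>

class TraceInfoBase {
 public:
  virtual ~TraceInfoBase() = default;
  virtual std::string symbol() { return ""; }
};

class TraceDerived : public TraceInfoBase {
 public:
  // 'override' makes the intent explicit; if the base signature ever changed
  // (e.g. gained const), this line would fail to compile instead of silently
  // declaring a new, unrelated virtual function.
  std::string symbol() override { return "@"; }
};

int main() {
  TraceDerived derived;
  TraceInfoBase *base = &derived;
  std::cout << base->symbol() << std::endl;  // prints "@" via the override
  return 0;
}
```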
| @@ -18,14 +18,15 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include <memory> | #include <memory> | ||||
| #include <set> | |||||
| #include <unordered_map> | |||||
| #include <utility> | |||||
| #include <map> | |||||
| #include "kernel/oplib/oplib.h" | #include "kernel/oplib/oplib.h" | ||||
| #include "kernel/kernel_query.h" | #include "kernel/kernel_query.h" | ||||
| #include "session/anf_runtime_algorithm.h" | #include "session/anf_runtime_algorithm.h" | ||||
| #include "kernel/kernel_build_info.h" | #include "kernel/kernel_build_info.h" | ||||
| #include "utils/context/ms_context.h" | #include "utils/context/ms_context.h" | ||||
| #include "operator/ops.h" | #include "operator/ops.h" | ||||
| #include "debug/anf_ir_dump.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| @@ -180,6 +181,7 @@ void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, co | |||||
| } | } | ||||
| void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *support_index) { | void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *support_index) { | ||||
| MS_EXCEPTION_IF_NULL(support_index); | |||||
| int index = kUnSupportMixedDataTypeIndex; | int index = kUnSupportMixedDataTypeIndex; | ||||
| switch (data_type) { | switch (data_type) { | ||||
| case kNumberTypeFloat16: | case kNumberTypeFloat16: | ||||
| @@ -197,6 +199,7 @@ void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *s | |||||
| void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t input_index, | void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t input_index, | ||||
| std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) { | std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) { | ||||
| MS_EXCEPTION_IF_NULL(support_datatype); | |||||
| auto data_type = kernel_build_info.GetInputDeviceType(input_index); | auto data_type = kernel_build_info.GetInputDeviceType(input_index); | ||||
| support_datatype->push_back(data_type); | support_datatype->push_back(data_type); | ||||
| AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); | AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); | ||||
| @@ -204,6 +207,7 @@ void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_i | |||||
| void AddKernelOutputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t output_index, | void AddKernelOutputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t output_index, | ||||
| std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) { | std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) { | ||||
| MS_EXCEPTION_IF_NULL(support_datatype); | |||||
| auto data_type = kernel_build_info.GetOutputDeviceType(output_index); | auto data_type = kernel_build_info.GetOutputDeviceType(output_index); | ||||
| support_datatype->push_back(data_type); | support_datatype->push_back(data_type); | ||||
| AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); | AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index); | ||||
| @@ -214,16 +218,7 @@ void AddNodeInputDataType(const CNodePtr &kernel_node, size_t input_index, | |||||
| std::vector<TypeId> *node_mix_precision_datatype) { | std::vector<TypeId> *node_mix_precision_datatype) { | ||||
| AnfNodePtr cur_input = AnfAlgo::GetInputNode(kernel_node, input_index); | AnfNodePtr cur_input = AnfAlgo::GetInputNode(kernel_node, input_index); | ||||
| MS_EXCEPTION_IF_NULL(cur_input); | MS_EXCEPTION_IF_NULL(cur_input); | ||||
| TypeId input_origin_type; | |||||
| if (cur_input->isa<Parameter>() && AnfAlgo::IsParameterWeight(cur_input->cast<ParameterPtr>())) { | |||||
| // weight | |||||
| input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0); | |||||
| } else if (cur_input->isa<ValueNode>()) { | |||||
| input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0); | |||||
| } else { | |||||
| // feature map | |||||
| input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index); | |||||
| } | |||||
| TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index); | |||||
| AddSupportMixedPrecisionDataTypeIndex(input_origin_type, node_mix_precision_datatype_index); | AddSupportMixedPrecisionDataTypeIndex(input_origin_type, node_mix_precision_datatype_index); | ||||
| node_mix_precision_datatype->push_back(input_origin_type); | node_mix_precision_datatype->push_back(input_origin_type); | ||||
| } | } | ||||
| @@ -238,8 +233,8 @@ void AddNodeOutputDataType(const CNodePtr &kernel_node, size_t output_index, | |||||
| void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_index, | void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_index, | ||||
| const std::vector<TypeId> &node_mix_precision_datatype, | const std::vector<TypeId> &node_mix_precision_datatype, | ||||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||||
| if (node_mix_precision_datatype_index.size() != node_mix_precision_datatype.size()) { | if (node_mix_precision_datatype_index.size() != node_mix_precision_datatype.size()) { | ||||
| MS_LOG(EXCEPTION) << "node datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size " | MS_LOG(EXCEPTION) << "node datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size " | ||||
| << node_mix_precision_datatype.size(); | << node_mix_precision_datatype.size(); | ||||
| @@ -251,10 +246,11 @@ void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_ind | |||||
| } | } | ||||
| } | } | ||||
| int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index, | |||||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||||
| bool RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index, | |||||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); | |||||
| CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, | CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, | ||||
| kernel_match_datatype_idx); | kernel_match_datatype_idx); | ||||
| for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { | for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { | ||||
| @@ -289,40 +285,22 @@ int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_data | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| if (kernel_match_datatype_idx->size() >= 1) { | |||||
| return SizeToInt(kernel_match_datatype_idx->begin()->first); | |||||
| } | |||||
| return -1; | |||||
| return !kernel_match_datatype_idx->empty(); | |||||
| } | } | ||||
| int GetMinReducePrecisionCountIndex(std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx, | |||||
| const std::unordered_map<size_t, size_t> &precision_reduce_count) { | |||||
| int selected_index = -1; | |||||
| size_t min_reduce_precision_count = kMaxCount; | |||||
| auto iter = kernel_match_datatype_idx->begin(); | |||||
| while (iter != kernel_match_datatype_idx->end()) { | |||||
| auto find_iter = precision_reduce_count.find(iter->first); | |||||
| if (find_iter == precision_reduce_count.end()) { | |||||
| continue; | |||||
| } | |||||
| if (min_reduce_precision_count > find_iter->second) { | |||||
| selected_index = SizeToInt(iter->first); | |||||
| min_reduce_precision_count = find_iter->second; | |||||
| } | |||||
| ++iter; | |||||
| } | |||||
| return selected_index; | |||||
| bool CanDataTypeReduce(const std::vector<int> &datatype_indexes, int check_index, | |||||
| const std::vector<int> &node_mix_precision_datatype_index) { | |||||
| return datatype_indexes[check_index] != kUnSupportMixedDataTypeIndex && | |||||
| datatype_indexes[check_index] <= node_mix_precision_datatype_index[check_index]; | |||||
| } | } | ||||
| int RaiseOrReduceDataTypePrecisionSelect( | |||||
| const std::vector<int> &node_mix_precision_datatype_index, const std::vector<TypeId> &node_mix_precision_datatype, | |||||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||||
| bool RaiseOrReduceDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index, | |||||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes, | |||||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); | |||||
| CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, | CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes, | ||||
| kernel_match_datatype_idx); | kernel_match_datatype_idx); | ||||
| // reduce / raise | |||||
| std::unordered_map<size_t, size_t> precision_reduce_count; | |||||
| for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { | for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) { | ||||
| if (node_mix_precision_datatype[i] == kTypeUnknown) { | if (node_mix_precision_datatype[i] == kTypeUnknown) { | ||||
| continue; | continue; | ||||
| @@ -348,29 +326,21 @@ int RaiseOrReduceDataTypePrecisionSelect( | |||||
| if (i >= datatype_indexes.size()) { | if (i >= datatype_indexes.size()) { | ||||
| MS_LOG(EXCEPTION) << "index " << i << "> kernel datatype indexes size " << datatype_indexes.size(); | MS_LOG(EXCEPTION) << "index " << i << "> kernel datatype indexes size " << datatype_indexes.size(); | ||||
| } | } | ||||
| if (datatype_indexes[i] == kUnSupportMixedDataTypeIndex) { | |||||
| if (!CanDataTypeReduce(datatype_indexes, i, node_mix_precision_datatype_index)) { | |||||
| iter = kernel_match_datatype_idx->erase(iter); | iter = kernel_match_datatype_idx->erase(iter); | ||||
| } else { | } else { | ||||
| if (datatype_indexes[i] < node_mix_precision_datatype_index[i]) { | |||||
| auto count_iter = precision_reduce_count.find(iter->first); | |||||
| if (count_iter != precision_reduce_count.end()) { | |||||
| count_iter->second++; | |||||
| } else { | |||||
| precision_reduce_count[iter->first] = 1; | |||||
| } | |||||
| } | |||||
| ++iter; | ++iter; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return GetMinReducePrecisionCountIndex(kernel_match_datatype_idx, precision_reduce_count); | |||||
| return !kernel_match_datatype_idx->empty(); | |||||
| } | } | ||||
| void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelBuildInfo &kernel_build_info, | void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelBuildInfo &kernel_build_info, | ||||
| std::vector<int> *support_indexes, std::vector<TypeId> *node_mix_precision_datatype, | std::vector<int> *support_indexes, std::vector<TypeId> *node_mix_precision_datatype, | ||||
| std::vector<TypeId> *support_datatypes, | std::vector<TypeId> *support_datatypes, | ||||
| std::vector<int> *node_mix_precision_datatype_index) { | std::vector<int> *node_mix_precision_datatype_index) { | ||||
| MS_EXCEPTION_IF_NULL(node_mix_precision_datatype); | |||||
| bool add_node_datatype_flag = false; | bool add_node_datatype_flag = false; | ||||
| if (node_mix_precision_datatype->size() == 0) { | if (node_mix_precision_datatype->size() == 0) { | ||||
| add_node_datatype_flag = true; | add_node_datatype_flag = true; | ||||
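The precision-selection routines above now take `std::map` instead of `std::unordered_map` (presumably for deterministic iteration order), return a `bool` meaning "at least one candidate survived", and drop candidates in place using the iterator returned by `erase`. A generic sketch of that erase-while-iterating pattern over an ordered candidate map (illustrative only; the predicate is made up, with -1 standing in for `kUnSupportMixedDataTypeIndex`):

```cpp
#include <iostream>
#include <map>
#include <vector>

// Removes candidates whose first datatype index is unsupported and reports
// whether any candidate is left afterwards.
bool FilterCandidates(std::map<size_t, std::vector<int>> *candidates) {
  auto iter = candidates->begin();
  while (iter != candidates->end()) {
    const std::vector<int> &indexes = iter->second;
    if (!indexes.empty() && indexes[0] == -1) {
      // erase() returns the next valid iterator, so the loop stays well defined.
      iter = candidates->erase(iter);
    } else {
      ++iter;
    }
  }
  return !candidates->empty();
}

int main() {
  std::map<size_t, std::vector<int>> candidates = {{0, {-1, 2}}, {1, {0, 1}}, {2, {-1}}};
  bool any_left = FilterCandidates(&candidates);
  std::cout << "candidates left: " << candidates.size() << ", ok: " << any_left << std::endl;  // 1, ok: 1
  return 0;
}
```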
| @@ -390,104 +360,59 @@ void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelB | |||||
| } | } | ||||
| } | } | ||||
| int PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index, | |||||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||||
| const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatype, | |||||
| std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) { | |||||
| void PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index, | |||||
| const std::vector<TypeId> &node_mix_precision_datatype, | |||||
| const std::map<size_t, std::vector<TypeId>> &kernel_support_datatype, | |||||
| std::map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx); | |||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| MS_EXCEPTION_IF_NULL(precision_reduce); | MS_EXCEPTION_IF_NULL(precision_reduce); | ||||
| std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx; | |||||
| std::map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx; | |||||
| // raise precision | // raise precision | ||||
| int selected_index = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||||
| kernel_support_datatype, kernel_match_datatype_idx); | |||||
| if (selected_index != -1) { | |||||
| int max_match = 0; | |||||
| auto iter = kernel_match_datatype_idx->begin(); | |||||
| int match_count = 0; | |||||
| while (iter != kernel_match_datatype_idx->end()) { | |||||
| auto kernel_datatypes = kernel_support_datatype.find(iter->first); | |||||
| if (kernel_datatypes == kernel_support_datatype.end()) { | |||||
| MS_LOG(EXCEPTION) << "Can not find kernel index" << iter->first << "'s datatype."; | |||||
| } | |||||
| if (kernel_datatypes->second.size() < node_mix_precision_datatype.size()) { | |||||
| MS_LOG(EXCEPTION) << "Kernel datatype size is not equal to node datatype size!"; | |||||
| } | |||||
| for (size_t i = 0; i < node_mix_precision_datatype.size(); ++i) { | |||||
| if (node_mix_precision_datatype[i] == kernel_datatypes->second[i]) { | |||||
| ++match_count; | |||||
| } | |||||
| } | |||||
| if (match_count > max_match) { | |||||
| selected_index = SizeToInt(iter->first); | |||||
| } | |||||
| ++iter; | |||||
| } | |||||
| bool selected_ret = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||||
| kernel_support_datatype, kernel_match_datatype_idx); | |||||
| if (selected_ret) { | |||||
| *precision_reduce = false; | |||||
| return; | |||||
| } | } | ||||
| if (selected_index == -1 && context_ptr->enable_reduce_precision()) { | |||||
| selected_index = | |||||
| RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||||
| kernel_support_datatype, &kernel_match_datatype_idx_copy); | |||||
| if (selected_index != -1) { | |||||
| *precision_reduce = true; | |||||
| } | |||||
| if (context_ptr->enable_reduce_precision()) { | |||||
| selected_ret = RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||||
| kernel_support_datatype, &kernel_match_datatype_idx_copy); | |||||
| } | |||||
| if (selected_ret) { | |||||
| *precision_reduce = true; | |||||
| *kernel_match_datatype_idx = kernel_match_datatype_idx_copy; | |||||
| } | } | ||||
| return selected_index; | |||||
| } | } | ||||
| void SelectKernel(const CNodePtr &kernel_node, bool precision_reduce, const std::vector<TypeId> &node_datatype, | |||||
| const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_info_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr); | |||||
| void PrintRaiseOrReducePrecisionSelectedInfo(const CNodePtr &cnode, | |||||
| const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_build_info, | |||||
| bool precision_reduce) { | |||||
| MS_EXCEPTION_IF_NULL(selected_kernel_build_info); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| std::ostringstream buffer; | |||||
| buffer << cnode->DebugString(); | |||||
| if (precision_reduce) { | if (precision_reduce) { | ||||
| std::ostringstream datatype; | |||||
| size_t input_num = selected_kernel_info_ptr->GetInputNum(); | |||||
| size_t i = 0; | |||||
| datatype << "("; | |||||
| for (; i < input_num && i < node_datatype.size(); ++i) { | |||||
| datatype << static_cast<int>(node_datatype[i]); | |||||
| if (i < input_num - 1) { | |||||
| datatype << ", "; | |||||
| } | |||||
| } | |||||
| datatype << ") -> ("; | |||||
| for (; i < node_datatype.size(); ++i) { | |||||
| datatype << static_cast<int>(node_datatype[i]); | |||||
| if (i < node_datatype.size() - 1) { | |||||
| datatype << ", "; | |||||
| } | |||||
| } | |||||
| datatype << ")"; | |||||
| MS_LOG(WARNING) << kernel_node->DebugString() << " reduce precision, node datatype: " << datatype.str() | |||||
| << ", select kernel: %s" << selected_kernel_info_ptr->ToString(); | |||||
| buffer << " reduce precision, node datatype: "; | |||||
| } else { | |||||
| buffer << " raise precision, node datatype: "; | |||||
| } | } | ||||
| AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, kernel_node.get()); | |||||
| // Set format and data type for input tensor. | |||||
| SetTensorDeviceInfo(*selected_kernel_info_ptr, kernel_node); | |||||
| PrintInputAndOutputInferType(buffer, cnode); | |||||
| buffer << ", select kernel:" << selected_kernel_build_info->ToString(); | |||||
| MS_LOG(INFO) << buffer.str(); | |||||
| } | } | ||||
| } // namespace | |||||
| void SelectKernelInfo(const CNodePtr &kernel_node) { | |||||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list; | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| kernel::KernelQuery(kernel_node, &kernel_info_list); | |||||
| std::shared_ptr<kernel::KernelBuildInfo> ChooseMatchedKernelInfo( | |||||
| const CNodePtr &kernel_node, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) { | |||||
| if (kernel_info_list.empty()) { | |||||
| return nullptr; | |||||
| } | |||||
| std::vector<int> most_match_counts = {-1, -1, -1, -1}; | std::vector<int> most_match_counts = {-1, -1, -1, -1}; | ||||
| int selected_index = -1; | |||||
| std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx; | |||||
| std::unordered_map<size_t, std::vector<TypeId>> kernel_support_datatype; | |||||
| std::vector<int> node_mix_precision_datatype_index; | |||||
| std::vector<TypeId> node_mix_precision_datatype; | |||||
| size_t selected_index = 0; | |||||
| for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { | for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { | ||||
| std::vector<int> cur_kernel_info_match_counts = {0, 0, 0, 0}; | std::vector<int> cur_kernel_info_match_counts = {0, 0, 0, 0}; | ||||
| auto kernel_build_info = *(kernel_info_list[info_index]); | auto kernel_build_info = *(kernel_info_list[info_index]); | ||||
| std::vector<int> support_indexes; | |||||
| std::vector<TypeId> support_datatypes; | |||||
| AddNodeAndKernelDataType(kernel_node, kernel_build_info, &support_indexes, &node_mix_precision_datatype, | |||||
| &support_datatypes, &node_mix_precision_datatype_index); | |||||
| kernel_match_datatype_idx[info_index] = support_indexes; | |||||
| kernel_support_datatype[info_index] = support_datatypes; | |||||
| if (!MatchInferOutputDataType(kernel_node, kernel_build_info)) { | |||||
| continue; | |||||
| } | |||||
| std::shared_ptr<kernel::KernelBuildInfo> kernel_info_ptr = kernel_info_list[info_index]; | std::shared_ptr<kernel::KernelBuildInfo> kernel_info_ptr = kernel_info_list[info_index]; | ||||
| UpdateCurMatchCounts(*kernel_info_ptr, kernel_node, &cur_kernel_info_match_counts); | UpdateCurMatchCounts(*kernel_info_ptr, kernel_node, &cur_kernel_info_match_counts); | ||||
| // Currently the selection policy is: match format count first, then datatype count. | // Currently the selection policy is: match format count first, then datatype count. | ||||
| @@ -495,22 +420,77 @@ void SelectKernelInfo(const CNodePtr &kernel_node) { | |||||
| selected_index = SizeToInt(info_index); | selected_index = SizeToInt(info_index); | ||||
| } | } | ||||
| } | } | ||||
| return kernel_info_list[selected_index]; | |||||
| } | |||||
| bool precision_reduce = false; | |||||
| if (selected_index == -1) { | |||||
| selected_index = PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, | |||||
| kernel_support_datatype, &kernel_match_datatype_idx, &precision_reduce); | |||||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> GetAllMatchedFilteredKernelInfo( | |||||
| const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) { | |||||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> result; | |||||
| for (const auto &kernel_build_info : kernel_info_list) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_build_info); | |||||
| if (!MatchInferOutputDataType(cnode, *kernel_build_info)) { | |||||
| continue; | |||||
| } | |||||
| result.push_back(kernel_build_info); | |||||
| } | } | ||||
| if (selected_index == -1) { | |||||
| MS_LOG(EXCEPTION) << kernel_node->DebugString() << "Cannot find valid kernel Info !"; | |||||
| return result; | |||||
| } | |||||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecisionMatchedKernelInfo( | |||||
| const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list, | |||||
| bool *precision_reduce) { | |||||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_kernel_info_list; | |||||
| std::map<size_t, std::vector<int>> kernel_match_datatype_idx; | |||||
| std::map<size_t, std::vector<TypeId>> kernel_support_datatype; | |||||
| std::vector<int> node_mix_precision_datatype_index; | |||||
| std::vector<TypeId> node_mix_precision_datatype; | |||||
| for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) { | |||||
| std::vector<int> support_indexes; | |||||
| std::vector<TypeId> support_datatypes; | |||||
| MS_EXCEPTION_IF_NULL(kernel_info_list[info_index]); | |||||
| AddNodeAndKernelDataType(cnode, *kernel_info_list[info_index], &support_indexes, &node_mix_precision_datatype, | |||||
| &support_datatypes, &node_mix_precision_datatype_index); | |||||
| kernel_match_datatype_idx[info_index] = support_indexes; | |||||
| kernel_support_datatype[info_index] = support_datatypes; | |||||
| } | } | ||||
| auto index = IntToSize(selected_index); | |||||
| if (index >= kernel_info_list.size()) { | |||||
| MS_LOG(EXCEPTION) << "index outof range"; | |||||
| PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatype, | |||||
| &kernel_match_datatype_idx, precision_reduce); | |||||
| std::transform( | |||||
| kernel_match_datatype_idx.begin(), kernel_match_datatype_idx.end(), std::back_inserter(filtered_kernel_info_list), | |||||
| [&](const std::pair<size_t, std::vector<int>> &matched_idx) -> std::shared_ptr<kernel::KernelBuildInfo> { | |||||
| return kernel_info_list[matched_idx.first]; | |||||
| }); | |||||
| return filtered_kernel_info_list; | |||||
| } | |||||
| } // namespace | |||||
| void SelectKernelInfo(const CNodePtr &kernel_node) { | |||||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list; | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| bool precision_reduce = false; | |||||
| std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr; | |||||
| kernel::KernelQuery(kernel_node, &kernel_info_list); | |||||
| // filter kernel info matched with ME inferred type | |||||
| auto filtered_kernel_info_list = GetAllMatchedFilteredKernelInfo(kernel_node, kernel_info_list); | |||||
| if (!filtered_kernel_info_list.empty()) { | |||||
| selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list); | |||||
| } else { | |||||
| // select kernel info using raised or reduced precision | |||||
| filtered_kernel_info_list = | |||||
| FilterRaisedOrReducePrecisionMatchedKernelInfo(kernel_node, kernel_info_list, &precision_reduce); | |||||
| selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list); | |||||
| if (selected_kernel_info == nullptr) { | |||||
| std::ostringstream buffer; | |||||
| PrintInputAndOutputInferType(buffer, kernel_node); | |||||
| MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString() | |||||
| << "] cannot find valid kernel info, not supported the type" << buffer.str(); | |||||
| } else { | |||||
| PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce); | |||||
| } | |||||
| } | } | ||||
| std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info_ptr = kernel_info_list[index]; | |||||
| MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr); | |||||
| SelectKernel(kernel_node, precision_reduce, node_mix_precision_datatype, selected_kernel_info_ptr); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get()); | |||||
| // Set format and data type for input tensor. | |||||
| SetTensorDeviceInfo(*selected_kernel_info, kernel_node); | |||||
| } | } | ||||
| bool CheckKernelAccuracySupported(const CNodePtr &kernel_node, | bool CheckKernelAccuracySupported(const CNodePtr &kernel_node, | ||||
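After this refactor, `SelectKernelInfo` first keeps only the kernels whose output types match the inferred types, and only when that set is empty does it retry with raised or reduced precision before picking the best remaining candidate. A compact standalone sketch of that filter-then-fallback flow (the struct fields and scoring here are placeholders, not the real MindSpore helpers):

```cpp
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

struct KernelInfo {
  std::string name;
  bool exact_match;      // stands in for MatchInferOutputDataType
  bool match_with_cast;  // stands in for the raise/reduce-precision check
  int score;             // stands in for the format/datatype match counts
};

// Keep candidates that satisfy the given boolean member flag.
std::vector<KernelInfo> Filter(const std::vector<KernelInfo> &all, bool KernelInfo::*flag) {
  std::vector<KernelInfo> result;
  std::copy_if(all.begin(), all.end(), std::back_inserter(result),
               [flag](const KernelInfo &k) { return k.*flag; });
  return result;
}

// Pick the highest-scoring candidate from a non-empty list.
KernelInfo ChooseBest(const std::vector<KernelInfo> &candidates) {
  return *std::max_element(candidates.begin(), candidates.end(),
                           [](const KernelInfo &a, const KernelInfo &b) { return a.score < b.score; });
}

int main() {
  std::vector<KernelInfo> all = {{"fp32_kernel", false, true, 3}, {"fp16_kernel", false, true, 5}};
  bool precision_changed = false;

  std::vector<KernelInfo> matched = Filter(all, &KernelInfo::exact_match);
  if (matched.empty()) {
    // Fall back to kernels that only match after raising/reducing precision.
    matched = Filter(all, &KernelInfo::match_with_cast);
    precision_changed = !matched.empty();
  }
  if (matched.empty()) {
    std::cout << "no valid kernel info" << std::endl;
    return 1;
  }
  std::cout << "selected " << ChooseBest(matched).name << (precision_changed ? " (precision changed)" : "")
            << std::endl;
  return 0;
}
```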
| @@ -148,18 +148,29 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe | |||||
| } | } | ||||
| if (bp_end_str.empty()) { | if (bp_end_str.empty()) { | ||||
| auto last_cnode = cnode_exec_order.back(); | |||||
| MS_EXCEPTION_IF_NULL(last_cnode); | |||||
| bp_end_str = last_cnode->fullname_with_scope(); | |||||
| bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order); | |||||
| } | } | ||||
| return bp_end_str; | return bp_end_str; | ||||
| } | } | ||||
| std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) { | |||||
| std::string last_tbe_kernel_name = ""; | |||||
| // find last tbe_kernel | |||||
| for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) { | |||||
| if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) { | |||||
| last_tbe_kernel_name = (*iter)->fullname_with_scope(); | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (last_tbe_kernel_name.empty()) { | |||||
| MS_LOG(WARNING) << "tbe kernel not found in graph"; | |||||
| } | |||||
| return last_tbe_kernel_name; | |||||
| } | |||||
| std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) { | std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) { | ||||
| const char *trace_netoutput = std::getenv(kIterEndNode); | const char *trace_netoutput = std::getenv(kIterEndNode); | ||||
| auto &last_cnode = cnode_exec_order.back(); | |||||
| MS_EXCEPTION_IF_NULL(last_cnode); | |||||
| return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput); | |||||
| return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput); | |||||
| } | } | ||||
| NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, | NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, | ||||
| @@ -114,6 +114,7 @@ class ProfilingUtils { | |||||
| static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order); | static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order); | ||||
| static std::string GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order); | static std::string GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order); | ||||
| static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order); | static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order); | ||||
| static std::string GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order); | |||||
| static void GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order, | static void GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order, | ||||
| NotNull<ProfilingTraceInfo *> profiling_trace); | NotNull<ProfilingTraceInfo *> profiling_trace); | ||||
| static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order, | static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order, | ||||
| @@ -87,6 +87,12 @@ const char *MetaIdLabel(const TypeId &v) { | |||||
| return "kMetaTypeExternal"; | return "kMetaTypeExternal"; | ||||
| case kMetaTypeNone: | case kMetaTypeNone: | ||||
| return "kMetaTypeNone"; | return "kMetaTypeNone"; | ||||
| case kMetaTypeNull: | |||||
| return "kMetaTypeNull"; | |||||
| case kMetaTypeEllipsis: | |||||
| return "kMetaTypeEllipsis"; | |||||
| case kMetaTypeEnd: | |||||
| return "kMetaTypeEnd"; | |||||
| default: | default: | ||||
| return "[Unknown Type Id]"; | return "[Unknown Type Id]"; | ||||
| } | } | ||||
| @@ -166,9 +166,6 @@ Tensor::Tensor(const py::int_ &input, const TypePtr &data_type) { init(py::array | |||||
| Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type) | Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type) | ||||
| : MetaTensor(tensor), dirty_(tensor.dirty_), device_address_(tensor.device_address_) { | : MetaTensor(tensor), dirty_(tensor.dirty_), device_address_(tensor.device_address_) { | ||||
| init(tensor.data_, data_type); | init(tensor.data_, data_type); | ||||
| if (device_address_ != nullptr) { | |||||
| (void)data_sync(); | |||||
| } | |||||
| } | } | ||||
| Tensor &Tensor::operator=(const Tensor &tensor) { | Tensor &Tensor::operator=(const Tensor &tensor) { | ||||
| @@ -17,6 +17,7 @@ | |||||
| #include "kernel/kernel_build_info.h" | #include "kernel/kernel_build_info.h" | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "debug/anf_ir_dump.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| std::string KernelBuildInfo::GetInputFormat(size_t input_index) const { | std::string KernelBuildInfo::GetInputFormat(size_t input_index) const { | ||||
| @@ -82,14 +83,14 @@ std::string KernelBuildInfo::ToString() const { | |||||
| if (index != 0) { | if (index != 0) { | ||||
| output_buffer << ", "; | output_buffer << ", "; | ||||
| } | } | ||||
| output_buffer << "<" << static_cast<int>(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">"; | |||||
| output_buffer << "<" << ToShortString(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">"; | |||||
| } | } | ||||
| output_buffer << ") -> ("; | output_buffer << ") -> ("; | ||||
| for (size_t index = 0; index < GetOutputNum(); ++index) { | for (size_t index = 0; index < GetOutputNum(); ++index) { | ||||
| if (index != 0) { | if (index != 0) { | ||||
| output_buffer << ", "; | output_buffer << ", "; | ||||
| } | } | ||||
| output_buffer << "<" << static_cast<int>(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">"; | |||||
| output_buffer << "<" << ToShortString(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">"; | |||||
| } | } | ||||
| output_buffer << ")"; | output_buffer << ")"; | ||||
| return output_buffer.str(); | return output_buffer.str(); | ||||
| @@ -108,7 +108,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||||
| } | } | ||||
| if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) { | if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) { | ||||
| MS_LOG(DEBUG) << "fuison op build failed, err log: " << task_result << " change to single op build."; | |||||
| MS_LOG(INFO) << "Fusion warning: Fuison op build failed, err log: " << task_result | |||||
| << " change to single op build."; | |||||
| build_failed_num++; | build_failed_num++; | ||||
| } | } | ||||
| auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false); | auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false); | ||||
| @@ -153,6 +153,52 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vector<std::vec | |||||
| } | } | ||||
| } | } | ||||
| void TbeAdapter::FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list, | |||||
| std::vector<nlohmann::json> *inputs_json) { | |||||
| MS_EXCEPTION_IF_NULL(inputs_json); | |||||
| if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { | |||||
| (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json))); | |||||
| } else { | |||||
| if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { | |||||
| inputs_json->emplace_back(inputs_list[2]); | |||||
| inputs_json->emplace_back(inputs_list[0]); | |||||
| inputs_json->emplace_back(inputs_list[1]); | |||||
| for (size_t i = 3; i < inputs_list.size(); ++i) { | |||||
| inputs_json->emplace_back(inputs_list[i]); | |||||
| } | |||||
| } else { | |||||
| inputs_json->emplace_back(inputs_list[1]); | |||||
| inputs_json->emplace_back(inputs_list[0]); | |||||
| for (size_t i = 2; i < inputs_list.size(); ++i) { | |||||
| inputs_json->emplace_back(inputs_list[i]); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| void TbeAdapter::FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer, | |||||
| std::vector<AnfNodePtr> *reorder_data_layer) { | |||||
| MS_EXCEPTION_IF_NULL(reorder_data_layer); | |||||
| if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) { | |||||
| (void)std::copy(data_layer.begin(), data_layer.end(), std::back_inserter((*reorder_data_layer))); | |||||
| } else { | |||||
| if (op_name == "MinimumGrad" || op_name == "MaximumGrad") { | |||||
| reorder_data_layer->emplace_back(data_layer[2]); | |||||
| reorder_data_layer->emplace_back(data_layer[0]); | |||||
| reorder_data_layer->emplace_back(data_layer[1]); | |||||
| for (size_t i = 3; i < data_layer.size(); ++i) { | |||||
| reorder_data_layer->emplace_back(data_layer[i]); | |||||
| } | |||||
| } else { | |||||
| reorder_data_layer->emplace_back(data_layer[1]); | |||||
| reorder_data_layer->emplace_back(data_layer[0]); | |||||
| for (size_t i = 2; i < data_layer.size(); ++i) { | |||||
| reorder_data_layer->emplace_back(data_layer[i]); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| std::map<std::string, FAttrsPass> TbeAdapter::build_json_attr_pass_map_ = { | std::map<std::string, FAttrsPass> TbeAdapter::build_json_attr_pass_map_ = { | ||||
| {"MaximumGrad", TbeAdapter::MaximumGradAttrJsonPass}, | {"MaximumGrad", TbeAdapter::MaximumGradAttrJsonPass}, | ||||
| {"MinimumGrad", TbeAdapter::MinimumGradAttrJsonPass}, | {"MinimumGrad", TbeAdapter::MinimumGradAttrJsonPass}, | ||||
| @@ -44,15 +44,12 @@ class TbeAdapter { | |||||
| static void GenTopKV2IndicesTensorInfo(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, | static void GenTopKV2IndicesTensorInfo(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, | ||||
| std::vector<nlohmann::json> *input_list, kCreaterType creater_type); | std::vector<nlohmann::json> *input_list, kCreaterType creater_type); | ||||
| static void FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list, | |||||
| std::vector<nlohmann::json> *inputs_json); | |||||
| static void FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer, | |||||
| std::vector<AnfNodePtr> *reorder_data_layer); | |||||
| private: | private: | ||||
| static void Conv2DAttrJsonPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||||
| nlohmann::json *attrs_json); | |||||
| static void Conv2DBackpropFilterAttrJsonPass(const AnfNodePtr &anf_node, | |||||
| const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||||
| nlohmann::json *attrs_json); | |||||
| static void Conv2DBackpropInputAttrJsonPass(const AnfNodePtr &anf_node, | |||||
| const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||||
| nlohmann::json *attrs_json); | |||||
| static void MaximumGradAttrJsonPass(const AnfNodePtr &anf_node, | static void MaximumGradAttrJsonPass(const AnfNodePtr &anf_node, | ||||
| const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | ||||
| nlohmann::json *attrs_json); | nlohmann::json *attrs_json); | ||||
| @@ -375,20 +375,26 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no | |||||
| MS_EXCEPTION_IF_NULL(primitive); | MS_EXCEPTION_IF_NULL(primitive); | ||||
| for (const auto &attr_ptr : attrs_ptr) { | for (const auto &attr_ptr : attrs_ptr) { | ||||
| std::string attr_name = attr_ptr->name(); | std::string attr_name = attr_ptr->name(); | ||||
| nlohmann::json attr_obj; | |||||
| attr_obj["name"] = attr_name; | |||||
| if (primitive->GetAttr(attr_name) != nullptr) { | if (primitive->GetAttr(attr_name) != nullptr) { | ||||
| nlohmann::json attr_obj; | |||||
| auto value = primitive->GetAttr(attr_name); | auto value = primitive->GetAttr(attr_name); | ||||
| std::string type = attr_ptr->type(); | std::string type = attr_ptr->type(); | ||||
| ParseAttrValue(type, value, &attr_obj); | ParseAttrValue(type, value, &attr_obj); | ||||
| attr_obj["name"] = attr_name; | |||||
| attr_obj["valid"] = true; | attr_obj["valid"] = true; | ||||
| (*attrs_json).push_back(attr_obj); | |||||
| } else { | } else { | ||||
| if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD && op_info->impl_path() != "") { | |||||
| MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name | |||||
| << " is required, but not set."; | |||||
| if (op_info->impl_path().empty()) { | |||||
| attr_obj["valid"] = false; | |||||
| } else { | |||||
| if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD) { | |||||
| MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name | |||||
| << " is required, but not set."; | |||||
| } else { | |||||
| attr_obj["valid"] = false; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| (*attrs_json).push_back(attr_obj); | |||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -484,7 +490,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp | |||||
| MS_EXCEPTION_IF_NULL(fusion_kernel); | MS_EXCEPTION_IF_NULL(fusion_kernel); | ||||
| // get input layer info | // get input layer info | ||||
| std::vector<std::vector<mindspore::AnfNodePtr>> input_layers; | std::vector<std::vector<mindspore::AnfNodePtr>> input_layers; | ||||
| if (!GetInputLayers(input_nodes, compute_nodes, &input_layers)) { | |||||
| std::map<const AnfNodePtr, FusionDataType> spec_data_input; | |||||
| if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| // gen fusion scope_op json | // gen fusion scope_op json | ||||
| @@ -505,8 +512,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp | |||||
| for (const auto &layer : input_layers) { | for (const auto &layer : input_layers) { | ||||
| for (const auto &data_input : layer) { | for (const auto &data_input : layer) { | ||||
| nlohmann::json data_str; | nlohmann::json data_str; | ||||
| if (!GenFusionDataInputJson(data_input, &data_str, &index)) { | |||||
| MS_LOG(DEBUG) << "GenFusionDataInputJson faild."; | |||||
| if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) { | |||||
| MS_LOG(INFO) << "Fusion error: gen fusion datainput json faild."; | |||||
| return false; | return false; | ||||
| } | } | ||||
| data_list.push_back(data_str); | data_list.push_back(data_str); | ||||
| @@ -519,7 +526,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp | |||||
| } | } | ||||
| void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | ||||
| size_t desc_output_idx, nlohmann::json *output_desc) { | |||||
| size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) { | |||||
| std::string output_desc_name = anf_node->fullname_with_scope(); | std::string output_desc_name = anf_node->fullname_with_scope(); | ||||
| if (node_out_idx > 0) { | if (node_out_idx > 0) { | ||||
| output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx); | output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx); | ||||
| @@ -539,58 +546,109 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_ | |||||
| (*output_desc)["shape"] = shape; | (*output_desc)["shape"] = shape; | ||||
| auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); | auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); | ||||
| if (format == kOpFormat_DEFAULT) { | if (format == kOpFormat_DEFAULT) { | ||||
| if (ori_shape.size() == 4) { | |||||
| format = kOpFormat_NCHW; | |||||
| } else { | |||||
| format = kOpFormat_ND; | |||||
| } | |||||
| format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND; | |||||
| } | } | ||||
| (*output_desc)["format"] = format; | (*output_desc)["format"] = format; | ||||
| (*output_desc)["ori_format"] = kOpFormat_NCHW; | (*output_desc)["ori_format"] = kOpFormat_NCHW; | ||||
| (*output_desc)["output_index"] = desc_output_idx; | (*output_desc)["output_index"] = desc_output_idx; | ||||
| if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) { | |||||
| std::vector<size_t> spec_shape = {}; | |||||
| spec_shape.emplace_back(shape[0]); | |||||
| spec_shape.emplace_back(shape[1]); | |||||
| spec_shape.emplace_back(shape[2] * shape[3]); | |||||
| spec_shape.emplace_back(shape[4]); | |||||
| (*output_desc)["shape"] = spec_shape; | |||||
| } else if (fusion_data_type == kFusionReLUGradV2 && (*output_desc)["data_type"] == "uint8") { | |||||
| std::vector<size_t> spec_shape = {}; | |||||
| spec_shape.emplace_back(shape[0]); | |||||
| spec_shape.emplace_back(shape[1]); | |||||
| spec_shape.emplace_back(shape[2] * shape[3]); | |||||
| spec_shape.emplace_back(16); | |||||
| (*output_desc)["shape"] = spec_shape; | |||||
| (*output_desc)["data_type"] = "bool"; | |||||
| } | |||||
| } | } | ||||
| void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | ||||
| size_t output_index, nlohmann::json *output_desc) { | size_t output_index, nlohmann::json *output_desc) { | ||||
| std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); | std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); | ||||
| (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name); | (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name); | ||||
| (*output_desc)["data_type"] = tbe::TypeIdToString(kNumberTypeFloat32); | |||||
| (*output_desc)["output_index"] = output_index; | (*output_desc)["output_index"] = output_index; | ||||
| std::vector<size_t> shape; | std::vector<size_t> shape; | ||||
| (*output_desc)["shape"] = shape; | (*output_desc)["shape"] = shape; | ||||
| } | } | ||||
| bool TbeKernelBuild::GetInputLayers(const vector<mindspore::AnfNodePtr> &input_nodes, | |||||
| const vector<mindspore::AnfNodePtr> &compute_nodes, | |||||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers) { | |||||
| bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, | |||||
| const std::vector<mindspore::AnfNodePtr> &reorder_layer, | |||||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input) { | |||||
| if ((op_name == kReluGradV2OpName || op_name == kAddNOpName) && reorder_layer.empty()) { | |||||
| MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. "; | |||||
| return false; | |||||
| } | |||||
| MS_LOG(INFO) << "Fusion info: op_name: " << op_name << "input layer size: " << reorder_layer.size(); | |||||
| if (op_name == kReluGradV2OpName) { | |||||
| (*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2; | |||||
| } else if (op_name == kAddNOpName) { | |||||
| for (const auto &it : reorder_layer) { | |||||
| (*spec_data_input)[it] = kFusionAddN; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes, | |||||
| const std::vector<mindspore::AnfNodePtr> &compute_nodes, | |||||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers, | |||||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input) { | |||||
| auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) { | |||||
| auto op_name = AnfAlgo::GetCNodeName(it); | |||||
| return op_name == kConv2DBackpropInputOpName; | |||||
| }); | |||||
| bool need_spec = (result != compute_nodes.end()); | |||||
| size_t input_size = 0; | size_t input_size = 0; | ||||
| for (const auto &compute_node : compute_nodes) { | for (const auto &compute_node : compute_nodes) { | ||||
| std::vector<mindspore::AnfNodePtr> layer; | |||||
| std::vector<mindspore::AnfNodePtr> layer = {}; | |||||
| std::vector<mindspore::AnfNodePtr> reorder_layer = {}; | |||||
| MS_EXCEPTION_IF_NULL(compute_node); | MS_EXCEPTION_IF_NULL(compute_node); | ||||
| auto op_name = AnfAlgo::GetCNodeName(compute_node); | |||||
| auto ccompute_node = compute_node->cast<CNodePtr>(); | auto ccompute_node = compute_node->cast<CNodePtr>(); | ||||
| if (ccompute_node == nullptr) { | if (ccompute_node == nullptr) { | ||||
| MS_LOG(DEBUG) << "fusion compute node must be cnode"; | |||||
| MS_LOG(INFO) << "Fusion error: fusion compute node must be cnode"; | |||||
| return false; | return false; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Fusion info: compute name: " << compute_node->fullname_with_scope(); | |||||
| for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) { | for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) { | ||||
| auto input = ccompute_node->input(i); | auto input = ccompute_node->input(i); | ||||
| auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input); | auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input); | ||||
| if (find_iter != input_nodes.end()) { | if (find_iter != input_nodes.end()) { | ||||
| MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope(); | |||||
| layer.emplace_back((*find_iter)); | layer.emplace_back((*find_iter)); | ||||
| } else { | |||||
| MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() | |||||
| << ") node's output."; | |||||
| } | |||||
| } | |||||
| TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer); | |||||
| if (need_spec) { | |||||
| MS_LOG(INFO) << "Fusion info: match conv2d backprop input + ... patten."; | |||||
| if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) { | |||||
| return false; | |||||
| } | } | ||||
| } | } | ||||
| input_size += layer.size(); | |||||
| input_layers->emplace_back(layer); | |||||
| input_size += reorder_layer.size(); | |||||
| input_layers->emplace_back(reorder_layer); | |||||
| } | } | ||||
| if (input_nodes.size() != input_size) { | if (input_nodes.size() != input_size) { | ||||
| MS_LOG(DEBUG) << "fusion scope error, layer input:" << input_size << ", input_node:" << input_nodes.size(); | |||||
| MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size | |||||
| << ", input_node:" << input_nodes.size(); | |||||
| return false; | return false; | ||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str, | |||||
| size_t *index) { | |||||
| bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, | |||||
| const std::map<const AnfNodePtr, FusionDataType> &spec_data_input, | |||||
| nlohmann::json *data_str, size_t *index) { | |||||
| MS_EXCEPTION_IF_NULL(data_str); | MS_EXCEPTION_IF_NULL(data_str); | ||||
| MS_EXCEPTION_IF_NULL(index); | MS_EXCEPTION_IF_NULL(index); | ||||
| std::vector<nlohmann::json> output_desc_list; | std::vector<nlohmann::json> output_desc_list; | ||||
| @@ -604,13 +662,17 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode> | |||||
| output_desc_list.push_back(output_desc); | output_desc_list.push_back(output_desc); | ||||
| (*index)++; | (*index)++; | ||||
| } else { | } else { | ||||
| FusionDataType fusion_data_type = kFusionNormal; | |||||
| if (spec_data_input.find(data_input) != spec_data_input.end()) { | |||||
| fusion_data_type = spec_data_input.at(data_input); | |||||
| } | |||||
| auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0); | auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0); | ||||
| auto real_node = kernel_idx.first; | auto real_node = kernel_idx.first; | ||||
| size_t real_idx = kernel_idx.second; | size_t real_idx = kernel_idx.second; | ||||
| MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx; | MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx; | ||||
| // "output_desc" | // "output_desc" | ||||
| nlohmann::json output_desc; | nlohmann::json output_desc; | ||||
| GenDescJson(real_node, real_idx, real_idx, &output_desc); | |||||
| GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type); | |||||
| output_desc_list.push_back(output_desc); | output_desc_list.push_back(output_desc); | ||||
| (*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope()); | (*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope()); | ||||
| } | } | ||||
| @@ -632,11 +694,12 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) { | |||||
| auto real_input_size = cnode->inputs().size() - 1; | auto real_input_size = cnode->inputs().size() - 1; | ||||
| auto dyn_input_size = dyn_input_sizes.size(); | auto dyn_input_size = dyn_input_sizes.size(); | ||||
| if (dyn_input_size != 1) { | if (dyn_input_size != 1) { | ||||
| MS_LOG(DEBUG) << "fusion build not support dyn_input_sizes > 1"; | |||||
| MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1"; | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (IntToSize(dyn_input_sizes[0]) != real_input_size) { | if (IntToSize(dyn_input_sizes[0]) != real_input_size) { | ||||
| MS_LOG(DEBUG) << " dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" << real_input_size; | |||||
| MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" | |||||
| << real_input_size; | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = true; | ret = true; | ||||
| @@ -663,6 +726,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||||
| std::vector<nlohmann::json> *input_desc_list, size_t *index) { | std::vector<nlohmann::json> *input_desc_list, size_t *index) { | ||||
| MS_EXCEPTION_IF_NULL(cnode); | MS_EXCEPTION_IF_NULL(cnode); | ||||
| MS_EXCEPTION_IF_NULL(input_desc_list); | MS_EXCEPTION_IF_NULL(input_desc_list); | ||||
| std::vector<nlohmann::json> input_desc_list_tmp = {}; | |||||
| bool is_dynamic_input = IsDynamicInput(cnode); | bool is_dynamic_input = IsDynamicInput(cnode); | ||||
| for (size_t i = 1; i < cnode->inputs().size(); ++i) { | for (size_t i = 1; i < cnode->inputs().size(); ++i) { | ||||
| auto input = cnode->input(i); | auto input = cnode->input(i); | ||||
| @@ -676,7 +740,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||||
| MS_LOG(INFO) << "node has dynamic input."; | MS_LOG(INFO) << "node has dynamic input."; | ||||
| input_desc["dyn_index"] = (i - 1); | input_desc["dyn_index"] = (i - 1); | ||||
| } | } | ||||
| (*input_desc_list).emplace_back(input_desc); | |||||
| input_desc_list_tmp.emplace_back(input_desc); | |||||
| } | } | ||||
| size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); | size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); | ||||
| if (optional_num > 0) { | if (optional_num > 0) { | ||||
| @@ -686,35 +750,24 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||||
| optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index); | optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index); | ||||
| (*index)++; | (*index)++; | ||||
| (*layer_iter)->emplace_back(nullptr); | (*layer_iter)->emplace_back(nullptr); | ||||
| (*input_desc_list).emplace_back(optional_input_desc); | |||||
| input_desc_list_tmp.emplace_back(optional_input_desc); | |||||
| } | } | ||||
| } | } | ||||
| auto op_name = AnfAlgo::GetCNodeName(cnode); | |||||
| TbeAdapter::FusionInputOrderPass(op_name, input_desc_list_tmp, input_desc_list); | |||||
| return true; | return true; | ||||
| } | } | ||||
| std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) { | std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) { | ||||
| std::vector<size_t> desc_output_index = {}; | std::vector<size_t> desc_output_index = {}; | ||||
| bool find_reused = false; | |||||
| size_t reused_num = 0; | |||||
| for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { | for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { | ||||
| auto output_use_num_item = output_used_nums[idx]; | auto output_use_num_item = output_used_nums[idx]; | ||||
| MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item; | MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item; | ||||
| if (output_use_num_item == 1 || output_use_num_item == 0) { | |||||
| desc_output_index.emplace_back(idx); | |||||
| if (output_use_num_item > 1) { | |||||
| desc_output_index.emplace_back(idx); | desc_output_index.emplace_back(idx); | ||||
| } else { | |||||
| if (!find_reused) { | |||||
| desc_output_index.emplace_back(idx); | |||||
| } else { | |||||
| desc_output_index.emplace_back(desc_output_index[idx - 1]); | |||||
| } | |||||
| reused_num += (output_use_num_item - 1); | |||||
| find_reused = true; | |||||
| } | } | ||||
| } | } | ||||
| auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1; | |||||
| for (size_t i = 0; i < reused_num; ++i) { | |||||
| desc_output_index.emplace_back(pad_value); | |||||
| } | |||||
| return desc_output_index; | return desc_output_index; | ||||
| } | } | ||||
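The rewritten `GetDescOutputIndex` drops the `find_reused`/`reused_num` bookkeeping; the rule appears to reduce to: emit each output index once, and once more when that output is consumed by more than one node. A minimal sketch of that rule (plain integers, not the MindSpore types):

```cpp
#include <iostream>
#include <vector>

// For each output, record its index; outputs consumed by more than one node are
// recorded twice so that a reused-output descriptor can be generated for them.
std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums) {
  std::vector<size_t> desc_output_index;
  for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
    desc_output_index.emplace_back(idx);
    if (output_used_nums[idx] > 1) {
      desc_output_index.emplace_back(idx);
    }
  }
  return desc_output_index;
}

int main() {
  // Output 0 is used once, output 1 is used three times.
  for (size_t idx : GetDescOutputIndex({1, 3})) {
    std::cout << idx << " ";  // 0 1 1
  }
  std::cout << std::endl;
  return 0;
}
```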
| @@ -722,8 +775,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode | |||||
| std::vector<nlohmann::json> *output_desc_list) { | std::vector<nlohmann::json> *output_desc_list) { | ||||
| auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | ||||
| if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { | if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { | ||||
| // wait anther pr: auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum); | |||||
| auto output_used_nums = {SizeToInt(AnfAlgo::GetNodeAttr<std::size_t>(cnode, kAttrOutputUsedNum))}; | |||||
| auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum); | |||||
| MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); | MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); | ||||
| if (output_used_nums.size() != output_size) { | if (output_used_nums.size() != output_size) { | ||||
| MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" | MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" | ||||
| @@ -812,6 +864,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto | |||||
| } | } | ||||
| auto ret = GetIOSizeImpl(data_output); | auto ret = GetIOSizeImpl(data_output); | ||||
| input_size_list->push_back(ret); | input_size_list->push_back(ret); | ||||
| MS_LOG(INFO) << "Fusion info: scope input name: " << op["name"] << ", size: " << ret; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -820,26 +873,31 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto | |||||
| auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); | auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); | ||||
| auto real_node = kernel_idx.first; | auto real_node = kernel_idx.first; | ||||
| size_t real_idx = kernel_idx.second; | size_t real_idx = kernel_idx.second; | ||||
| auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||||
| MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; | |||||
| for (const auto &op : fusion_op_list) { | for (const auto &op : fusion_op_list) { | ||||
| auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||||
| if (op["name"] == normal_name) { | if (op["name"] == normal_name) { | ||||
| auto op_output_desces = op["output_desc"]; | auto op_output_desces = op["output_desc"]; | ||||
| if (output_node != real_node) { | if (output_node != real_node) { | ||||
| // tuple_get item | // tuple_get item | ||||
| MS_LOG(DEBUG) << "output is a tuple getitem node"; | |||||
| MS_LOG(INFO) << "output is a tuple getitem node"; | |||||
| auto output_desc = op_output_desces[real_idx]; | auto output_desc = op_output_desces[real_idx]; | ||||
| if (output_desc["shape"].empty()) { | if (output_desc["shape"].empty()) { | ||||
| continue; | |||||
| MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; | |||||
| return false; | |||||
| } | } | ||||
| auto ret = GetIOSizeImpl(output_desc); | auto ret = GetIOSizeImpl(output_desc); | ||||
| output_size_list->push_back(ret); | output_size_list->push_back(ret); | ||||
| MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; | |||||
| } else { | } else { | ||||
| for (const auto &output_desc : op_output_desces) { | for (const auto &output_desc : op_output_desces) { | ||||
| if (output_desc["shape"].empty()) { | if (output_desc["shape"].empty()) { | ||||
| MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; | |||||
| continue; | continue; | ||||
| } | } | ||||
| auto ret = GetIOSizeImpl(output_desc); | auto ret = GetIOSizeImpl(output_desc); | ||||
| output_size_list->push_back(ret); | output_size_list->push_back(ret); | ||||
| MS_LOG(INFO) << "Fusion info: scope output size: " << ret; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -35,6 +35,8 @@ namespace kernel { | |||||
| // kernel operate type used for generate json | // kernel operate type used for generate json | ||||
| class TbeKernelBuild { | class TbeKernelBuild { | ||||
| enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2 }; | |||||
| public: | public: | ||||
| static bool GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list, | static bool GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list, | ||||
| std::vector<size_t> *output_size_list); | std::vector<size_t> *output_size_list); | ||||
| @@ -48,8 +50,9 @@ class TbeKernelBuild { | |||||
| private: | private: | ||||
| TbeKernelBuild() = default; | TbeKernelBuild() = default; | ||||
| ~TbeKernelBuild() = default; | ~TbeKernelBuild() = default; | ||||
| static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str, | |||||
| size_t *index); | |||||
| static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, | |||||
| const std::map<const AnfNodePtr, FusionDataType> &spec_data_input, | |||||
| nlohmann::json *data_str, size_t *index); | |||||
| static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node, | static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node, | ||||
| std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter, | std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter, | ||||
| nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index); | nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index); | ||||
| @@ -60,13 +63,17 @@ class TbeKernelBuild { | |||||
| static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, | static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, | ||||
| std::vector<nlohmann::json> *output_desc_list); | std::vector<nlohmann::json> *output_desc_list); | ||||
| static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | ||||
| size_t desc_output_idx, nlohmann::json *output_desc); | |||||
| size_t desc_output_idx, nlohmann::json *output_desc, | |||||
| FusionDataType fusion_data_type = kFusionNormal); | |||||
| static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | ||||
| size_t output_index, nlohmann::json *output_desc); | size_t output_index, nlohmann::json *output_desc); | ||||
| static size_t GetIOSizeImpl(const nlohmann::json &desc); | static size_t GetIOSizeImpl(const nlohmann::json &desc); | ||||
| static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer, | |||||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input); | |||||
| static bool GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes, | static bool GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes, | ||||
| const std::vector<mindspore::AnfNodePtr> &compute_nodes, | const std::vector<mindspore::AnfNodePtr> &compute_nodes, | ||||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers); | |||||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers, | |||||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input); | |||||
| static bool IsDynamicInput(const CNodePtr &cnode); | static bool IsDynamicInput(const CNodePtr &cnode); | ||||
| static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); | static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); | ||||
| }; | }; | ||||
| @@ -346,7 +346,8 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string | |||||
| MS_LOG(ERROR) << "Error in select sql statement, sql:" << common::SafeCStr(sql) << ", error: " << errmsg; | MS_LOG(ERROR) << "Error in select sql statement, sql:" << common::SafeCStr(sql) << ", error: " << errmsg; | ||||
| return; | return; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Get" << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index."; | |||||
| MS_LOG(INFO) << "Get " << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index."; | |||||
| std::lock_guard<std::mutex> lck(shard_locker_); | |||||
| for (int i = 0; i < static_cast<int>(columns.size()); ++i) { | for (int i = 0; i < static_cast<int>(columns.size()); ++i) { | ||||
| categories.emplace(columns[i][0]); | categories.emplace(columns[i][0]); | ||||
| } | } | ||||
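
The added `std::lock_guard` serializes insertion into the shared category set when several shards are scanned concurrently. Below is a small self-contained sketch of the same pattern; the variable names are placeholders, not the ShardReader API.

```cpp
#include <iostream>
#include <mutex>
#include <set>
#include <string>
#include <thread>
#include <vector>

std::mutex shard_locker_;           // protects categories_, like ShardReader::shard_locker_
std::set<std::string> categories_;  // shared result set filled by several workers

// Each worker inserts the labels it found for one shard; the lock_guard keeps
// concurrent emplace calls from racing on the shared set.
void CollectCategories(const std::vector<std::string> &columns) {
  std::lock_guard<std::mutex> lck(shard_locker_);
  for (const auto &label : columns) {
    categories_.emplace(label);
  }
}

int main() {
  std::thread t1(CollectCategories, std::vector<std::string>{"cat", "dog"});
  std::thread t2(CollectCategories, std::vector<std::string>{"dog", "bird"});
  t1.join();
  t2.join();
  std::cout << "distinct categories: " << categories_.size() << std::endl;  // 3
  return 0;
}
```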
| @@ -1084,6 +1084,7 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple, | |||||
| std::vector<unsigned int> shrink; | std::vector<unsigned int> shrink; | ||||
| auto slice_tuple_eles = slice_tuple->elements(); | auto slice_tuple_eles = slice_tuple->elements(); | ||||
| size_t ellipsis_num = 0; | size_t ellipsis_num = 0; | ||||
| for (size_t index = 0; index < slice_tuple_size; index++) { | for (size_t index = 0; index < slice_tuple_size; index++) { | ||||
| if (slice_tuple_eles[index]->isa<AbstractSlice>()) { | if (slice_tuple_eles[index]->isa<AbstractSlice>()) { | ||||
| AbstractSlicePtr slice = dyn_cast<AbstractSlice>(slice_tuple_eles[index]); | AbstractSlicePtr slice = dyn_cast<AbstractSlice>(slice_tuple_eles[index]); | ||||
| @@ -1118,12 +1119,13 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple, | |||||
| << slice_tuple_eles[index]->ToString(); | << slice_tuple_eles[index]->ToString(); | ||||
| } | } | ||||
| for (size_t index = slice_tuple_size; index < shape_size; index++) { | |||||
| begin->push_back(0); | |||||
| end->push_back(shape[index]); | |||||
| strides->push_back(1); | |||||
| if (ellipsis_num == 0) { | |||||
| for (size_t index = slice_tuple_size; index < shape_size; index++) { | |||||
| begin->push_back(0); | |||||
| end->push_back(shape[index]); | |||||
| strides->push_back(1); | |||||
| } | |||||
| } | } | ||||
| return ConvertBinaryToDecimal(shrink); | return ConvertBinaryToDecimal(shrink); | ||||
| } | } | ||||
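
With the new guard, the remaining dimensions are padded with full-range slices only when the index tuple contained no ellipsis, since an ellipsis already expands over those axes. A simplified sketch of that padding step, with plain vectors standing in for the MindSpore shape types:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Pad begin/end/strides for the axes not covered by the index tuple, but only
// when the tuple held no ellipsis (an ellipsis already expands over those axes).
void PadRemainingDims(const std::vector<int> &shape, size_t slice_tuple_size, size_t ellipsis_num,
                      std::vector<int> *begin, std::vector<int> *end, std::vector<int> *strides) {
  if (ellipsis_num == 0) {
    for (size_t index = slice_tuple_size; index < shape.size(); index++) {
      begin->push_back(0);
      end->push_back(shape[index]);
      strides->push_back(1);
    }
  }
}

int main() {
  std::vector<int> shape{4, 5, 6};
  std::vector<int> begin{1}, end{3}, strides{1};
  PadRemainingDims(shape, /*slice_tuple_size=*/1, /*ellipsis_num=*/0, &begin, &end, &strides);
  std::cout << "padded axes: " << begin.size() << std::endl;  // 3: axes 1 and 2 filled with full range
  return 0;
}
```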
| @@ -1199,6 +1201,7 @@ FuncGraphPtr TensorSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec | |||||
| if (scalar_ptr->BuildValue()->cast<BoolImmPtr>()->value()) { | if (scalar_ptr->BuildValue()->cast<BoolImmPtr>()->value()) { | ||||
| return ExpandADim(ret_graph, tensor_node); | return ExpandADim(ret_graph, tensor_node); | ||||
| } | } | ||||
| MS_LOG(EXCEPTION) << "TensorSlice not support the index is False."; | |||||
| } | } | ||||
| shrink_axis_mask = GenerateStridedSliceParametersFromNumber(scalar_ptr, shape, &begin, &end, &strides); | shrink_axis_mask = GenerateStridedSliceParametersFromNumber(scalar_ptr, shape, &begin, &end, &strides); | ||||
| } else if (args_spec_list[1]->isa<AbstractEllipsis>()) { | } else if (args_spec_list[1]->isa<AbstractEllipsis>()) { | ||||
| @@ -35,7 +35,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| // namespace to support composite operators definition | // namespace to support composite operators definition | ||||
| namespace prim { | namespace prim { | ||||
| // Expand the tuple and dict parameters generated when parsing the function call, | // Expand the tuple and dict parameters generated when parsing the function call, | ||||
| // and generate positional parameters and key-value pairs for function. | // and generate positional parameters and key-value pairs for function. | ||||
| class UnpackCall : public MetaFuncGraph { | class UnpackCall : public MetaFuncGraph { | ||||
| @@ -47,7 +46,6 @@ class UnpackCall : public MetaFuncGraph { | |||||
| friend bool operator==(const UnpackCall &lhs, const UnpackCall &rhs) { return lhs.name_ == rhs.name_; } | friend bool operator==(const UnpackCall &lhs, const UnpackCall &rhs) { return lhs.name_ == rhs.name_; } | ||||
| }; | }; | ||||
| using UnpackCallPtr = std::shared_ptr<UnpackCall>; | using UnpackCallPtr = std::shared_ptr<UnpackCall>; | ||||
| } // namespace prim | } // namespace prim | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -133,7 +133,6 @@ ResolveIRPassLib::ResolveIRPassLib() { | |||||
| InferenceOptPrepareLib::InferenceOptPrepareLib() { | InferenceOptPrepareLib::InferenceOptPrepareLib() { | ||||
| grad_var_prepare_ = MakeSubstitution(GradVarPrepare(), "grad_var_prepare", IsCNode); | grad_var_prepare_ = MakeSubstitution(GradVarPrepare(), "grad_var_prepare", IsCNode); | ||||
| } | } | ||||
| } // namespace irpass | } // namespace irpass | ||||
| } // namespace opt | } // namespace opt | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -159,7 +159,6 @@ inline bool IsCNodeDup(const AnfNodePtr &node) { | |||||
| } | } | ||||
| return false; | return false; | ||||
| } | } | ||||
| } // namespace irpass | } // namespace irpass | ||||
| } // namespace opt | } // namespace opt | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -31,7 +31,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace opt { | namespace opt { | ||||
| namespace irpass { | namespace irpass { | ||||
| static AnfNodePtr GenerateUnpackGraphNode(std::vector<AnfNodePtr> inputs_y, FuncGraphPtr func_graph, | static AnfNodePtr GenerateUnpackGraphNode(std::vector<AnfNodePtr> inputs_y, FuncGraphPtr func_graph, | ||||
| AnfNodePtr func_node, bool is_unpack, bool sens_param) { | AnfNodePtr func_node, bool is_unpack, bool sens_param) { | ||||
| MS_EXCEPTION_IF_NULL(func_graph); | MS_EXCEPTION_IF_NULL(func_graph); | ||||
| @@ -33,7 +33,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace opt { | namespace opt { | ||||
| namespace irpass { | namespace irpass { | ||||
| // {{GradOperation, g, w}, Ys} | // {{GradOperation, g, w}, Ys} | ||||
| // {UnPackCall, {GradOperation, g, w}, Ys} | // {UnPackCall, {GradOperation, g, w}, Ys} | ||||
| class GradVarPrepare : public AnfVisitor { | class GradVarPrepare : public AnfVisitor { | ||||
| @@ -28,13 +28,11 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace pipeline { | namespace pipeline { | ||||
| struct ExecutorInfo { | struct ExecutorInfo { | ||||
| FuncGraphPtr func_graph; | FuncGraphPtr func_graph; | ||||
| ResourcePtr resource; | ResourcePtr resource; | ||||
| std::size_t arg_list_size; | std::size_t arg_list_size; | ||||
| }; | }; | ||||
| using ExecutorInfoPtr = std::shared_ptr<ExecutorInfo>; | using ExecutorInfoPtr = std::shared_ptr<ExecutorInfo>; | ||||
| inline std::string GetPhasePrefix(const std::string &phase) { | inline std::string GetPhasePrefix(const std::string &phase) { | ||||
| @@ -97,7 +97,7 @@ PYBIND11_MODULE(_c_expression, m) { | |||||
| py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"), | py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"), | ||||
| py::arg("phase") = py::str("dataset"), "Init and exec dataset."); | py::arg("phase") = py::str("dataset"), "Init and exec dataset."); | ||||
| (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode."); | (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode."); | ||||
| (void)m.def("init_ge", &mindspore::pipeline::InitGe, "Init GE"); | |||||
| (void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend."); | |||||
| (void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph."); | (void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph."); | ||||
| @@ -101,7 +101,7 @@ py::tuple GenerateKey(const std::string &name, const std::unordered_map<std::str | |||||
| MS_LOG(INFO) << "Start new args and compile key:" << key; | MS_LOG(INFO) << "Start new args and compile key:" << key; | ||||
| g_args_cache[args_spec] = key++; | g_args_cache[args_spec] = key++; | ||||
| } | } | ||||
| py::tuple argSpec = py::tuple(2); | |||||
| auto argSpec = py::tuple(2); | |||||
| argSpec[0] = name; | argSpec[0] = name; | ||||
| argSpec[1] = g_args_cache[args_spec]; | argSpec[1] = g_args_cache[args_spec]; | ||||
| return argSpec; | return argSpec; | ||||
| @@ -236,7 +236,7 @@ py::dict ExecutorPy::GetAllreduceFusion(const std::string &phase) { | |||||
| void ExecutorPy::DelNetRes(const std::string &id) { | void ExecutorPy::DelNetRes(const std::string &id) { | ||||
| #ifdef ENABLE_GE | #ifdef ENABLE_GE | ||||
| FinalizeGe(); | |||||
| FinalizeBackend(); | |||||
| #endif | #endif | ||||
| if (executor_ != nullptr) { | if (executor_ != nullptr) { | ||||
| bool flag = false; | bool flag = false; | ||||
| @@ -668,6 +668,13 @@ bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t ba | |||||
| const std::vector<TypePtr> &types, const std::vector<std::vector<int64_t>> &shapes, | const std::vector<TypePtr> &types, const std::vector<std::vector<int64_t>> &shapes, | ||||
| const std::vector<int64_t> &input_indexes, const std::string &phase) { | const std::vector<int64_t> &input_indexes, const std::string &phase) { | ||||
| std::string name = MsContext::GetInstance()->backend_policy(); | std::string name = MsContext::GetInstance()->backend_policy(); | ||||
| #ifndef NO_DLIB | |||||
| auto ms_context = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(ms_context); | |||||
| if (!ms_context->IsTsdOpened() || !ms_context->IsGeInited()) { | |||||
| (void)InitBackend(); | |||||
| } | |||||
| #endif | |||||
| if (name == kMsConvert || name == kMsVm) { | if (name == kMsConvert || name == kMsVm) { | ||||
| return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes); | return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes); | ||||
| } | } | ||||
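
The new block initializes the backend lazily before dataset execution when neither the TSD nor GE has been brought up. A generic sketch of that guard, with a hypothetical `Context` singleton in place of `MsContext`:

```cpp
#include <iostream>

// Hypothetical stand-in for MsContext: tracks whether the runtime pieces are up.
class Context {
 public:
  static Context &GetInstance() {
    static Context instance;
    return instance;
  }
  bool IsTsdOpened() const { return tsd_opened_; }
  bool IsGeInited() const { return ge_inited_; }
  void InitBackend() {
    tsd_opened_ = true;
    ge_inited_ = true;
    std::cout << "backend initialized" << std::endl;
  }

 private:
  bool tsd_opened_ = false;
  bool ge_inited_ = false;
};

// Mirrors the guard added to InitExecDataset: initialize only when something is missing.
void EnsureBackend() {
  auto &ctx = Context::GetInstance();
  if (!ctx.IsTsdOpened() || !ctx.IsGeInited()) {
    ctx.InitBackend();
  }
}

int main() {
  EnsureBackend();  // first call brings the backend up
  EnsureBackend();  // already initialized, nothing to do
  return 0;
}
```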
| @@ -746,7 +753,7 @@ void ResetOpId() { mindspore::id_generator::reset_id(); } | |||||
| void InitHccl() { | void InitHccl() { | ||||
| #ifdef ENABLE_GE | #ifdef ENABLE_GE | ||||
| (void)InitGe(); | |||||
| (void)InitBackend(); | |||||
| #else | #else | ||||
| mindspore::parse::python_adapter::set_python_env_flag(true); | mindspore::parse::python_adapter::set_python_env_flag(true); | ||||
| auto ms_context = MsContext::GetInstance(); | auto ms_context = MsContext::GetInstance(); | ||||
| @@ -768,7 +775,7 @@ void InitHccl() { | |||||
| void FinalizeHccl() { | void FinalizeHccl() { | ||||
| #ifdef ENABLE_GE | #ifdef ENABLE_GE | ||||
| (void)FinalizeGe(); | |||||
| (void)FinalizeBackend(); | |||||
| #else | #else | ||||
| device::KernelRuntimeManager::Instance().ClearRuntimeResource(); | device::KernelRuntimeManager::Instance().ClearRuntimeResource(); | ||||
| #endif | #endif | ||||
| @@ -789,7 +796,7 @@ void ReleaseGeTsd() { | |||||
| } | } | ||||
| } | } | ||||
| void InitGe() { | |||||
| void InitBackend() { | |||||
| // set python env flag | // set python env flag | ||||
| mindspore::parse::python_adapter::set_python_env_flag(true); | mindspore::parse::python_adapter::set_python_env_flag(true); | ||||
| // open tsd before ge initialize | // open tsd before ge initialize | ||||
| @@ -801,7 +808,7 @@ void InitGe() { | |||||
| (void)ms_context->InitGe(); | (void)ms_context->InitGe(); | ||||
| } | } | ||||
| void FinalizeGe() { | |||||
| void FinalizeBackend() { | |||||
| auto context_ptr = MsContext::GetInstance(); | auto context_ptr = MsContext::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| (void)context_ptr->FinalizeGe(); | (void)context_ptr->FinalizeGe(); | ||||
| @@ -115,8 +115,8 @@ bool InitDistribute(const std::map<std::string, std::string> &options); | |||||
| void ResetOpId(); | void ResetOpId(); | ||||
| void InitHccl(); | void InitHccl(); | ||||
| void FinalizeHccl(); | void FinalizeHccl(); | ||||
| void InitGe(); | |||||
| void FinalizeGe(); | |||||
| void InitBackend(); | |||||
| void FinalizeBackend(); | |||||
| void ClearResAtexit(); | void ClearResAtexit(); | ||||
| void ReleaseGeTsd(); | void ReleaseGeTsd(); | ||||
| @@ -52,11 +52,11 @@ void DoExecNonInputGraph(const std::string &phase) { | |||||
| transform::RunOptions run_options; | transform::RunOptions run_options; | ||||
| run_options.name = phase; | run_options.name = phase; | ||||
| auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); | auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); | ||||
| if (graph_runner == nullptr) { | if (graph_runner == nullptr) { | ||||
| MS_LOG(ERROR) << "Can not found GraphRunner"; | MS_LOG(ERROR) << "Can not found GraphRunner"; | ||||
| return; | return; | ||||
| } | } | ||||
| { | { | ||||
| // Release GIL before calling into (potentially long-running) C++ code | // Release GIL before calling into (potentially long-running) C++ code | ||||
| py::gil_scoped_release release; | py::gil_scoped_release release; | ||||
| @@ -181,7 +181,6 @@ bool AddDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::di | |||||
| size_t pos = phase.find('.'); | size_t pos = phase.find('.'); | ||||
| std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1)); | std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1)); | ||||
| std::string phase_prefix = phase.substr(0, pos); | std::string phase_prefix = phase.substr(0, pos); | ||||
| if (phase_prefix == "export") { | if (phase_prefix == "export") { | ||||
| MS_LOG(INFO) << "Set DfGraphConvertor training : false"; | MS_LOG(INFO) << "Set DfGraphConvertor training : false"; | ||||
| convertor.set_training(false); | convertor.set_training(false); | ||||
| @@ -319,19 +318,24 @@ void RunGEInitGraph(const py::dict &init_params, const std::string &phase) { | |||||
| py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::tuple &data, size_t *count) { | py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::tuple &data, size_t *count) { | ||||
| MS_EXCEPTION_IF_NULL(cnode_data); | MS_EXCEPTION_IF_NULL(cnode_data); | ||||
| if (*count >= data.size()) { | |||||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||||
| << " less than the number of elements required. "; | |||||
| } | |||||
| if (cnode_data->isa<AbstractTensor>()) { | if (cnode_data->isa<AbstractTensor>()) { | ||||
| if (*count >= data.size()) { | |||||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||||
| << " less than the number of elements required. "; | |||||
| } | |||||
| BaseShapePtr shape = cnode_data->BuildShape(); | BaseShapePtr shape = cnode_data->BuildShape(); | ||||
| auto shape_act = shape->cast<abstract::ShapePtr>()->shape(); | |||||
| Tensor tensor_exp = py::cast<Tensor>(data[*count]); | |||||
| if (shape_act != tensor_exp.shape()) { | |||||
| MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as " | |||||
| "the shape of the tensor derived from ME."; | |||||
| if (!shape->isa<abstract::Shape>()) { | |||||
| MS_LOG(EXCEPTION) << "The shape of the tensor derived is not Shape, is " << shape->ToString(); | |||||
| } | } | ||||
| auto shape_me = shape->cast<abstract::ShapePtr>()->shape(); | |||||
| auto shape_ge = py::cast<Tensor>(data[*count]).shape(); | |||||
| if (shape_ge != shape_me) { | |||||
| MS_LOG(EXCEPTION) << "The shape of the " << *count << "th tensor returned: " << shape_ge | |||||
| << " is not the same as the shape of the tensor derived: " << shape_me; | |||||
| } | |||||
| return data[(*count)++]; | return data[(*count)++]; | ||||
| } | } | ||||
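
The reworked check compares the shape returned by the backend (`shape_ge`) against the shape derived by the frontend (`shape_me`) and names the offending output index in the error message. A simplified sketch of that validation, with plain vectors in place of the abstract shape types:

```cpp
#include <cstddef>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <vector>

// Simplified version of the added check in ExtractGeneralCnodeRet: the shape of the
// tensor returned by the backend must match the shape derived by the frontend;
// on mismatch the error names the offending output index.
void CheckReturnedShape(size_t count, const std::vector<int> &shape_ge, const std::vector<int> &shape_me) {
  if (shape_ge != shape_me) {
    std::ostringstream oss;
    oss << "The shape of the " << count << "th tensor returned does not match the derived shape.";
    throw std::runtime_error(oss.str());
  }
}

int main() {
  CheckReturnedShape(0, {2, 3}, {2, 3});  // shapes agree, nothing happens
  try {
    CheckReturnedShape(1, {2, 3}, {3, 2});  // mismatch on output 1: throws
  } catch (const std::exception &e) {
    std::cout << e.what() << std::endl;
  }
  return 0;
}
```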
| @@ -343,7 +347,7 @@ py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::t | |||||
| auto data_tp = cnode_data->cast<AbstractTuplePtr>(); | auto data_tp = cnode_data->cast<AbstractTuplePtr>(); | ||||
| auto elements = data_tp->elements(); | auto elements = data_tp->elements(); | ||||
| size_t size = data_tp->size(); | size_t size = data_tp->size(); | ||||
| py::tuple tp = py::tuple(size); | |||||
| auto tp = py::tuple(size); | |||||
| for (size_t i = 0; i < size; i++) { | for (size_t i = 0; i < size; i++) { | ||||
| tp[i] = ExtractGeneralCnodeRet(elements[i], data, count); | tp[i] = ExtractGeneralCnodeRet(elements[i], data, count); | ||||
| } | } | ||||
| @@ -357,11 +361,11 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data, | |||||
| return ValuePtrToPyData(GetValueNode(output_node)); | return ValuePtrToPyData(GetValueNode(output_node)); | ||||
| } | } | ||||
| if (*count >= data.size()) { | |||||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||||
| << " less than the number of elements required. "; | |||||
| } | |||||
| if (output_node->isa<Parameter>()) { | if (output_node->isa<Parameter>()) { | ||||
| if (*count >= data.size()) { | |||||
| MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size() | |||||
| << " less than the number of elements required. "; | |||||
| } | |||||
| return data[(*count)++]; | return data[(*count)++]; | ||||
| } | } | ||||
| @@ -374,7 +378,7 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data, | |||||
| if (output_c->IsApply(prim::kPrimMakeTuple)) { | if (output_c->IsApply(prim::kPrimMakeTuple)) { | ||||
| auto input_list = output_c->inputs(); | auto input_list = output_c->inputs(); | ||||
| size_t size = input_list.size(); | size_t size = input_list.size(); | ||||
| py::tuple tp = py::tuple(size - 1); | |||||
| auto tp = py::tuple(size - 1); | |||||
| for (size_t i = 1; i < size; i++) { | for (size_t i = 1; i < size; i++) { | ||||
| tp[i - 1] = StructureOutput(input_list[i], data, count); | tp[i - 1] = StructureOutput(input_list[i], data, count); | ||||
| } | } | ||||
| @@ -396,11 +400,8 @@ std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr &graph, const std::ve | |||||
| std::vector<GeTensorPtr> ge_outputs; | std::vector<GeTensorPtr> ge_outputs; | ||||
| transform::RunOptions run_options; | transform::RunOptions run_options; | ||||
| run_options.name = phase; | run_options.name = phase; | ||||
| auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); | auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner(); | ||||
| if (graph_runner == nullptr) { | if (graph_runner == nullptr) { | ||||
| MS_LOG(EXCEPTION) << "Can not found GraphRunner."; | MS_LOG(EXCEPTION) << "Can not found GraphRunner."; | ||||
| } | } | ||||
| @@ -473,7 +474,6 @@ void ProcessGeArg(const std::map<std::string, ExecutorInfoPtr> &info, const py:: | |||||
| py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::tuple &args, | py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::tuple &args, | ||||
| const std::string &phase) { | const std::string &phase) { | ||||
| std::string phase_prefix = GetPhasePrefix(phase); | std::string phase_prefix = GetPhasePrefix(phase); | ||||
| if (phase_prefix == "save") { | if (phase_prefix == "save") { | ||||
| DoExecNonInputGraph(phase); | DoExecNonInputGraph(phase); | ||||
| ConfigManager::GetInstance().ResetConfig(); | ConfigManager::GetInstance().ResetConfig(); | ||||
| @@ -483,7 +483,6 @@ py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const | |||||
| if (info.count(phase) == 0) { | if (info.count(phase) == 0) { | ||||
| MS_LOG(EXCEPTION) << "There is no phase:" << phase; | MS_LOG(EXCEPTION) << "There is no phase:" << phase; | ||||
| } | } | ||||
| FuncGraphPtr anf_graph = info.at(phase)->func_graph; | FuncGraphPtr anf_graph = info.at(phase)->func_graph; | ||||
| #ifdef ENABLE_INFER | #ifdef ENABLE_INFER | ||||
| @@ -31,7 +31,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace pipeline { | namespace pipeline { | ||||
| namespace py = pybind11; | namespace py = pybind11; | ||||
| void SetGeOption(const std::map<std::string, std::string> &options); | void SetGeOption(const std::map<std::string, std::string> &options); | ||||
| @@ -50,7 +49,6 @@ bool InitExecDatasetGe(const std::string &queue_name, int64_t size, int64_t batc | |||||
| const std::vector<int64_t> &input_indexes, const std::string &phase); | const std::vector<int64_t> &input_indexes, const std::string &phase); | ||||
| void ExportDFGraph(const std::string &file_name, const std::string &phase); | void ExportDFGraph(const std::string &file_name, const std::string &phase); | ||||
| } // namespace pipeline | } // namespace pipeline | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -41,7 +41,7 @@ class AbstractFuncAtom : public AbstractFunction { | |||||
| AbstractFunctionPtr Join(const AbstractFunctionPtr &other) final; | AbstractFunctionPtr Join(const AbstractFunctionPtr &other) final; | ||||
| void Visit(std::function<void(const AbstractFuncAtomPtr &)>) const final; | void Visit(std::function<void(const AbstractFuncAtomPtr &)>) const final; | ||||
| bool operator==(const AbstractFunction &other) const; | |||||
| bool operator==(const AbstractFunction &other) const override; | |||||
| std::size_t hash() const override { return tid(); } | std::size_t hash() const override { return tid(); } | ||||
| }; | }; | ||||
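
Adding `override` to `operator==` lets the compiler confirm the signature really matches a virtual function in the base class, so a dropped `const` or a changed parameter becomes a compile error rather than an unrelated overload. A minimal illustration with placeholder class names, not the MindSpore hierarchy:

```cpp
// Base/Derived are placeholder names used only to illustrate why `override` was added.
struct Base {
  virtual bool operator==(const Base &other) const { return this == &other; }
  virtual ~Base() = default;
};

struct Derived : Base {
  // With `override`, the compiler rejects this declaration if it does not match
  // a virtual function in Base (e.g. if `const` were accidentally dropped).
  bool operator==(const Base &other) const override { return true; }
};

int main() {
  Derived a, b;
  const Base &x = a;
  const Base &y = b;
  return x == y ? 0 : 1;  // dispatches to Derived::operator==, so returns 0
}
```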
| @@ -270,7 +270,7 @@ class TypedPrimitiveAbstractClosure : public AbstractFuncAtom { | |||||
| class DummyAbstractClosure : public AbstractFuncAtom { | class DummyAbstractClosure : public AbstractFuncAtom { | ||||
| public: | public: | ||||
| DummyAbstractClosure() = default; | DummyAbstractClosure() = default; | ||||
| ~DummyAbstractClosure() = default; | |||||
| ~DummyAbstractClosure() override = default; | |||||
| MS_DECLARE_PARENT(DummyAbstractClosure, AbstractFuncAtom) | MS_DECLARE_PARENT(DummyAbstractClosure, AbstractFuncAtom) | ||||
| EvaluatorPtr GetEvaluator(AnalysisEnginePtr) override { MS_LOG(EXCEPTION) << "A dummy function cannot eval."; } | EvaluatorPtr GetEvaluator(AnalysisEnginePtr) override { MS_LOG(EXCEPTION) << "A dummy function cannot eval."; } | ||||
| @@ -295,7 +295,6 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) { | |||||
| dic["shape"] = shape; | dic["shape"] = shape; | ||||
| dic["dtype"] = arg_slice->BuildType(); | dic["dtype"] = arg_slice->BuildType(); | ||||
| dic["value"] = BuildValue(arg_slice->BuildValue()); | dic["value"] = BuildValue(arg_slice->BuildValue()); | ||||
| } else if (abs_base->isa<AbstractTuple>()) { | } else if (abs_base->isa<AbstractTuple>()) { | ||||
| auto arg_tuple = dyn_cast<AbstractTuple>(abs_base); | auto arg_tuple = dyn_cast<AbstractTuple>(abs_base); | ||||
| size_t len = arg_tuple->size(); | size_t len = arg_tuple->size(); | ||||
| @@ -38,6 +38,7 @@ | |||||
| #include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" | #include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h" | ||||
| #include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" | #include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h" | ||||
| #include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" | #include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h" | ||||
| #include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" | |||||
| #include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" | #include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h" | ||||
| #include "pre_activate/ascend/ir_fusion/transdata_split.h" | #include "pre_activate/ascend/ir_fusion/transdata_split.h" | ||||
| #include "pre_activate/ascend/ir_fission/topk_split.h" | #include "pre_activate/ascend/ir_fission/topk_split.h" | ||||
| @@ -46,7 +47,6 @@ | |||||
| #include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h" | #include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h" | ||||
| #include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" | #include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h" | ||||
| #include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h" | #include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h" | ||||
| #include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h" | |||||
| #include "pre_activate/ascend/ir_fusion/derelu_fusion.h" | #include "pre_activate/ascend/ir_fusion/derelu_fusion.h" | ||||
| #include "pre_activate/ascend/format_type/insert_trans_op.h" | #include "pre_activate/ascend/format_type/insert_trans_op.h" | ||||
| #include "pre_activate/pass/getitem_tuple.h" | #include "pre_activate/pass/getitem_tuple.h" | ||||
| @@ -97,7 +97,6 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { | |||||
| ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>()); | ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<AddnFission>()); | ir_fusion_pm->AddPass(std::make_shared<AddnFission>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<DereluFusion>()); | ir_fusion_pm->AddPass(std::make_shared<DereluFusion>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<ConfusionMulGradFusion>()); | |||||
| ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>()); | ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>()); | ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>()); | ||||
| } | } | ||||
| @@ -267,6 +266,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||||
| other_pm->AddPass(std::make_shared<AllReduceFusion>()); | other_pm->AddPass(std::make_shared<AllReduceFusion>()); | ||||
| other_pm->AddPass(std::make_shared<AllGatherFusion>()); | other_pm->AddPass(std::make_shared<AllGatherFusion>()); | ||||
| other_pm->AddPass(std::make_shared<ParameterTransOpFusion>()); | other_pm->AddPass(std::make_shared<ParameterTransOpFusion>()); | ||||
| other_pm->AddPass(std::make_shared<RefreshParameterFormat>()); | |||||
| other_pm->AddPass(std::make_shared<BufferFusion>()); | other_pm->AddPass(std::make_shared<BufferFusion>()); | ||||
| other_pm->AddPass(std::make_shared<GetitemTuple>()); | other_pm->AddPass(std::make_shared<GetitemTuple>()); | ||||
| other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | ||||
| @@ -21,6 +21,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "device/ascend/kernel_select_ascend.h" | #include "device/ascend/kernel_select_ascend.h" | ||||
| #include "kernel/kernel_query.h" | #include "kernel/kernel_query.h" | ||||
| #include "kernel/tbe/tbe_kernel_select.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace opt { | namespace opt { | ||||
| @@ -36,6 +37,16 @@ class KernelSelect { | |||||
| }; | }; | ||||
| using KernelSelectPtr = std::shared_ptr<KernelSelect>; | using KernelSelectPtr = std::shared_ptr<KernelSelect>; | ||||
| class SupportedChecker { | |||||
| public: | |||||
| SupportedChecker() = default; | |||||
| virtual ~SupportedChecker() = default; | |||||
| virtual bool CheckSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) { | |||||
| return kernel::CheckSupported(anf_node, select_kernel_build_info); | |||||
| } | |||||
| }; | |||||
| using SupportedCheckerPtr = std::shared_ptr<SupportedChecker>; | |||||
| class KernelQuery { | class KernelQuery { | ||||
| public: | public: | ||||
| KernelQuery() = default; | KernelQuery() = default; | ||||
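
The new `SupportedChecker` wraps `kernel::CheckSupported` behind a virtual method, giving the pass a seam that tests can override with a stub. A generic sketch of that pattern; the names below are illustrative, not the MindSpore API:

```cpp
#include <iostream>
#include <memory>
#include <string>

// Production check, standing in for kernel::CheckSupported.
static bool RealCheckSupported(const std::string &op_name) { return op_name != "UnsupportedOp"; }

// Virtual wrapper, mirroring the SupportedChecker class added to the pass header:
// passes call through the wrapper so tests can substitute their own answer.
class SupportedChecker {
 public:
  virtual ~SupportedChecker() = default;
  virtual bool CheckSupported(const std::string &op_name) { return RealCheckSupported(op_name); }
};

// Test double that forces every op to look supported.
class AlwaysSupportedChecker : public SupportedChecker {
 public:
  bool CheckSupported(const std::string &) override { return true; }
};

int main() {
  std::shared_ptr<SupportedChecker> checker = std::make_shared<SupportedChecker>();
  std::cout << checker->CheckSupported("UnsupportedOp") << std::endl;  // 0
  checker = std::make_shared<AlwaysSupportedChecker>();
  std::cout << checker->CheckSupported("UnsupportedOp") << std::endl;  // 1
  return 0;
}
```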
| @@ -17,12 +17,14 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <tuple> | #include <tuple> | ||||
| #include <utility> | |||||
| #include <unordered_set> | #include <unordered_set> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <deque> | #include <deque> | ||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <iterator> | |||||
| #include "kernel/kernel_fusion.h" | #include "kernel/kernel_fusion.h" | ||||
| #include "debug/anf_ir_dump.h" | #include "debug/anf_ir_dump.h" | ||||
| @@ -260,33 +262,40 @@ CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::v | |||||
| return buffer_fusion_kernel; | return buffer_fusion_kernel; | ||||
| } | } | ||||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list_in, | |||||
| const std::vector<AnfNodePtr> &inputs_list, | |||||
| kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list, | |||||
| const std::vector<AnfNodePtr> &outputs_list) { | const std::vector<AnfNodePtr> &outputs_list) { | ||||
| MS_LOG(DEBUG) << "Start Create Kernel Info"; | MS_LOG(DEBUG) << "Start Create Kernel Info"; | ||||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | ||||
| // inputs format and data type | // inputs format and data type | ||||
| std::vector<std::string> inputs_format; | std::vector<std::string> inputs_format; | ||||
| std::vector<TypeId> inputs_data_type; | std::vector<TypeId> inputs_data_type; | ||||
| for (auto node : inputs_list_in) { | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| auto &inputs = cnode->inputs(); | |||||
| for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { | |||||
| if (std::find(inputs_list.begin(), inputs_list.end(), inputs[input_index]) != inputs_list.end()) { | |||||
| inputs_format.push_back(AnfAlgo::GetInputFormat(node, input_index - 1)); | |||||
| inputs_data_type.push_back(AnfAlgo::GetInputDeviceDataType(node, input_index - 1)); | |||||
| } | |||||
| for (const auto &input : inputs_list) { | |||||
| if (input->isa<CNode>() && AnfAlgo::GetCNodeName(input) == prim::kPrimTupleGetItem->name()) { | |||||
| auto tuple_getitem = input->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||||
| inputs_format.push_back(AnfAlgo::GetOutputFormat( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| } else { | |||||
| inputs_format.push_back(AnfAlgo::GetOutputFormat(input, 0)); | |||||
| inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(input, 0)); | |||||
| } | } | ||||
| } | } | ||||
| // outputs format and data type | // outputs format and data type | ||||
| std::vector<std::string> outputs_format; | std::vector<std::string> outputs_format; | ||||
| std::vector<TypeId> outputs_data_type; | std::vector<TypeId> outputs_data_type; | ||||
| for (size_t index = 0; index < outputs_list.size(); ++index) { | |||||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { | |||||
| auto kernel_with_index = AnfAlgo::VisitKernel(outputs_list[index], idx); | |||||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second)); | |||||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second)); | |||||
| for (const auto &output : outputs_list) { | |||||
| if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||||
| auto tuple_getitem = output->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||||
| outputs_format.push_back(AnfAlgo::GetOutputFormat( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( | |||||
| tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2)))))); | |||||
| } else { | |||||
| outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); | |||||
| outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); | |||||
| } | } | ||||
| } | } | ||||
| builder.SetInputsFormat(inputs_format); | builder.SetInputsFormat(inputs_format); | ||||
| @@ -320,140 +329,235 @@ AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::K | |||||
| return tuple_item; | return tuple_item; | ||||
| } | } | ||||
| void ReplaceOldNode(const std::vector<AnfNodePtr> &outputs_list, const AnfNodePtr &buffer_fusion_kernel, | |||||
| session::KernelGraph *kernel_graph) { | |||||
| void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||||
| int32_t fusion_id, const AnfNodePtr &output_item, | |||||
| const AnfNodePtr &replace_item) { | |||||
| for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { | |||||
| auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), | |||||
| output_item); | |||||
| if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { | |||||
| MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; | |||||
| *itr = replace_item; | |||||
| } | |||||
| } | |||||
| } | |||||
| void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||||
| const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| auto manager = kernel_graph->manager(); | auto manager = kernel_graph->manager(); | ||||
| MS_EXCEPTION_IF_NULL(manager); | MS_EXCEPTION_IF_NULL(manager); | ||||
| if (outputs_list.size() == 1) { // single output | |||||
| (void)manager->Replace(outputs_list[0], buffer_fusion_kernel); | |||||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||||
| if (buffer_fusion_info.outputs_list.size() == 1) { // single output | |||||
| (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); | |||||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], | |||||
| buffer_fusion_kernel); | |||||
| } else { // multiple output | } else { // multiple output | ||||
| size_t real_idx = 0; | |||||
| for (size_t index = 0; index < outputs_list.size(); ++index) { | |||||
| if (AnfAlgo::GetOutputTensorNum(outputs_list[index]) == 1) { | |||||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); | |||||
| (void)manager->Replace(outputs_list[index], tuple_item); | |||||
| } else { | |||||
| std::vector<AnfNodePtr> make_tuple_inputs; | |||||
| AbstractBasePtrList abstract_list; | |||||
| make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); | |||||
| for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { | |||||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); | |||||
| abstract_list.push_back(tuple_item->abstract()); | |||||
| make_tuple_inputs.push_back(tuple_item); | |||||
| for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { | |||||
| auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); | |||||
| (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); | |||||
| ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], | |||||
| tuple_item); | |||||
| } | |||||
| } | |||||
| } | |||||
| void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto nodes = TopoSort(kernel_graph->get_return()); | |||||
| for (auto &node : nodes) { | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId); | |||||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); | |||||
| } | |||||
| } | |||||
| } | |||||
| void GetFusionScopeInputNodeList(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| auto fusion_id = buffer_fusion_info.first; | |||||
| auto fusion_info = buffer_fusion_info.second; | |||||
| for (const auto &node : fusion_info.anf_nodes) { | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { | |||||
| auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), | |||||
| (*buffer_fusion_infos)[fusion_id].inputs_list.end(), | |||||
| cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); | |||||
| } | |||||
| } | } | ||||
| AnfNodePtr make_tuple = kernel_graph->NewCNode(make_tuple_inputs); | |||||
| make_tuple->set_abstract(std::make_shared<abstract::AbstractTuple>(abstract_list)); | |||||
| (void)manager->Replace(outputs_list[index], make_tuple); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| void GetInputList(const CNodePtr &node, const int32_t cur_fusion_id, std::vector<AnfNodePtr> *inputs_list) { | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| MS_EXCEPTION_IF_NULL(inputs_list); | |||||
| auto &inputs = node->inputs(); | |||||
| for (size_t input_index = 1; input_index < inputs.size(); ++input_index) { | |||||
| auto input = inputs[input_index]; | |||||
| if (AnfAlgo::IsRealCNodeKernel(input)) { | |||||
| if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId); | |||||
| if (fusion_id != cur_fusion_id) { | |||||
| inputs_list->push_back(input); | |||||
| bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { | |||||
| MS_EXCEPTION_IF_NULL(node1); | |||||
| MS_EXCEPTION_IF_NULL(node2); | |||||
| auto getitem1 = node1->cast<CNodePtr>(); | |||||
| auto getitem2 = node2->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(getitem1); | |||||
| MS_EXCEPTION_IF_NULL(getitem2); | |||||
| auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2))); | |||||
| auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2))); | |||||
| return output_idx1 < output_idx2; | |||||
| } | |||||
| void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | |||||
| auto fusion_id = buffer_fusion_info.first; | |||||
| auto fusion_info = buffer_fusion_info.second; | |||||
| for (const auto &node : fusion_info.anf_nodes) { | |||||
| if (AnfAlgo::GetOutputTensorNum(node) == 1) { | |||||
| for (auto use_node : manager->node_users()[node]) { | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| } else { | } else { | ||||
| inputs_list->push_back(input); | |||||
| } | |||||
| } else if (input->isa<CNode>()) { | |||||
| for (auto &input_in : input->cast<CNodePtr>()->inputs()) { | |||||
| if (AnfAlgo::IsRealCNodeKernel(input_in)) { | |||||
| if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId); | |||||
| if (fusion_id != cur_fusion_id) { | |||||
| inputs_list->push_back(input); | |||||
| int prev_idx = 0; | |||||
| std::vector<AnfNodePtr> tuple_getitem_nodes; | |||||
| std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), | |||||
| std::back_inserter(tuple_getitem_nodes), | |||||
| [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; }); | |||||
| std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); | |||||
| for (auto getitem : tuple_getitem_nodes) { | |||||
| auto getitem_ptr = getitem->cast<CNodePtr>(); | |||||
| auto input2 = getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { | |||||
| auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); | |||||
| } | |||||
| prev_idx = output_idx + 1; | |||||
| for (auto item_use_node : manager->node_users()[getitem]) { | |||||
| if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == | |||||
| fusion_info.anf_nodes.end()) { | |||||
| (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); | |||||
| break; | |||||
| } | } | ||||
| } else { | |||||
| inputs_list->push_back(input); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } else { | |||||
| inputs_list->push_back(input); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| if ((*buffer_fusion_infos).find(cur_fusion_id) == (*buffer_fusion_infos).end()) { | |||||
| BufferFusionInfo_t buffer_fusion_info; | |||||
| (*buffer_fusion_infos)[cur_fusion_id] = buffer_fusion_info; | |||||
| } | |||||
| std::vector<AnfNodePtr> inputs_list; | |||||
| GetInputList(node, cur_fusion_id, &inputs_list); | |||||
| if (!inputs_list.empty()) { | |||||
| if (!(*buffer_fusion_infos)[cur_fusion_id].inputs_list.empty()) { | |||||
| (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list.insert( | |||||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list.end(), inputs_list.begin(), inputs_list.end()); | |||||
| (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.insert( | |||||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.end(), node); | |||||
| void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list, | |||||
| const AnfNodePtr &fusion_kernel) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto manager = kernel_graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| for (size_t idx = 0; idx < outputs_list.size(); ++idx) { | |||||
| auto output = outputs_list[idx]; | |||||
| if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { | |||||
| auto real_output = AnfAlgo::VisitKernel(output, 0); | |||||
| auto output_cnode = output->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(output_cnode); | |||||
| auto input2 = output_cnode->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| session::AnfWithOutIndex out_pair(real_output.first, output_idx); | |||||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||||
| } | |||||
| } else { | } else { | ||||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list = inputs_list; | |||||
| (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.push_back(node); | |||||
| session::AnfWithOutIndex out_pair(output, 0); | |||||
| if (kernel_graph->IsInRefOutputMap(out_pair)) { | |||||
| auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); | |||||
| session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); | |||||
| kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| void InsertNode(const AnfNodePtr &node, std::vector<AnfNodePtr> *list) { | |||||
| MS_EXCEPTION_IF_NULL(list); | |||||
| if (std::find(list->begin(), list->end(), node) == list->end()) { | |||||
| (void)list->insert(list->end(), node); | |||||
| void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, | |||||
| std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(fused_set); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto conv = cnode->input(1); | |||||
| if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { | |||||
| std::vector<int> output_used_num{SizeToInt(manager->node_users()[conv].size())}; | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, conv}; | |||||
| candidate_fusion->push_back(record); | |||||
| fused_set->insert(record.begin(), record.end()); | |||||
| } | } | ||||
| } | } | ||||
| void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| for (auto &input : node->inputs()) { | |||||
| MS_EXCEPTION_IF_NULL(input); | |||||
| if (AnfAlgo::IsRealCNodeKernel(input) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId); | |||||
| if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { | |||||
| BufferFusionInfo_t buffer_fusion_info; | |||||
| (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; | |||||
| } | |||||
| if (fusion_id != cur_fusion_id) { | |||||
| InsertNode(input, &((*buffer_fusion_infos)[fusion_id].outputs_list)); | |||||
| } | |||||
| } else if (input->isa<CNode>()) { | |||||
| for (auto &input_in : input->cast<CNodePtr>()->inputs()) { | |||||
| if (AnfAlgo::IsRealCNodeKernel(input_in) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId); | |||||
| if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { | |||||
| BufferFusionInfo_t buffer_fusion_info; | |||||
| (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; | |||||
| } | |||||
| if (fusion_id != cur_fusion_id) { | |||||
| InsertNode(input_in, &((*buffer_fusion_infos)[fusion_id].outputs_list)); | |||||
| } | |||||
| } | |||||
| } | |||||
| void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||||
| std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(fused_set); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto getitem = relu_input->cast<CNodePtr>(); | |||||
| auto bnupdate = getitem->input(1); | |||||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||||
| auto input2 = out_getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||||
| } | } | ||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, bnupdate}; | |||||
| candidate_fusion->push_back(record); | |||||
| fused_set->insert(record.begin(), record.end()); | |||||
| } | } | ||||
| } | } | ||||
| void GetFusionScopeNodeList(const session::KernelGraph &kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) { | |||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | |||||
| auto nodes = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : nodes) { | |||||
| MS_EXCEPTION_IF_NULL(node); | |||||
| if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { | |||||
| auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId); | |||||
| (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); | |||||
| } | |||||
| } | |||||
| } | |||||
| void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, | |||||
| std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| MS_EXCEPTION_IF_NULL(fused_set); | |||||
| MS_EXCEPTION_IF_NULL(candidate_fusion); | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto add = relu_input->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(add); | |||||
| auto tuple_getitem = add->input(1); | |||||
| if (tuple_getitem->isa<CNode>() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { | |||||
| auto getitem = tuple_getitem->cast<CNodePtr>(); | |||||
| auto bnupdate = getitem->input(1); | |||||
| if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { | |||||
| std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); | |||||
| for (auto out_getitem : manager->node_users()[bnupdate]) { | |||||
| auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>(); | |||||
| auto input2 = out_getitem_ptr->input(2); | |||||
| auto output_idx = GetValue<int>(GetValueNode(input2)); | |||||
| output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); | |||||
| } | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); | |||||
| std::unordered_set<AnfNodePtr> record{cnode, relu_input, bnupdate}; | |||||
| candidate_fusion->push_back(record); | |||||
| fused_set->insert(record.begin(), record.end()); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -470,15 +574,14 @@ void MatchOpNamePattern(const session::KernelGraph &kernel_graph, std::unordered | |||||
| auto cnode = node->cast<CNodePtr>(); | auto cnode = node->cast<CNodePtr>(); | ||||
| MS_EXCEPTION_IF_NULL(cnode); | MS_EXCEPTION_IF_NULL(cnode); | ||||
| if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { | if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { | ||||
| auto conv = cnode->input(1); | |||||
| if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { | |||||
| auto manager = kernel_graph.manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| auto &users = manager->node_users(); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(users[conv].size()), conv); | |||||
| std::unordered_set<AnfNodePtr> record({cnode, conv}); | |||||
| candidate_fusion->push_back(record); | |||||
| fused_set->insert(record.begin(), record.end()); | |||||
| MatchConvBnreduce(cnode, kernel_graph, fused_set, candidate_fusion); | |||||
| } else if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || | |||||
| AnfAlgo::GetCNodeName(cnode) == prim::kPrimRelu->name()) { | |||||
| auto relu_input = cnode->input(1); | |||||
| if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTensorAdd->name()) { | |||||
| MatchBnupdateAddRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); | |||||
| } else if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTupleGetItem->name()) { | |||||
| MatchBnupdateRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -536,31 +639,15 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord | |||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| void BufferFusion::GetBufferFusionInfo(const session::KernelGraph &kernel_graph, | |||||
| void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const { | ||||
| MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | MS_EXCEPTION_IF_NULL(buffer_fusion_infos); | ||||
| std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); | |||||
| for (auto &node : node_list) { | |||||
| if (!AnfAlgo::IsRealCNodeKernel(node)) { | |||||
| continue; | |||||
| } | |||||
| int32_t cur_fusion_id = -1; | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { | |||||
| cur_fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId); | |||||
| CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos); | |||||
| } | |||||
| // Check if current node is output | |||||
| CheckCurrentNodeIsOutput(cnode, cur_fusion_id, buffer_fusion_infos); | |||||
| } | |||||
| GetFusionScopeNodeList(kernel_graph, buffer_fusion_infos); | |||||
| GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); | |||||
| GetFusionScopeInputNodeList(kernel_graph, buffer_fusion_infos); | |||||
| GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); | |||||
| for (auto &buffer_fusion_info : *buffer_fusion_infos) { | for (auto &buffer_fusion_info : *buffer_fusion_infos) { | ||||
| buffer_fusion_info.second.kernel_build_info = | buffer_fusion_info.second.kernel_build_info = | ||||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list, | |||||
| buffer_fusion_info.second.outputs_list); | |||||
| CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); | |||||
| } | } | ||||
| } | } | ||||
| @@ -569,7 +656,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c | |||||
| bool change = false; | bool change = false; | ||||
| std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos; | std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos; | ||||
| buffer_fusion_infos.clear(); | buffer_fusion_infos.clear(); | ||||
| GetBufferFusionInfo(*kernel_graph, &buffer_fusion_infos); | |||||
| GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); | |||||
| std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos; | std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos; | ||||
| for (auto &buffer_fusion_info : buffer_fusion_infos) { | for (auto &buffer_fusion_info : buffer_fusion_infos) { | ||||
| @@ -600,7 +687,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c | |||||
| MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; | MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; | ||||
| continue; | continue; | ||||
| } | } | ||||
| change = ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_mods[fusion_id], kernel_graph); | |||||
| change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); | |||||
| } | } | ||||
| MS_LOG(DEBUG) << "End Buffer Fusion"; | MS_LOG(DEBUG) << "End Buffer Fusion"; | ||||
| return change; | return change; | ||||
| @@ -630,8 +717,10 @@ bool BufferFusion::MatchBufferFusionPattern(const session::KernelGraph &kernel_g | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, | |||||
| bool BufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, | |||||
| int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, | |||||
| session::KernelGraph *kernel_graph) const { | session::KernelGraph *kernel_graph) const { | ||||
| auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; | |||||
| auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, | auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, | ||||
| buffer_fusion_info.anf_nodes, kernel_graph); | buffer_fusion_info.anf_nodes, kernel_graph); | ||||
| AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); | AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); | ||||
| @@ -650,8 +739,8 @@ bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, | |||||
| } | } | ||||
| AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); | AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); | ||||
| AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); | AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); | ||||
| // replace node | |||||
| ReplaceOldNode(buffer_fusion_info.outputs_list, buffer_fusion, kernel_graph); | |||||
| SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); | |||||
| ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); | |||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -30,7 +30,6 @@ namespace opt { | |||||
| struct BufferFusionInfo_t { | struct BufferFusionInfo_t { | ||||
| std::vector<AnfNodePtr> anf_nodes; | std::vector<AnfNodePtr> anf_nodes; | ||||
| std::vector<AnfNodePtr> inputs_list; | std::vector<AnfNodePtr> inputs_list; | ||||
| std::vector<AnfNodePtr> inputs_list_in; | |||||
| std::vector<AnfNodePtr> outputs_list; | std::vector<AnfNodePtr> outputs_list; | ||||
| kernel::KernelBuildInfoPtr kernel_build_info; | kernel::KernelBuildInfoPtr kernel_build_info; | ||||
| }; | }; | ||||
| @@ -44,10 +43,10 @@ class BufferFusion : public Pass { | |||||
| bool Run(const FuncGraphPtr &graph) override; | bool Run(const FuncGraphPtr &graph) override; | ||||
| private: | private: | ||||
| void GetBufferFusionInfo(const session::KernelGraph &kernel_graph, | |||||
| void GetBufferFusionInfo(session::KernelGraph *kernel_graph, | |||||
| std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const; | std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const; | ||||
| bool ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, | |||||
| session::KernelGraph *kernel_graph) const; | |||||
| bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id, | |||||
| const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; | |||||
| bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph) const; | bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph) const; | ||||
| bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; | bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; | ||||
| }; | }; | ||||
| @@ -16,6 +16,9 @@ | |||||
| #include "pre_activate/ascend/ir_fission/topk_split.h" | #include "pre_activate/ascend/ir_fission/topk_split.h" | ||||
| #include <vector> | #include <vector> | ||||
| #include <memory> | #include <memory> | ||||
| #include <unordered_set> | |||||
| #include "pre_activate/common/helper.h" | |||||
| #include "kernel/kernel_build_info.h" | |||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| #include "session/kernel_graph.h" | #include "session/kernel_graph.h" | ||||
| #include "session/anf_runtime_algorithm.h" | #include "session/anf_runtime_algorithm.h" | ||||
| @@ -25,6 +28,7 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace opt { | namespace opt { | ||||
| constexpr size_t kFloat16Len = 2; // size of float16; | constexpr size_t kFloat16Len = 2; // size of float16; | ||||
| constexpr size_t kTopkIndexK = 1; | |||||
| namespace { | namespace { | ||||
| tensor::TensorPtr CreateTensor(const AnfNodePtr &node) { | tensor::TensorPtr CreateTensor(const AnfNodePtr &node) { | ||||
| // 1 create tensor | // 1 create tensor | ||||
| @@ -70,37 +74,68 @@ ValueNodePtr CreateValueNode(const AnfNodePtr &node) { | |||||
| AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), indices_const.get()); | AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), indices_const.get()); | ||||
| return indices_const; | return indices_const; | ||||
| } | } | ||||
| kernel::KernelBuildInfoPtr CreateKernelBuildInfo() { | |||||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; | |||||
| builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); | |||||
| builder.SetOutputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); | |||||
| builder.SetInputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16}); | |||||
| builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeInt32}); | |||||
| return builder.Build(); | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| const BaseRef TopKSplit::DefinePattern() const { | const BaseRef TopKSplit::DefinePattern() const { | ||||
| VarPtr X = std::make_shared<Var>(); | |||||
| MS_EXCEPTION_IF_NULL(X); | |||||
| VarPtr X1 = std::make_shared<Var>(); | |||||
| VarPtr X2 = std::make_shared<Var>(); | |||||
| auto prim = std::make_shared<Primitive>(kTopKOpName); | auto prim = std::make_shared<Primitive>(kTopKOpName); | ||||
| MS_EXCEPTION_IF_NULL(prim); | |||||
| return VectorRef({prim, X}); | |||||
| return VectorRef({prim, X1, X2}); | |||||
| } | } | ||||
| const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { | const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { | ||||
| MS_EXCEPTION_IF_NULL(func_graph); | MS_EXCEPTION_IF_NULL(func_graph); | ||||
| MS_EXCEPTION_IF_NULL(node); | MS_EXCEPTION_IF_NULL(node); | ||||
| auto kernel_graph = func_graph->cast<KernelGraphPtr>(); | auto kernel_graph = func_graph->cast<KernelGraphPtr>(); | ||||
| auto indices_const = CreateValueNode(node); | |||||
| // set value node as topk's input | // set value node as topk's input | ||||
| auto cnode = node->cast<CNodePtr>(); | auto cnode = node->cast<CNodePtr>(); | ||||
| MS_EXCEPTION_IF_NULL(cnode); | MS_EXCEPTION_IF_NULL(cnode); | ||||
| MS_LOG(INFO) << "already has input size: " << cnode->inputs().size(); | |||||
| cnode->add_input(indices_const); | |||||
| // Copy a new node to check supported. | |||||
| std::vector<AnfNodePtr> new_inputs{NewValueNode(std::make_shared<Primitive>(kTopKOpName))}; | |||||
| new_inputs.insert(new_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); | |||||
| CNodePtr new_cnode = func_graph->NewCNode(new_inputs); | |||||
| MS_EXCEPTION_IF_NULL(new_cnode); | |||||
| new_cnode->set_abstract(cnode->abstract()); | |||||
| new_cnode->set_scope(cnode->scope()); | |||||
| AnfAlgo::CopyNodeAttrs(cnode, new_cnode); | |||||
| CheckCNodeInputSize(new_cnode, kTopkInputNum); | |||||
| // Convert the tensor input to scalar and convert it to attr | |||||
| auto input_k = new_cnode->input(kTopkIndexK + 1); | |||||
| MS_EXCEPTION_IF_NULL(input_k); | |||||
| if (!IsValueNode<tensor::Tensor>(input_k)) { | |||||
| return nullptr; | |||||
| } | |||||
| ValuePtr value = GetValueNode(input_k); | |||||
| MS_EXCEPTION_IF_NULL(value); | |||||
| auto tensor = value->cast<tensor::TensorPtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tensor); | |||||
| int32_t *data = reinterpret_cast<int32_t *>(tensor->data_c()); | |||||
| MS_EXCEPTION_IF_NULL(data); | |||||
| auto new_value_node = std::make_shared<ValueNode>(MakeValue(*data)); | |||||
| new_cnode->set_input(kTopkIndexK + 1, new_value_node); | |||||
| std::unordered_set<size_t> attr_index{kTopkIndexK}; | |||||
| ConstInputToAttr(new_cnode, attr_index); | |||||
| auto indices_const = CreateValueNode(new_cnode); | |||||
| new_cnode->add_input(indices_const); | |||||
| MS_EXCEPTION_IF_NULL(supported_checker_); | |||||
| if (!supported_checker_->CheckSupported(new_cnode, CreateKernelBuildInfo())) { | |||||
| return nullptr; | |||||
| } | |||||
| if (kernel_graph != nullptr) { | if (kernel_graph != nullptr) { | ||||
| kernel_graph->AddValueNodeToGraph(indices_const); | kernel_graph->AddValueNodeToGraph(indices_const); | ||||
| } | } | ||||
| CNodePtr new_cnode = nullptr; | |||||
| if (kernel_graph == nullptr) { | |||||
| new_cnode = std::make_shared<CNode>(*cnode); | |||||
| } else { | |||||
| new_cnode = kernel_graph->NewCNode(cnode); | |||||
| } | |||||
| MS_EXCEPTION_IF_NULL(new_cnode); | |||||
| return new_cnode; | return new_cnode; | ||||
| } | } | ||||
| } // namespace opt | } // namespace opt | ||||
| @@ -16,15 +16,22 @@ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ | #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ | ||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ | #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_ | ||||
| #include <memory> | |||||
| #include "pre_activate/common/optimizer.h" | #include "pre_activate/common/optimizer.h" | ||||
| #include "pre_activate/ascend/ascend_helper.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace opt { | namespace opt { | ||||
| class TopKSplit : public PatternProcessPass { | class TopKSplit : public PatternProcessPass { | ||||
| public: | public: | ||||
| explicit TopKSplit(bool multigraph = true) : PatternProcessPass("topk_split", multigraph) {} | |||||
| explicit TopKSplit(bool multigraph = true) | |||||
| : PatternProcessPass("topk_split", multigraph), supported_checker_(std::make_shared<SupportedChecker>()) {} | |||||
| ~TopKSplit() override = default; | ~TopKSplit() override = default; | ||||
| const BaseRef DefinePattern() const override; | const BaseRef DefinePattern() const override; | ||||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | ||||
| private: | |||||
| SupportedCheckerPtr supported_checker_; | |||||
| }; | }; | ||||
| } // namespace opt | } // namespace opt | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -72,6 +72,38 @@ AnfNodePtr GetMul0(const FuncGraphPtr &graph, const AnfNodePtr &input2, const An | |||||
| } | } | ||||
| return mul0; | return mul0; | ||||
| } | } | ||||
| bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const AnfNodePtr &reduce_sum) { | |||||
| MS_EXCEPTION_IF_NULL(graph); | |||||
| MS_EXCEPTION_IF_NULL(mul0_anf); | |||||
| MS_EXCEPTION_IF_NULL(reduce_sum); | |||||
| if (!mul0_anf->isa<CNode>()) { | |||||
| return true; | |||||
| } | |||||
| auto mul0 = mul0_anf->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(mul0); | |||||
| // when network is _VirtualDatasetCell, quit fusion | |||||
| if (mul0->fullname_with_scope().find("network-_VirtualDatasetCell") != std::string::npos) { | |||||
| return true; | |||||
| } | |||||
| auto manager = graph->manager(); | |||||
| MS_EXCEPTION_IF_NULL(manager); | |||||
| if (manager->node_users().find(reduce_sum) == manager->node_users().end()) { | |||||
| MS_LOG(EXCEPTION) << "node has no output in manager"; | |||||
| } | |||||
| const AnfNodeIndexSet &outputs_set = manager->node_users()[reduce_sum]; | |||||
| auto it = std::find_if(outputs_set.begin(), outputs_set.end(), [&mul0](const std::pair<AnfNodePtr, int> &node_index) { | |||||
| return node_index.first == mul0->input(1) || node_index.first == mul0; | |||||
| }); | |||||
| if (it != outputs_set.end()) { | |||||
| MS_LOG(INFO) << "ReduceSum's output node is mul0's input or mul0! If do fusion, graph will exist a circle"; | |||||
| return true; | |||||
| } | |||||
| return false; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| const BaseRef ConfusionMulGradFusion::DefinePattern() const { | const BaseRef ConfusionMulGradFusion::DefinePattern() const { | ||||
| @@ -90,9 +122,6 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons | |||||
| auto reduce_sum = node->cast<CNodePtr>(); | auto reduce_sum = node->cast<CNodePtr>(); | ||||
| MS_EXCEPTION_IF_NULL(reduce_sum); | MS_EXCEPTION_IF_NULL(reduce_sum); | ||||
| auto mul1 = reduce_sum->input(1); | auto mul1 = reduce_sum->input(1); | ||||
| if (mul1->fullname_with_scope().find("bert/encoder") == std::string::npos) { | |||||
| return nullptr; | |||||
| } | |||||
| if (IsUsedByOthers(graph, mul1)) { | if (IsUsedByOthers(graph, mul1)) { | ||||
| MS_LOG(INFO) << "Mul1 is used by others, quit fusion!"; | MS_LOG(INFO) << "Mul1 is used by others, quit fusion!"; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -102,6 +131,9 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons | |||||
| MS_LOG(INFO) << "Mul0 do not exist, quit fusion"; | MS_LOG(INFO) << "Mul0 do not exist, quit fusion"; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| if (QuitFusion(graph, mul0, node)) { | |||||
| return nullptr; | |||||
| } | |||||
| auto fusion_node = CreateFusionNode(graph, reduce_sum, mul0, input3); | auto fusion_node = CreateFusionNode(graph, reduce_sum, mul0, input3); | ||||
| std::vector<AnfNodePtr> fusion_node_outputs; | std::vector<AnfNodePtr> fusion_node_outputs; | ||||
| @@ -0,0 +1,71 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "utils/utils.h" | |||||
| #include "operator/ops.h" | |||||
| #include "device/kernel_info.h" | |||||
| #include "pre_activate/common/helper.h" | |||||
| #include "pre_activate/common/optimizer.h" | |||||
| #include "pre_activate/ascend/ascend_helper.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| void DoRefresh(const CNodePtr &cnode) { | |||||
| if (cnode == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "node is nullptr"; | |||||
| } | |||||
| for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); input_index++) { | |||||
| auto input_kernel_node = AnfAlgo::GetInputNode(cnode, input_index); | |||||
| if (input_kernel_node->isa<Parameter>()) { | |||||
| std::shared_ptr<kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder = | |||||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||||
| auto cnode_input_format = AnfAlgo::GetInputFormat(cnode, input_index); | |||||
| auto kernel_node_format = AnfAlgo::GetOutputFormat(input_kernel_node, 0); | |||||
| auto dtype = AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0); | |||||
| if (kernel_node_format != cnode_input_format) { | |||||
| builder->SetOutputsFormat({cnode_input_format}); | |||||
| builder->SetOutputsDeviceType({dtype}); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| bool RefreshParameterFormat::Run(const FuncGraphPtr &func_graph) { | |||||
| if (func_graph == nullptr) { | |||||
| MS_LOG(ERROR) << "func_graph is nullptr."; | |||||
| return false; | |||||
| } | |||||
| std::vector<AnfNodePtr> node_list = TopoSort(func_graph->get_return()); | |||||
| for (auto node : node_list) { | |||||
| if (node == nullptr || !node->isa<CNode>()) { | |||||
| continue; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| if (cnode == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto node_name = AnfAlgo::GetCNodeName(cnode); | |||||
| if (node_name == kBNTrainingUpdateOpName) { | |||||
| DoRefresh(cnode); | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,40 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ | |||||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <utility> | |||||
| #include "ir/anf.h" | |||||
| #include "pre_activate/common/pass.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| class RefreshParameterFormat : public Pass { | |||||
| public: | |||||
| explicit RefreshParameterFormat(size_t groups = 1) : Pass("refresh_parameter_format"), groups_(groups) {} | |||||
| ~RefreshParameterFormat() override = default; | |||||
| bool Run(const FuncGraphPtr &graph) override; | |||||
| private: | |||||
| size_t groups_ = 1; | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_ | |||||
| @@ -299,6 +299,10 @@ tensor::TensorPtr CreateTensorWithValueTuple(const ValueTuplePtr &value_tuple_pt | |||||
| tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) { | tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) { | ||||
| MS_EXCEPTION_IF_NULL(value_tuple); | MS_EXCEPTION_IF_NULL(value_tuple); | ||||
| tensor::TensorPtr tensor = nullptr; | tensor::TensorPtr tensor = nullptr; | ||||
| if (value_tuple->value().empty()) { | |||||
| MS_LOG(WARNING) << "The value tuple is empty."; | |||||
| return nullptr; | |||||
| } | |||||
| ValuePtr v = *(value_tuple->value().begin()); | ValuePtr v = *(value_tuple->value().begin()); | ||||
| MS_EXCEPTION_IF_NULL(v); | MS_EXCEPTION_IF_NULL(v); | ||||
| // Currently we only deal with the scalar tuple | // Currently we only deal with the scalar tuple | ||||
| @@ -422,5 +426,47 @@ AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePt | |||||
| AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get()); | AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get()); | ||||
| return tuple_getitem; | return tuple_getitem; | ||||
| } | } | ||||
| void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| std::vector<AnfNodePtr> new_inputs; | |||||
| std::vector<std::string> new_input_names; | |||||
| auto primitive = AnfAlgo::GetCNodePrimitive(cnode); | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| auto input_names = primitive->GetAttr(kAttrInputNames); | |||||
| if (input_names == nullptr) { | |||||
| MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]"; | |||||
| return; | |||||
| } | |||||
| auto input_names_vec = GetValue<std::vector<std::string>>(input_names); | |||||
| auto inputs = cnode->inputs(); | |||||
| new_inputs.push_back(inputs[0]); | |||||
| bool need_update = false; | |||||
| for (size_t i = 0; i < inputs.size() - 1; ++i) { | |||||
| auto input_node = inputs[i + 1]; | |||||
| MS_EXCEPTION_IF_NULL(input_node); | |||||
| if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) { | |||||
| auto value_node = input_node->cast<ValueNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(value_node); | |||||
| MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]"; | |||||
| if (i >= input_names_vec.size()) { | |||||
| MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]"; | |||||
| } | |||||
| primitive->set_attr(input_names_vec[i], value_node->value()); | |||||
| need_update = true; | |||||
| } else { | |||||
| new_inputs.push_back(input_node); | |||||
| if (i < input_names_vec.size()) { | |||||
| new_input_names.push_back(input_names_vec[i]); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (need_update) { | |||||
| // Update cnode's inputs | |||||
| cnode->set_inputs(new_inputs); | |||||
| // Update cnode's input_names attr | |||||
| primitive->set_attr(kAttrInputNames, MakeValue(new_input_names)); | |||||
| } | |||||
| } | |||||
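The helper above moves constant value-node inputs into primitive attributes and rebuilds the input list and the input_names attr in lockstep. As a rough illustration of the idea, here is a minimal Python sketch; the flat list/dict node representation and the function name are simplifications invented for this example, not the MindSpore API.

```python
# Simplified sketch of converting constant inputs into attributes.
# Inputs at the positions listed in `const_positions` that hold plain
# constants are moved into the attribute dict; all others stay inputs.
def const_input_to_attr(inputs, input_names, const_positions):
    attrs, new_inputs, new_names = {}, [], []
    for i, (node, name) in enumerate(zip(inputs, input_names)):
        if i in const_positions and isinstance(node, (int, float, tuple, list)):
            attrs[name] = node          # the constant becomes an attribute
        else:
            new_inputs.append(node)     # everything else remains a real input
            new_names.append(name)
    return new_inputs, new_names, attrs

# e.g. a TopK-like node whose second input (k) is registered as const-to-attr:
print(const_input_to_attr(["x_tensor", 5], ["x", "k"], {1}))
# (['x_tensor'], ['x'], {'k': 5})
```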
| } // namespace opt | } // namespace opt | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| #include <unordered_set> | |||||
| #include "ir/func_graph.h" | #include "ir/func_graph.h" | ||||
| #include "session/kernel_graph.h" | #include "session/kernel_graph.h" | ||||
| #include "common/utils.h" | #include "common/utils.h" | ||||
| @@ -86,6 +87,7 @@ constexpr size_t kAdamApplyOneOutputNum = 3; | |||||
| constexpr size_t kBackendTransDataInputNum = 2; | constexpr size_t kBackendTransDataInputNum = 2; | ||||
| constexpr size_t kApplyMomentumInputNum = 6; | constexpr size_t kApplyMomentumInputNum = 6; | ||||
| constexpr size_t kBiasAddInputNum = 3; | constexpr size_t kBiasAddInputNum = 3; | ||||
| constexpr size_t kTopkInputNum = 3; | |||||
| enum FusedBatchNormInput { | enum FusedBatchNormInput { | ||||
| kX = 1, | kX = 1, | ||||
| @@ -150,6 +152,8 @@ void RemoveNopNode(session::KernelGraph *const graph); | |||||
| AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx); | AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx); | ||||
| bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node); | bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node); | ||||
| void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs); | |||||
| } // namespace opt | } // namespace opt | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_HELPER_H_ | #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_HELPER_H_ | ||||
| @@ -52,7 +52,6 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() { | |||||
| Register(kFlattenGradOpName, {1}); | Register(kFlattenGradOpName, {1}); | ||||
| Register(kExpandDimsOpName, {1}); | Register(kExpandDimsOpName, {1}); | ||||
| Register(kSplitOpName, {0}); | Register(kSplitOpName, {0}); | ||||
| Register(kTopKOpName, {1}); | |||||
| Register(kErfOpName, {1}); | Register(kErfOpName, {1}); | ||||
| Register(kSparseApplyAdagradOpName, {2}); | Register(kSparseApplyAdagradOpName, {2}); | ||||
| Register(kResizeNearestNeighborGrad, {1}); | Register(kResizeNearestNeighborGrad, {1}); | ||||
| @@ -18,10 +18,10 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <string> | #include <string> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <unordered_set> | |||||
| #include <memory> | #include <memory> | ||||
| #include "pre_activate/pass/const_input_to_attr_registry.h" | #include "pre_activate/pass/const_input_to_attr_registry.h" | ||||
| #include "pre_activate/common/helper.h" | |||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| #include "utils/context/ms_context.h" | #include "utils/context/ms_context.h" | ||||
| #include "operator/ops.h" | #include "operator/ops.h" | ||||
| @@ -29,50 +29,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace opt { | namespace opt { | ||||
| namespace { | |||||
| void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| std::vector<AnfNodePtr> new_inputs; | |||||
| std::vector<std::string> new_input_names; | |||||
| auto primitive = AnfAlgo::GetCNodePrimitive(cnode); | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| auto input_names = primitive->GetAttr(kAttrInputNames); | |||||
| if (input_names == nullptr) { | |||||
| MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]"; | |||||
| return; | |||||
| } | |||||
| auto input_names_vec = GetValue<std::vector<std::string>>(input_names); | |||||
| auto inputs = cnode->inputs(); | |||||
| new_inputs.push_back(inputs[0]); | |||||
| bool need_update = false; | |||||
| for (size_t i = 0; i < inputs.size() - 1; ++i) { | |||||
| auto input_node = inputs[i + 1]; | |||||
| MS_EXCEPTION_IF_NULL(input_node); | |||||
| if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) { | |||||
| auto value_node = input_node->cast<ValueNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(value_node); | |||||
| MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]"; | |||||
| if (i >= input_names_vec.size()) { | |||||
| MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]"; | |||||
| } | |||||
| primitive->set_attr(input_names_vec[i], value_node->value()); | |||||
| need_update = true; | |||||
| } else { | |||||
| new_inputs.push_back(input_node); | |||||
| if (i < input_names_vec.size()) { | |||||
| new_input_names.push_back(input_names_vec[i]); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (need_update) { | |||||
| // Update cnode's inputs | |||||
| cnode->set_inputs(new_inputs); | |||||
| // Update cnode's input_names attr | |||||
| primitive->set_attr(kAttrInputNames, MakeValue(new_input_names)); | |||||
| } | |||||
| } | |||||
| } // namespace | |||||
| const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node, | const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node, | ||||
| const EquivPtr &) const { | const EquivPtr &) const { | ||||
| if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { | if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) { | ||||
| @@ -825,6 +825,8 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n | |||||
| static std::map<std::string, std::map<size_t, size_t>> spec_node_list = { | static std::map<std::string, std::map<size_t, size_t>> spec_node_list = { | ||||
| {prim::kPrimConv2DBackpropInput->name(), {{0, 1}, {1, 0}}}, | {prim::kPrimConv2DBackpropInput->name(), {{0, 1}, {1, 0}}}, | ||||
| {prim::kPrimConv2DBackpropFilter->name(), {{0, 1}, {1, 0}}}, | {prim::kPrimConv2DBackpropFilter->name(), {{0, 1}, {1, 0}}}, | ||||
| {kFusionOpConv2DBackpropInputReluGradV2Name, {{0, 1}, {1, 0}}}, | |||||
| {kFusionOpConv2DBackpropInputAddNReluGradV2Name, {{0, 1}, {1, 0}}}, | |||||
| {prim::kPrimLogSoftmaxGrad->name(), {{0, 1}, {1, 0}}}, | {prim::kPrimLogSoftmaxGrad->name(), {{0, 1}, {1, 0}}}, | ||||
| {prim::kPrimLayerNormGrad->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}, {4, 4}}}, | {prim::kPrimLayerNormGrad->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}, {4, 4}}}, | ||||
| {prim::kPrimLayerNormBetaGammaBackprop->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}}}, | {prim::kPrimLayerNormBetaGammaBackprop->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}}}, | ||||
| @@ -835,7 +837,7 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n | |||||
| auto node_name = AnfAlgo::GetCNodeName(anf_node); | auto node_name = AnfAlgo::GetCNodeName(anf_node); | ||||
| if (AnfAlgo::GetKernelType(anf_node) == TBE_KERNEL) { | if (AnfAlgo::GetKernelType(anf_node) == TBE_KERNEL) { | ||||
| auto find = spec_node_list.find(node_name); | auto find = spec_node_list.find(node_name); | ||||
| if (find != spec_node_list.end()) { | |||||
| if (find != spec_node_list.end() && cur_index < find->second.size()) { | |||||
| ret = find->second[cur_index]; | ret = find->second[cur_index]; | ||||
| MS_LOG(INFO) << "Real input index change to" << ret << ", node name:" << node_name; | MS_LOG(INFO) << "Real input index change to" << ret << ", node name:" << node_name; | ||||
| } | } | ||||
| @@ -171,20 +171,17 @@ GeTensorPtr TransformUtil::ConvertTensor(const MeTensorPtr &tensor, const std::s | |||||
| MS_LOG(ERROR) << "The Me Tensor data type size is wrong, type size is: " << type_size; | MS_LOG(ERROR) << "The Me Tensor data type size is wrong, type size is: " << type_size; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| // get tensor buff size | |||||
| size_t data_buff_size = 0; | |||||
| size_t elements_num = IntToSize(tensor->ElementsNum()); | size_t elements_num = IntToSize(tensor->ElementsNum()); | ||||
| if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size >= elements_num) { | |||||
| data_buff_size = elements_num * type_size; | |||||
| if (UINT_MAX / type_size < elements_num) { | |||||
| MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size | |||||
| << " overflowed UINT_MAX: " << UINT_MAX << "."; | |||||
| return nullptr; | |||||
| } | } | ||||
| // get tensor buff size | |||||
| size_t data_buff_size = elements_num * type_size; | |||||
| if (data_buff_size == 0) { | if (data_buff_size == 0) { | ||||
| if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size < elements_num) { | |||||
| MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size | |||||
| << " overflowed UINT_MAX: " << UINT_MAX << "."; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "The Me Tensor data buff size is 0."; | |||||
| } | |||||
| return nullptr; | |||||
| MS_LOG(INFO) << "The Me Tensor data buff size is 0."; | |||||
| } | } | ||||
| // create ge tensor | // create ge tensor | ||||
| auto desc = GetGeTensorDesc(tensor->shape_c(), tensor->data_type(), format); | auto desc = GetGeTensorDesc(tensor->shape_c(), tensor->data_type(), format); | ||||
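The hunk above reworks the buffer-size computation so that an overflowing `elements_num * type_size` product is rejected up front with an error, while a zero-sized buffer is now only logged at INFO level instead of failing. A rough Python rendering of the overflow guard (UINT_MAX and the rejection behaviour follow the C++ code; the function name is made up for illustration):

```python
UINT_MAX = 2**32 - 1

def me_tensor_buff_size(elements_num: int, type_size: int) -> int:
    # Mirror of the C++ check: fail if the product would not fit in 32 bits.
    if type_size > 0 and UINT_MAX // type_size < elements_num:
        raise OverflowError(
            f"required buff size {elements_num} x {type_size} overflows UINT_MAX")
    return elements_num * type_size

print(me_tensor_buff_size(1024, 4))       # 4096
# me_tensor_buff_size(2**31, 4) would raise OverflowError
```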
| @@ -359,7 +359,12 @@ void MsContext::GetGeOptions(std::map<std::string, std::string> *ge_options) con | |||||
| } | } | ||||
| // Enable auto mixed precision according to the context options | // Enable auto mixed precision according to the context options | ||||
| (*ge_options)["ge.exec.auto_mix_precision"] = std::to_string(auto_mixed_precision_flag_); | |||||
| if (auto_mixed_precision_flag_) { | |||||
| (*ge_options)["ge.exec.precision_mode"] = "allow_mix_precision"; | |||||
| } else { | |||||
| (*ge_options)["ge.exec.precision_mode"] = "must_keep_origin_dtype"; | |||||
| } | |||||
| // Disable the global variable acc, only enable it while adding training graph in pipeline | // Disable the global variable acc, only enable it while adding training graph in pipeline | ||||
| (*ge_options)["ge.exec.variable_acc"] = "0"; | (*ge_options)["ge.exec.variable_acc"] = "0"; | ||||
| #endif | #endif | ||||
| @@ -438,4 +443,18 @@ bool MsContext::PynativeInitGe() { | |||||
| is_pynative_ge_init_ = true; | is_pynative_ge_init_ = true; | ||||
| return true; | return true; | ||||
| } | } | ||||
| bool MsContext::IsTsdOpened() { | |||||
| if (tsd_ref_ > 0) { | |||||
| return true; | |||||
| } | |||||
| return false; | |||||
| } | |||||
| bool MsContext::IsGeInited() { | |||||
| if (ge_ref_ > 0) { | |||||
| return true; | |||||
| } | |||||
| return false; | |||||
| } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -82,8 +82,10 @@ class MsContext { | |||||
| bool OpenTsd(); | bool OpenTsd(); | ||||
| bool CloseTsd(bool force = false); | bool CloseTsd(bool force = false); | ||||
| bool IsTsdOpened(); | |||||
| bool InitGe(); | bool InitGe(); | ||||
| bool FinalizeGe(bool force = false); | bool FinalizeGe(bool force = false); | ||||
| bool IsGeInited(); | |||||
| void set_enable_hccl(bool enable_hccl) { enable_hccl_ = enable_hccl; } | void set_enable_hccl(bool enable_hccl) { enable_hccl_ = enable_hccl; } | ||||
| bool enable_hccl() const { return enable_hccl_; } | bool enable_hccl() const { return enable_hccl_; } | ||||
| bool PynativeInitGe(); | bool PynativeInitGe(); | ||||
| @@ -122,6 +122,10 @@ constexpr auto kSendOpName = "Send"; | |||||
| constexpr auto kRecvOpName = "Recv"; | constexpr auto kRecvOpName = "Recv"; | ||||
| constexpr auto kReluV2OpName = "ReLUV2"; | constexpr auto kReluV2OpName = "ReLUV2"; | ||||
| constexpr auto kReluGradV2OpName = "ReluGradV2"; | constexpr auto kReluGradV2OpName = "ReluGradV2"; | ||||
| constexpr auto kAddNOpName = "AddN"; | |||||
| constexpr auto kConv2DBackpropInputOpName = "Conv2DBackpropInput"; | |||||
| constexpr auto kFusionOpConv2DBackpropInputReluGradV2Name = "FusionOp_Conv2DBackpropInput_ReluGradV2"; | |||||
| constexpr auto kFusionOpConv2DBackpropInputAddNReluGradV2Name = "FusionOp_Conv2DBackpropInput_AddN_ReluGradV2"; | |||||
| // attr key name | // attr key name | ||||
| constexpr auto kAttrInputNames = "input_names"; | constexpr auto kAttrInputNames = "input_names"; | ||||
| @@ -22,7 +22,7 @@ from mindspore import context | |||||
| from mindspore import log as logger | from mindspore import log as logger | ||||
| from mindspore.parallel._utils import _get_parallel_mode | from mindspore.parallel._utils import _get_parallel_mode | ||||
| from .._c_expression import generate_key, Executor_, Tensor, MetaTensor | from .._c_expression import generate_key, Executor_, Tensor, MetaTensor | ||||
| from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_ge | |||||
| from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend | |||||
| from .tensor import Tensor as MsTensor | from .tensor import Tensor as MsTensor | ||||
| # store ms_function class compiled pipeline cache | # store ms_function class compiled pipeline cache | ||||
| @@ -184,7 +184,7 @@ class _MindSporeFunction: | |||||
| @_wrap_func | @_wrap_func | ||||
| def __call__(self, *args): | def __call__(self, *args): | ||||
| init_ge() | |||||
| init_backend() | |||||
| converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args) | converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args) | ||||
| if not converted: | if not converted: | ||||
| raise RuntimeError('Process function parameter is failure') | raise RuntimeError('Process function parameter is failure') | ||||
| @@ -15,6 +15,7 @@ | |||||
| """Alexnet.""" | """Alexnet.""" | ||||
| import mindspore.nn as nn | import mindspore.nn as nn | ||||
| from mindspore.common.initializer import TruncatedNormal | from mindspore.common.initializer import TruncatedNormal | ||||
| from mindspore.ops import operations as P | |||||
| def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"): | def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"): | ||||
| weight = weight_variable() | weight = weight_variable() | ||||
| @@ -44,7 +45,7 @@ class AlexNet(nn.Cell): | |||||
| self.conv4 = conv(384, 384, 3, pad_mode="same") | self.conv4 = conv(384, 384, 3, pad_mode="same") | ||||
| self.conv5 = conv(384, 256, 3, pad_mode="same") | self.conv5 = conv(384, 256, 3, pad_mode="same") | ||||
| self.relu = nn.ReLU() | self.relu = nn.ReLU() | ||||
| self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2) | |||||
| self.max_pool2d = P.MaxPool(ksize=3, strides=2) | |||||
| self.flatten = nn.Flatten() | self.flatten = nn.Flatten() | ||||
| self.fc1 = fc_with_initialize(6*6*256, 4096) | self.fc1 = fc_with_initialize(6*6*256, 4096) | ||||
| self.fc2 = fc_with_initialize(4096, 4096) | self.fc2 = fc_with_initialize(4096, 4096) | ||||
| @@ -0,0 +1,284 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """MobileNetV2 model define""" | |||||
| import numpy as np | |||||
| import mindspore.nn as nn | |||||
| from mindspore.ops import operations as P | |||||
| from mindspore.ops.operations import TensorAdd | |||||
| from mindspore import Parameter, Tensor | |||||
| from mindspore.common.initializer import initializer | |||||
| __all__ = ['MobileNetV2', 'mobilenet_v2'] | |||||
| def _make_divisible(v, divisor, min_value=None): | |||||
| """ | |||||
| This function is taken from the original tf repo. | |||||
| It ensures that all layers have a channel number that is divisible by 8 | |||||
| It can be seen here: | |||||
| https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py | |||||
| :param v: original channel count. | |||||
| :param divisor: the number the result must be divisible by. | |||||
| :param min_value: lower bound for the result; defaults to divisor. | |||||
| :return: the adjusted channel count. | |||||
| """ | |||||
| if min_value is None: | |||||
| min_value = divisor | |||||
| new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) | |||||
| # Make sure that round down does not go down by more than 10%. | |||||
| if new_v < 0.9 * v: | |||||
| new_v += divisor | |||||
| return new_v | |||||
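To make the rounding rule concrete, the following standalone sketch reproduces `_make_divisible` and works through two inputs; the printed values are derived from the function body above, nothing else is assumed.

```python
def make_divisible(v, divisor=8, min_value=None):
    # Round v to the nearest multiple of divisor, never below min_value,
    # and never more than 10% below the original value.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

print(make_divisible(32 * 0.75))  # 24: already a multiple of 8
print(make_divisible(11))         # 16: rounding down to 8 would lose >10%, so bump up
```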
| class GlobalAvgPooling(nn.Cell): | |||||
| """ | |||||
| Global avg pooling definition. | |||||
| Args: | |||||
| Returns: | |||||
| Tensor, output tensor. | |||||
| Examples: | |||||
| >>> GlobalAvgPooling() | |||||
| """ | |||||
| def __init__(self): | |||||
| super(GlobalAvgPooling, self).__init__() | |||||
| self.mean = P.ReduceMean(keep_dims=False) | |||||
| def construct(self, x): | |||||
| x = self.mean(x, (2, 3)) | |||||
| return x | |||||
| class DepthwiseConv(nn.Cell): | |||||
| """ | |||||
| Depthwise Convolution wrapper definition. | |||||
| Args: | |||||
| in_planes (int): Input channel. | |||||
| kernel_size (int): Input kernel size. | |||||
| stride (int): Stride size. | |||||
| pad_mode (str): Pad mode, one of (pad, same, valid). | |||||
| pad (int): Pad size. | |||||
| channel_multiplier (int): Output channel multiplier. | |||||
| has_bias (bool): Whether the convolution has a bias. | |||||
| Returns: | |||||
| Tensor, output tensor. | |||||
| Examples: | |||||
| >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1) | |||||
| """ | |||||
| def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False): | |||||
| super(DepthwiseConv, self).__init__() | |||||
| self.has_bias = has_bias | |||||
| self.in_channels = in_planes | |||||
| self.channel_multiplier = channel_multiplier | |||||
| self.out_channels = in_planes * channel_multiplier | |||||
| self.kernel_size = (kernel_size, kernel_size) | |||||
| self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=channel_multiplier, kernel_size=kernel_size, | |||||
| stride=stride, pad_mode=pad_mode, pad=pad) | |||||
| self.bias_add = P.BiasAdd() | |||||
| weight_shape = [channel_multiplier, in_planes, *self.kernel_size] | |||||
| self.weight = Parameter(initializer('ones', weight_shape), name='weight') | |||||
| if has_bias: | |||||
| bias_shape = [channel_multiplier * in_planes] | |||||
| self.bias = Parameter(initializer('zeros', bias_shape), name='bias') | |||||
| else: | |||||
| self.bias = None | |||||
| def construct(self, x): | |||||
| output = self.depthwise_conv(x, self.weight) | |||||
| if self.has_bias: | |||||
| output = self.bias_add(output, self.bias) | |||||
| return output | |||||
| class ConvBNReLU(nn.Cell): | |||||
| """ | |||||
| Convolution/Depthwise fused with Batchnorm and ReLU block definition. | |||||
| Args: | |||||
| in_planes (int): Input channel. | |||||
| out_planes (int): Output channel. | |||||
| kernel_size (int): Input kernel size. | |||||
| stride (int): Stride size for the first convolutional layer. Default: 1. | |||||
| groups (int): Channel group. 1 for ordinary convolution; equal to the input channel count for depthwise convolution. Default: 1. | |||||
| Returns: | |||||
| Tensor, output tensor. | |||||
| Examples: | |||||
| >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) | |||||
| """ | |||||
| def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): | |||||
| super(ConvBNReLU, self).__init__() | |||||
| padding = (kernel_size - 1) // 2 | |||||
| if groups == 1: | |||||
| conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad', | |||||
| padding=padding) | |||||
| else: | |||||
| conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding) | |||||
| layers = [conv, nn.BatchNorm2d(out_planes), nn.ReLU6()] | |||||
| self.features = nn.SequentialCell(layers) | |||||
| def construct(self, x): | |||||
| output = self.features(x) | |||||
| return output | |||||
| class InvertedResidual(nn.Cell): | |||||
| """ | |||||
| Mobilenetv2 residual block definition. | |||||
| Args: | |||||
| inp (int): Input channel. | |||||
| oup (int): Output channel. | |||||
| stride (int): Stride size for the first convolutional layer. Default: 1. | |||||
| expand_ratio (int): Expand ratio of the input channel. | |||||
| Returns: | |||||
| Tensor, output tensor. | |||||
| Examples: | |||||
| >>> InvertedResidual(3, 256, 1, 1) | |||||
| """ | |||||
| def __init__(self, inp, oup, stride, expand_ratio): | |||||
| super(InvertedResidual, self).__init__() | |||||
| assert stride in [1, 2] | |||||
| hidden_dim = int(round(inp * expand_ratio)) | |||||
| self.use_res_connect = stride == 1 and inp == oup | |||||
| layers = [] | |||||
| if expand_ratio != 1: | |||||
| layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) | |||||
| layers.extend([ | |||||
| # dw | |||||
| ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), | |||||
| # pw-linear | |||||
| nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False), | |||||
| nn.BatchNorm2d(oup), | |||||
| ]) | |||||
| self.conv = nn.SequentialCell(layers) | |||||
| self.add = TensorAdd() | |||||
| self.cast = P.Cast() | |||||
| def construct(self, x): | |||||
| identity = x | |||||
| x = self.conv(x) | |||||
| if self.use_res_connect: | |||||
| return self.add(identity, x) | |||||
| return x | |||||
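A short usage sketch for the block defined above (the channel and stride values are illustrative, not taken from the model configuration): only a stride-1 block whose input and output channels match takes the residual-add path.

```python
# Illustrative only; assumes the InvertedResidual class defined above.
res_block = InvertedResidual(inp=32, oup=32, stride=1, expand_ratio=6)   # uses the residual add
down_block = InvertedResidual(inp=32, oup=64, stride=2, expand_ratio=6)  # plain conv output, no add
```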
| class MobileNetV2(nn.Cell): | |||||
| """ | |||||
| MobileNetV2 architecture. | |||||
| Args: | |||||
| num_classes (int): Number of classes. Default is 1000. | |||||
| width_mult (float): Channel width multiplier; channels are rounded to multiples of round_nearest. Default is 1. | |||||
| has_dropout (bool): Whether to use dropout before the classifier. Default is False. | |||||
| inverted_residual_setting (list): Inverted residual settings. Default is None. | |||||
| round_nearest (int): Round channel numbers to the nearest multiple of this value. Default is 8. | |||||
| Returns: | |||||
| Tensor, output tensor. | |||||
| Examples: | |||||
| >>> MobileNetV2(num_classes=1000) | |||||
| """ | |||||
| def __init__(self, num_classes=1000, width_mult=1., | |||||
| has_dropout=False, inverted_residual_setting=None, round_nearest=8): | |||||
| super(MobileNetV2, self).__init__() | |||||
| block = InvertedResidual | |||||
| input_channel = 32 | |||||
| last_channel = 1280 | |||||
| # setting of inverted residual blocks | |||||
| self.cfgs = inverted_residual_setting | |||||
| if inverted_residual_setting is None: | |||||
| self.cfgs = [ | |||||
| # t, c, n, s | |||||
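| # t: expand ratio, c: output channels, n: number of repeated blocks, s: stride of the first block in the group | |||||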
| [1, 16, 1, 1], | |||||
| [6, 24, 2, 2], | |||||
| [6, 32, 3, 2], | |||||
| [6, 64, 4, 2], | |||||
| [6, 96, 3, 1], | |||||
| [6, 160, 3, 2], | |||||
| [6, 320, 1, 1], | |||||
| ] | |||||
| # building first layer | |||||
| input_channel = _make_divisible(input_channel * width_mult, round_nearest) | |||||
| self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) | |||||
| features = [ConvBNReLU(3, input_channel, stride=2)] | |||||
| # building inverted residual blocks | |||||
| for t, c, n, s in self.cfgs: | |||||
| output_channel = _make_divisible(c * width_mult, round_nearest) | |||||
| for i in range(n): | |||||
| stride = s if i == 0 else 1 | |||||
| features.append(block(input_channel, output_channel, stride, expand_ratio=t)) | |||||
| input_channel = output_channel | |||||
| # building last several layers | |||||
| features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1)) | |||||
| # make it nn.CellList | |||||
| self.features = nn.SequentialCell(features) | |||||
| # mobilenet head | |||||
| head = ([GlobalAvgPooling(), nn.Dense(self.out_channels, num_classes, has_bias=True)] if not has_dropout else | |||||
| [GlobalAvgPooling(), nn.Dropout(0.2), nn.Dense(self.out_channels, num_classes, has_bias=True)]) | |||||
| self.head = nn.SequentialCell(head) | |||||
| self._initialize_weights() | |||||
| def construct(self, x): | |||||
| x = self.features(x) | |||||
| x = self.head(x) | |||||
| return x | |||||
| def _initialize_weights(self): | |||||
| """ | |||||
| Initialize weights. | |||||
| Args: | |||||
| Returns: | |||||
| None. | |||||
| Examples: | |||||
| >>> _initialize_weights() | |||||
| """ | |||||
| for _, m in self.cells_and_names(): | |||||
| if isinstance(m, (nn.Conv2d, DepthwiseConv)): | |||||
| n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
| m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n), | |||||
| m.weight.data.shape()).astype("float32"))) | |||||
| if m.bias is not None: | |||||
| m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) | |||||
| elif isinstance(m, nn.BatchNorm2d): | |||||
| m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape(), dtype="float32"))) | |||||
| m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape(), dtype="float32"))) | |||||
| elif isinstance(m, nn.Dense): | |||||
| m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape()).astype("float32"))) | |||||
| if m.bias is not None: | |||||
| m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32"))) | |||||
| def mobilenet_v2(**kwargs): | |||||
| """ | |||||
| Constructs a MobileNet V2 model | |||||
| """ | |||||
| return MobileNetV2(**kwargs) | |||||
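For completeness, a minimal construction-and-inference sketch using the factory above; the 1x3x224x224 input shape is an assumption made for illustration and is not stated in this file.

```python
import numpy as np
from mindspore import Tensor

# Build the network with the defaults defined above and run a dummy batch.
net = mobilenet_v2(num_classes=1000, width_mult=1.)
dummy = Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(dummy)   # expected output shape: (1, 1000)
```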
| @@ -260,3 +260,23 @@ def resnet50(class_num=10): | |||||
| [256, 512, 1024, 2048], | [256, 512, 1024, 2048], | ||||
| [1, 2, 2, 2], | [1, 2, 2, 2], | ||||
| class_num) | class_num) | ||||
| def resnet101(class_num=1001): | |||||
| """ | |||||
| Get ResNet101 neural network. | |||||
| Args: | |||||
| class_num (int): Class number. | |||||
| Returns: | |||||
| Cell, cell instance of ResNet101 neural network. | |||||
| Examples: | |||||
| >>> net = resnet101(1001) | |||||
| """ | |||||
| return ResNet(ResidualBlock, | |||||
| [3, 4, 23, 3], | |||||
| [64, 256, 512, 1024], | |||||
| [256, 512, 1024, 2048], | |||||
| [1, 2, 2, 2], | |||||
| class_num) | |||||
| @@ -22,7 +22,7 @@ from ..common import dtype as mstype | |||||
| from ..common.api import _executor | from ..common.api import _executor | ||||
| from .._checkparam import _check_str_by_regular | from .._checkparam import _check_str_by_regular | ||||
| from ..common.parameter import Parameter, ParameterTuple | from ..common.parameter import Parameter, ParameterTuple | ||||
| from .._c_expression import init_ge | |||||
| from .._c_expression import init_backend | |||||
| from ..ops.primitive import Primitive | from ..ops.primitive import Primitive | ||||
| from ..parallel._tensor import _load_tensor_by_layout | from ..parallel._tensor import _load_tensor_by_layout | ||||
| from ..parallel._utils import _get_parallel_mode | from ..parallel._utils import _get_parallel_mode | ||||
| @@ -66,7 +66,7 @@ class Cell: | |||||
| self._phase = 'train' | self._phase = 'train' | ||||
| self._parameter_layout_dict = {} | self._parameter_layout_dict = {} | ||||
| self._create_time = int(time.time() * 1e9) | self._create_time = int(time.time() * 1e9) | ||||
| init_ge() | |||||
| init_backend() | |||||
| # call gc to release GE session resources used by non-used cell objects | # call gc to release GE session resources used by non-used cell objects | ||||
| gc.collect() | gc.collect() | ||||
| self._construct_inputs_num = 0 | self._construct_inputs_num = 0 | ||||
| @@ -32,6 +32,7 @@ def piecewise_constant_lr(milestone, learning_rates): | |||||
| Args: | Args: | ||||
| milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list. | milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list. | ||||
| Every element is a milestone step, and must be greater than 0. | |||||
| learning_rates (Union[list[float], tuple[float]]): A list of learning rates. | learning_rates (Union[list[float], tuple[float]]): A list of learning rates. | ||||
| Returns: | Returns: | ||||
| @@ -40,7 +41,7 @@ def piecewise_constant_lr(milestone, learning_rates): | |||||
| Examples: | Examples: | ||||
| >>> milestone = [2, 5, 10] | >>> milestone = [2, 5, 10] | ||||
| >>> learning_rates = [0.1, 0.05, 0.01] | >>> learning_rates = [0.1, 0.05, 0.01] | ||||
| >>> lr = piecewise_constant_lr(milestone, learning_rates) | |||||
| >>> piecewise_constant_lr(milestone, learning_rates) | |||||
| [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01] | [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01] | ||||
| """ | """ | ||||
| validator.check_value_type('milestone', milestone, (tuple, list), None) | validator.check_value_type('milestone', milestone, (tuple, list), None) | ||||
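For reference, the list in the doctest above can be reproduced in a few lines of plain Python. This is a sketch of the documented behaviour (each milestone is the exclusive step bound for its rate), not the library's implementation:

```python
def piecewise_constant(milestone, learning_rates):
    """Return one learning rate per step, held constant until the next milestone."""
    lrs, last = [], 0
    for m, rate in zip(milestone, learning_rates):
        lrs.extend([rate] * (m - last))
        last = m
    return lrs

print(piecewise_constant([2, 5, 10], [0.1, 0.05, 0.01]))
# [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01]
```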
| @@ -100,7 +101,7 @@ def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, | |||||
| >>> total_step = 6 | >>> total_step = 6 | ||||
| >>> step_per_epoch = 2 | >>> step_per_epoch = 2 | ||||
| >>> decay_epoch = 1 | >>> decay_epoch = 1 | ||||
| >>> lr = exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch) | |||||
| >>> exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch) | |||||
| [0.1, 0.1, 0.09000000000000001, 0.09000000000000001, 0.08100000000000002, 0.08100000000000002] | [0.1, 0.1, 0.09000000000000001, 0.09000000000000001, 0.08100000000000002, 0.08100000000000002] | ||||
| """ | """ | ||||
| _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | ||||
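The values in this doctest are consistent with lr * decay_rate ** (epoch / decay_epoch), where epoch = step // step_per_epoch. A stand-alone sketch of that formula (my own helper, ignoring the is_stair flag):

```python
def exponential_decay(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch):
    """Per-step rates: lr * decay_rate ** (epoch / decay_epoch), epoch = step // step_per_epoch."""
    return [learning_rate * decay_rate ** ((i // step_per_epoch) / decay_epoch)
            for i in range(total_step)]

print(exponential_decay(0.1, 0.9, 6, 2, 1))
# expected (matches the doctest above): [0.1, 0.1, 0.09..., 0.09..., 0.081..., 0.081...]
```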
| @@ -142,7 +143,7 @@ def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, | |||||
| >>> total_step = 6 | >>> total_step = 6 | ||||
| >>> step_per_epoch = 2 | >>> step_per_epoch = 2 | ||||
| >>> decay_epoch = 2 | >>> decay_epoch = 2 | ||||
| >>> lr = natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||||
| >>> natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||||
| [0.1, 0.1, 0.1, 0.1, 0.016529888822158657, 0.016529888822158657] | [0.1, 0.1, 0.1, 0.1, 0.016529888822158657, 0.016529888822158657] | ||||
| """ | """ | ||||
| _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | ||||
| @@ -185,7 +186,7 @@ def inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, deca | |||||
| >>> total_step = 6 | >>> total_step = 6 | ||||
| >>> step_per_epoch = 1 | >>> step_per_epoch = 1 | ||||
| >>> decay_epoch = 1 | >>> decay_epoch = 1 | ||||
| >>> lr = inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||||
| >>> inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True) | |||||
| [0.1, 0.06666666666666667, 0.05, 0.04, 0.03333333333333333, 0.028571428571428574] | [0.1, 0.06666666666666667, 0.05, 0.04, 0.03333333333333333, 0.028571428571428574] | ||||
| """ | """ | ||||
| _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair) | ||||
| @@ -227,7 +228,7 @@ def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch): | |||||
| >>> total_step = 6 | >>> total_step = 6 | ||||
| >>> step_per_epoch = 2 | >>> step_per_epoch = 2 | ||||
| >>> decay_epoch = 2 | >>> decay_epoch = 2 | ||||
| >>> lr = cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch) | |||||
| >>> cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch) | |||||
| [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01] | [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01] | ||||
| """ | """ | ||||
| validator.check_float_positive('min_lr', min_lr, None) | validator.check_float_positive('min_lr', min_lr, None) | ||||
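Likewise, the cosine schedule's doctest values follow min_lr + 0.5 * (max_lr - min_lr) * (1 + cos(pi * epoch / decay_epoch)) with epoch = step // step_per_epoch; a small stand-alone sketch, again my own helper rather than the library code:

```python
import math

def cosine_decay(min_lr, max_lr, total_step, step_per_epoch, decay_epoch):
    """Per-step cosine annealing from max_lr down to min_lr over decay_epoch epochs."""
    delta = 0.5 * (max_lr - min_lr)
    return [min_lr + delta * (1 + math.cos(math.pi * (i // step_per_epoch) / decay_epoch))
            for i in range(total_step)]

print(cosine_decay(0.01, 0.1, 6, 2, 2))
# expected (matches the doctest above): [0.1, 0.1, 0.055..., 0.055..., 0.01, 0.01]
```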
| @@ -282,7 +283,7 @@ def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_e | |||||
| >>> step_per_epoch = 2 | >>> step_per_epoch = 2 | ||||
| >>> decay_epoch = 2 | >>> decay_epoch = 2 | ||||
| >>> power = 0.5 | >>> power = 0.5 | ||||
| >>> lr = polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power) | |||||
| >>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power) | |||||
| [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01] | [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01] | ||||
| """ | """ | ||||
| validator.check_float_positive('learning_rate', learning_rate, None) | validator.check_float_positive('learning_rate', learning_rate, None) | ||||
| @@ -104,7 +104,7 @@ class FTRL(Optimizer): | |||||
| self.lr_power = lr_power | self.lr_power = lr_power | ||||
| self.reciprocal_scale = 1.0 / loss_scale | self.reciprocal_scale = 1.0 / loss_scale | ||||
| self.weight_decay = weight_decay | self.weight_decay = weight_decay | ||||
| self.decay_tf = tuple((lambda:True)() for x in self.parameters) | |||||
| self.decay_tf = tuple((lambda: True)() for x in self.parameters) | |||||
| self.hyper_map = C.HyperMap() | self.hyper_map = C.HyperMap() | ||||
| self.opt = P.ApplyFtrl(use_locking=use_locking) | self.opt = P.ApplyFtrl(use_locking=use_locking) | ||||
| self.one = Tensor(1, mstype.int32) | self.one = Tensor(1, mstype.int32) | ||||
| @@ -118,5 +118,6 @@ class FTRL(Optimizer): | |||||
| if self.reciprocal_scale != 1.0: | if self.reciprocal_scale != 1.0: | ||||
| grads = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), grads) | grads = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), grads) | ||||
| lr = self.learning_rate | lr = self.learning_rate | ||||
| success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads, params, moments) | |||||
| success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads, | |||||
| params, moments) | |||||
| return success | return success | ||||
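The reformatted `decay_tf` line is easy to misread: `(lambda: True)()` is called immediately, so the expression simply builds one `True` flag per parameter. A tiny stand-alone illustration with placeholder parameter names:

```python
params = ["w1", "b1", "w2"]                         # stand-ins for self.parameters
decay_tf = tuple((lambda: True)() for x in params)
print(decay_tf)                                     # (True, True, True) -- one flag per parameter
```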