Compare commits

...

100 Commits
master ... r0.2

Author SHA1 Message Date
  mindspore-ci-bot dba9481337 !2825 add libtiff notice info to r0.2 5 years ago
  xulei2020 adffd67714 add libtiff notice to r0.2 5 years ago
  mindspore-ci-bot f5b794c802 !1047 remove dataset link in README 5 years ago
  wandongdong 229c8559fa remove dataset link 5 years ago
  mindspore-ci-bot 6de2733d52 !1041 delete externel link in example ciafr10_resent50 5 years ago
  gengdongjie 3949de02bb delete externel link in resnet50_cifar10 example 5 years ago
  mindspore-ci-bot 701606f0a2 !1005 remove http link from README.md for vgg 5 years ago
  mindspore-ci-bot 77f6c85698 !1012 remove dataset_link 5 years ago
  mindspore-ci-bot 4fdaa8222a !1022 delete dataset hyperlinks in bert README.md file 5 years ago
  yoonlee666 9c80971e20 delete dataset hyperlinks in bert README.md 5 years ago
  mindspore-ci-bot 0333696c8b !1004 modify readme for resnet101 5 years ago
  mindspore-ci-bot 6955440b24 !1014 eliminate external links to data sets about README 5 years ago
  chengxianbin a0a2111587 eliminate external lins to dataset 5 years ago
  wukesong bbdfd92979 remove dataset_link 5 years ago
  meixiaowei 495d0428f7 modify ReadMe and add data parallel 5 years ago
  caojian05 61d7ec7bf3 remove http link from README.md for vgg 5 years ago
  mindspore-ci-bot 07ae9fc168 !889 Set description type of whl package 5 years ago
  leonwanghui 522e178eb1 Fix release package link in dockerfile 5 years ago
  leonwanghui 5ad61af54c Fix release package link in README.md 5 years ago
  zhoufeng c1e9391d94 set description type of whl package 5 years ago
  mindspore-ci-bot 994b1ed052 !868 modify weight init for resnet101 5 years ago
  meixiaowei 69e5978eb2 modify weight init 5 years ago
  zhangzhenghai 5d666bdb61 update RELEASE.md. 5 years ago
  mindspore-ci-bot 680bf2c891 !827 support buffer fusion for r0.2 5 years ago
  mindspore-ci-bot 88c32a6f4d !828 Add reduce mean in SoftmaxCrossEntroyWithLogits in Resnet50 example 5 years ago
  mindspore-ci-bot 8d4511e729 !825 Add README.md for YOLOv3 5 years ago
  mindspore-ci-bot 0de15935ba !826 Check whether the value tuple is empty when converting it to tuple tensor 5 years ago
  mindspore-ci-bot 7aaaf1a5c8 !813 remove enable mixed precision for ge 5 years ago
  gengdongjie 5265c90884 add reduce mean in SoftmaxCrossEntropyWithLogits for resent50 example 5 years ago
  mindspore-ci-bot 24ff160ff0 !817 remove amp setting and add RANK_TABLE_FILE 5 years ago
  jjfeing cd6ed0e344 support buffer fusion 5 years ago
  mindspore-ci-bot e3fcf825bc !821 modify resnet101 scripts with the default backend mixed precision 5 years ago
  YuJianfeng 277659d544 Check the empty value tuple when converting it to tuple tensor 5 years ago
  zhaoting 3a2ddd9bb8 add README.md for YOLOv3 5 years ago
  wandongdong 1465afc5f1 del amp and add RANK_TABLE_FILE 5 years ago
  mindspore-ci-bot bf1d003137 !820 Update document about dynamic_lr 5 years ago
  meixiaowei fce21087dd modify resnet101 scripts 5 years ago
  leilei_snow ba7ccf26a3 fix api document about dynamic_lr 5 years ago
  caojian05 52a238f4bb remove enable mixed precision for ge 5 years ago
  mindspore-ci-bot 3183579e0e !798 change runtime error to type error when cannot find kernel info 5 years ago
  lianliguang e9c3a5a7f8 change runtime error to type error when cannot find kernel info 5 years ago
  mindspore-ci-bot ca5f81af36 !748 fix np.histogram sometimes calc very large bucket number 5 years ago
  mindspore-ci-bot 517e3235ba !790 add distribute train README for vgg16 5 years ago
  mindspore-ci-bot 6562aa66d1 !783 add distribute train for vgg16 5 years ago
  mindspore-ci-bot f4e8bca783 !787 Fix dtype judge sentence in infer_dtype function of hcom operations 5 years ago
  mindspore-ci-bot 928b0bb309 !781 Adjust the order of cast and reshape in the grammar implementation process 5 years ago
  caojian05 16bc4abe34 add distribute train README for vgg16 5 years ago
  mindspore-ci-bot 9ca2349c81 !782 modify resnet101 dir name to resnet101_imagenet2012 5 years ago
  zhouyuanshen 6cc51e0c0c fix bug in infer_dtype function of hcom operations 5 years ago
  mindspore-ci-bot fb90cb4da6 !768 [bug]with eval cell show cast is not support in gpu pynative 5 years ago
  mindspore-ci-bot c96d5f5353 !744 Disable ConfusionMulGrad fusion pass 5 years ago
  caojian05 84f914c4bb add distribute train for vgg16 5 years ago
  meixiaowei aef80c44e2 modify resnet101 dir name to resnet101_imagenet2012 5 years ago
  Wei Luning 6b39161701 only cast when level is O2 5 years ago
  candanzg e8850e485e repair cast 5 years ago
  mindspore-ci-bot 06af0f751f !773 Set precision mode and allreduce split strategy 5 years ago
  mindspore-ci-bot c90b66a0db !777 fix bugs and dock ops 5 years ago
  gengdongjie e8621ce1d6 set auto mix precision and allreduce aplit size 5 years ago
  mindspore-ci-bot 118d434a3b !771 Upload Resnet101 Scripts 5 years ago
  mindspore-ci-bot 58844968fb !770 Add MobilenetV2 to model_zoo and train scripts to r0.2 5 years ago
  buxue 381acf617b dock FloorMod GreaterEqual NotEqual ScatterNdUpdate 5 years ago
  mindspore-ci-bot c52934923e !757 remove redundant data copy 5 years ago
  buxue 42eb8b08c2 fix bugs of Acosh, TopK, ResizeNearestNeighbor, DepthwiseConv2dNative 5 years ago
  buxue 4fa2d03c89 fix reviewboot and example of TruncatedNormal and add type mapping 5 years ago
  buxue 71ccf74b88 fix the infer of TruncatedNormal and a bug of structure output and a bug of tensorslice ellipsis 5 years ago
  meixiaowei 0cd381be14 resnet101 update 5 years ago
  wandongdong 3aa54aada3 add mobilenetv2 5 years ago
  mindspore-ci-bot fed85d7927 !758 modify maxpool in alexnet 5 years ago
  mindspore-ci-bot a427dd6059 !726 modify init_ge api name and add init_ge to init_dataset 5 years ago
  mindspore-ci-bot 2e6f97f60a !752 fix bug of import _akg failed 5 years ago
  wukesong 5543f829e1 alexnet-maxpool 5 years ago
  dinghao 7bc0cbca18 remove data sync 5 years ago
  lizhenyu b7b7ef390d fix bug of import akg failed 5 years ago
  wenkai cdc09b1ce5 fix np.histograms(bins='auto') sometimes calc very small width and very large bucket number, which lead to error/long compute time. 5 years ago
  huanghui e7549bd78a Disable ConfusionMulGrad fusion pass 5 years ago
  mindspore-ci-bot 4337a32ae6 !722 fix load checkpoint bug 5 years ago
  mindspore-ci-bot b56cbf1851 !725 Fix confusionmulgrad fusion pass cannot work 5 years ago
  leonwanghui e86ab6ce9c !724 Bump the version to 0.2.0-alpha 5 years ago
  jinyaohui 5f18c85ffe modify init_dataset 5 years ago
  mindspore-ci-bot b04b879431 !714 fix select wrong kernel 5 years ago
  mindspore-ci-bot 16c00622e7 !716 Check topk supported before converting input to attr 5 years ago
  chang zherui df71c09a4e modify load ckpt 5 years ago
  mindspore-ci-bot bfd2afc00b !657 Fix confusionmulgrad fusion pass cannot work 5 years ago
  leonwanghui 1f05fa8210 Bump the version to 0.2.0-alpha 5 years ago
  mindspore-ci-bot 3fc0c2e1ff !695 Check topk supported before converting input to attr 5 years ago
  chenjianping 290f783f3b fix select wrong kernel 5 years ago
  mindspore-ci-bot f52d4a3dbb !710 sync profiling modifies from master to r0.2 5 years ago
  caifubi bd1d6d558c sync profiling bp_end point modify from master 5 years ago
  mindspore-ci-bot 79d1e46573 !702 add buffer fusion bnupdate eltwise pass 5 years ago
  Etone.Chan 4e39354daa add buffer fusion bnupdate eltwise pass 5 years ago
  mindspore-ci-bot a04e848627 !650 Match format when kernel selecting using raise or reduce precision 5 years ago
  liubuyu 05e001fc84 add model parameters for vgg16 to enable mixed precision 5 years ago
  mindspore-ci-bot 16ac0f29de !640 add model parameters for vgg16 to enable mixed precision. 5 years ago
  mindspore-ci-bot 4bdb03f92c !633 [MD] adjust mindrecord ut 5 years ago
  mindspore-ci-bot ec1b5ada66 !638 modify r0.2 version number 5 years ago
  wangnan39@huawei.com 361181f465 modify r0.2 version number 5 years ago
  mindspore-ci-bot 1b9bad8469 !635 modify r0.2 version number 5 years ago
  wangnan39@huawei.com bf9de88cdb modify r0.2 version number 5 years ago
  caojian05 73d4cf77d4 add model parameters for vgg16 to open mixed precision. 5 years ago
  liyong 1f222ddb9e fix mindrecord c ut 5 years ago
100 changed files with 3297 additions and 640 deletions
Split View
  1. +42
    -16
      README.md
  2. +72
    -0
      RELEASE.md
  3. +54
    -0
      Third_Party_Open_Source_Software_Notice
  4. +6
    -6
      build.bat
  5. +2
    -2
      build.sh
  6. +9
    -10
      docker/README.md
  7. +1
    -1
      docker/mindspore-cpu/0.1.0-alpha/Dockerfile
  8. +67
    -0
      docker/mindspore-cpu/0.2.0-alpha/Dockerfile
  9. +1
    -1
      docker/mindspore-gpu/0.1.0-alpha/Dockerfile
  10. +83
    -0
      docker/mindspore-gpu/0.2.0-alpha/Dockerfile
  11. +2
    -2
      example/Bert_NEZHA_cnwiki/README.md
  12. +1
    -1
      example/alexnet_cifar10/README.md
  13. +1
    -1
      example/lenet_mnist/README.md
  14. +101
    -0
      example/mobilenetv2_imagenet2012/README.md
  15. +35
    -0
      example/mobilenetv2_imagenet2012/config.py
  16. +84
    -0
      example/mobilenetv2_imagenet2012/dataset.py
  17. +56
    -0
      example/mobilenetv2_imagenet2012/eval.py
  18. +143
    -0
      example/mobilenetv2_imagenet2012/launch.py
  19. +54
    -0
      example/mobilenetv2_imagenet2012/lr_generator.py
  20. +33
    -0
      example/mobilenetv2_imagenet2012/run_infer.sh
  21. +33
    -0
      example/mobilenetv2_imagenet2012/run_train.sh
  22. +148
    -0
      example/mobilenetv2_imagenet2012/train.py
  23. +135
    -0
      example/resnet101_imagenet2012/README.md
  24. +39
    -0
      example/resnet101_imagenet2012/config.py
  25. +36
    -0
      example/resnet101_imagenet2012/crossentropy.py
  26. +89
    -0
      example/resnet101_imagenet2012/dataset.py
  27. +84
    -0
      example/resnet101_imagenet2012/eval.py
  28. +52
    -0
      example/resnet101_imagenet2012/lr_generator.py
  29. +66
    -0
      example/resnet101_imagenet2012/run_distribute_train.sh
  30. +64
    -0
      example/resnet101_imagenet2012/run_infer.sh
  31. +56
    -0
      example/resnet101_imagenet2012/run_standalone_train.sh
  32. +103
    -0
      example/resnet101_imagenet2012/train.py
  33. +1
    -1
      example/resnet50_cifar10/README.md
  34. +4
    -7
      example/resnet50_cifar10/dataset.py
  35. +2
    -1
      example/resnet50_cifar10/run_distribute_train.sh
  36. +2
    -2
      example/resnet50_cifar10/train.py
  37. +30
    -2
      example/vgg16_cifar10/README.md
  38. +5
    -1
      example/vgg16_cifar10/dataset.py
  39. +54
    -0
      example/vgg16_cifar10/run_distribute_train.sh
  40. +22
    -7
      example/vgg16_cifar10/train.py
  41. +94
    -0
      example/yolov3_coco2017/README.md
  42. +1
    -46
      mindspore/_akg/__init__.py
  43. +61
    -0
      mindspore/_akg/add_path.py
  44. +6
    -4
      mindspore/_extends/parallel_compile/tbe_compiler/common.py
  45. +8
    -0
      mindspore/ccsrc/debug/anf_ir_dump.cc
  46. +3
    -1
      mindspore/ccsrc/debug/anf_ir_dump.h
  47. +1
    -1
      mindspore/ccsrc/debug/info.h
  48. +1
    -1
      mindspore/ccsrc/debug/trace.cc
  49. +1
    -1
      mindspore/ccsrc/debug/trace_info.h
  50. +131
    -151
      mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
  51. +17
    -6
      mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
  52. +1
    -0
      mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
  53. +6
    -0
      mindspore/ccsrc/ir/dtype/type.cc
  54. +0
    -3
      mindspore/ccsrc/ir/meta_tensor.cc
  55. +3
    -2
      mindspore/ccsrc/kernel/kernel_build_info.cc
  56. +2
    -1
      mindspore/ccsrc/kernel/kernel_fusion.cc
  57. +46
    -0
      mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
  58. +5
    -8
      mindspore/ccsrc/kernel/tbe/tbe_adapter.h
  59. +109
    -51
      mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
  60. +11
    -4
      mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
  61. +2
    -1
      mindspore/ccsrc/mindrecord/io/shard_reader.cc
  62. +8
    -5
      mindspore/ccsrc/operator/composite/composite.cc
  63. +0
    -2
      mindspore/ccsrc/operator/composite/unpack_call.h
  64. +0
    -1
      mindspore/ccsrc/optimizer/irpass.cc
  65. +0
    -1
      mindspore/ccsrc/optimizer/irpass.h
  66. +0
    -1
      mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc
  67. +0
    -1
      mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h
  68. +0
    -2
      mindspore/ccsrc/pipeline/base.h
  69. +1
    -1
      mindspore/ccsrc/pipeline/init.cc
  70. +13
    -6
      mindspore/ccsrc/pipeline/pipeline.cc
  71. +2
    -2
      mindspore/ccsrc/pipeline/pipeline.h
  72. +21
    -22
      mindspore/ccsrc/pipeline/pipeline_ge.cc
  73. +0
    -2
      mindspore/ccsrc/pipeline/pipeline_ge.h
  74. +2
    -2
      mindspore/ccsrc/pipeline/static_analysis/abstract_function.h
  75. +0
    -1
      mindspore/ccsrc/pipeline/static_analysis/prim.cc
  76. +2
    -2
      mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
  77. +11
    -0
      mindspore/ccsrc/pre_activate/ascend/ascend_helper.h
  78. +242
    -153
      mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
  79. +3
    -4
      mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
  80. +49
    -14
      mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc
  81. +8
    -1
      mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h
  82. +35
    -3
      mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc
  83. +71
    -0
      mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc
  84. +40
    -0
      mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h
  85. +46
    -0
      mindspore/ccsrc/pre_activate/common/helper.cc
  86. +4
    -0
      mindspore/ccsrc/pre_activate/common/helper.h
  87. +0
    -1
      mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
  88. +1
    -45
      mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc
  89. +3
    -1
      mindspore/ccsrc/session/anf_runtime_algorithm.cc
  90. +8
    -11
      mindspore/ccsrc/transform/util.cc
  91. +20
    -1
      mindspore/ccsrc/utils/context/ms_context.cc
  92. +2
    -0
      mindspore/ccsrc/utils/context/ms_context.h
  93. +4
    -0
      mindspore/ccsrc/utils/utils.h
  94. +2
    -2
      mindspore/common/api.py
  95. +2
    -1
      mindspore/model_zoo/alexnet.py
  96. +284
    -0
      mindspore/model_zoo/mobilenet.py
  97. +20
    -0
      mindspore/model_zoo/resnet.py
  98. +2
    -2
      mindspore/nn/cell.py
  99. +7
    -6
      mindspore/nn/dynamic_lr.py
  100. +3
    -2
      mindspore/nn/optim/ftrl.py

+ 42
- 16
README.md View File

@@ -1,7 +1,7 @@
![MindSpore Logo](docs/MindSpore-logo.png "MindSpore logo")
============================================================

- [What is MindSpore?](#what-is-mindspore)
- [What Is MindSpore?](#what-is-mindspore)
- [Automatic Differentiation](#automatic-differentiation)
- [Automatic Parallel](#automatic-parallel)
- [Installation](#installation)
@@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.

<img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/>

For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html).
For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html).

### Automatic Differentiation

@@ -76,13 +76,36 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.

```
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
```

2. Run the following command to verify the install.

```python
import numpy as np
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

class Mul(nn.Cell):
def __init__(self):
super(Mul, self).__init__()
self.mul = P.Mul()

def construct(self, x, y):
return self.mul(x, y)

x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32))
y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32))

mul = Mul()
print(mul(x, y))
```
python -c 'import mindspore'
```
[ 4. 10. 18.]
```

### From Source
@@ -96,20 +119,22 @@ currently the containerized build options are supported as follows:

| Hardware Platform | Docker Image Repository | Tag | Description |
| :---------------- | :---------------------- | :-- | :---------- |
| CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. |
| CPU | `mindspore/mindspore-cpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` CPU release. |
| | | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
| | | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. |
| GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. |
| GPU | `mindspore/mindspore-gpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` GPU release. |
| | | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
| | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. |
| | | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU CUDA10.1` backend. |
| Ascend | <center>&mdash;</center> | <center>&mdash;</center> | Coming soon. |

> **NOTICE:** For GPU `devel` docker image, it's NOT suggested to directly install the whl package after building from the source, instead we strongly RECOMMEND you transfer and install the whl package inside GPU `runtime` docker image.

* CPU

For `CPU` backend, you can directly pull and run the image using the below command:
For `CPU` backend, you can directly pull and run the latest stable image using the below command:
```
docker pull mindspore/mindspore-cpu:0.1.0-alpha
docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore'
docker pull mindspore/mindspore-cpu:0.2.0-alpha
docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash
```

* GPU
@@ -124,20 +149,21 @@ currently the containerized build options are supported as follows:
sudo systemctl restart docker
```

Then you can pull and run the image using the below command:
Then you can pull and run the latest stable image using the below command:
```
docker pull mindspore/mindspore-gpu:0.1.0-alpha
docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash
docker pull mindspore/mindspore-gpu:0.2.0-alpha
docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash
```

To test if the docker image works, please execute the python code below and check the output:
```python
import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import functional as F
import mindspore.context as context

context.set_context(device_target="GPU")

x = Tensor(np.ones([1,3,3,4]).astype(np.float32))
y = Tensor(np.ones([1,3,3,4]).astype(np.float32))
print(F.tensor_add(x, y))
@@ -157,11 +183,11 @@ currently the containerized build options are supported as follows:
```

If you want to learn more about the building process of MindSpore docker images,
please check out `docker` folder for the details.
please check out [docker](docker/README.md) repo for the details.

## Quickstart

See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html)
See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html)
to implement the image classification.

## Docs


+ 72
- 0
RELEASE.md View File

@@ -1,3 +1,75 @@
# Release 0.2.0-alpha

## Major Features and Improvements

### Ascend 910 Training and Inference Framework
* New models
* MobileNetV2: Inverted Residuals and Linear Bottlenecks.
* ResNet101: Deep Residual Learning for Image Recognition.

* Frontend and User Interface
* Support for all python comparison operators.
* Support for math operators **,//,%. Support for other python operators like and/or/not/is/is not/ in/ not in.
* Support for the gradients of function with variable arguments.
* Support for tensor indexing assignment for certain indexing type.
* Support for dynamic learning rate.
* User interfaces change log
* DepthwiseConv2dNative, DepthwiseConv2dNativeBackpropFilter, DepthwiseConv2dNativeBackpropInput([!424](https://gitee.com/mindspore/mindspore/pulls/424))
* ReLU6, ReLU6Grad([!224](https://gitee.com/mindspore/mindspore/pulls/224))
* GeneratorDataset([!183](https://gitee.com/mindspore/mindspore/pulls/183))
* VOCDataset([!477](https://gitee.com/mindspore/mindspore/pulls/477))
* MindDataset, PKSampler([!514](https://gitee.com/mindspore/mindspore/pulls/514))
* map([!506](https://gitee.com/mindspore/mindspore/pulls/506))
* Conv([!226](https://gitee.com/mindspore/mindspore/pulls/226))
* Adam([!253](https://gitee.com/mindspore/mindspore/pulls/253))
* _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size([!189](https://gitee.com/mindspore/mindspore/pulls/189))
* CheckpointConfig([!122](https://gitee.com/mindspore/mindspore/pulls/122))
* Constant([!54](https://gitee.com/mindspore/mindspore/pulls/54))
* Executor and Performance Optimization
* Support parallel execution of data prefetching and forward/backward computing.
* Support parallel execution of gradient aggregation and forward/backward computing in distributed training scenarios.
* Support operator fusion optimization.
* Optimize compilation process and improve the performance.
* Data processing, augmentation, and save format
* Support multi-process of GeneratorDataset/PyFunc for high performance
* Support variable batchsize
* Support new Dataset operators, such as filter,skip,take,TextLineDataset

### Other Hardware Support
* GPU platform
* Use dynamic memory pool by default on GPU.
* Support parallel execution of computation and communication.
* Support continuous address allocation by memory pool.
* CPU platform
* Support for windows 10 OS.

## Bugfixes
* Models
* Fix mixed precision bug for VGG16 model ([!629](https://gitee.com/mindspore/mindspore/pulls/629)).
* Python API
* Fix ControlDepend operator bugs on CPU and GPU ([!396](https://gitee.com/mindspore/mindspore/pulls/396)).
* Fix ArgMinWithValue operator bugs ([!338](https://gitee.com/mindspore/mindspore/pulls/338)).
* Fix Dense operator bugs on PyNative mode ([!276](https://gitee.com/mindspore/mindspore/pulls/276)).
* Fix MatMul operator bugs on PyNative mode ([!288](https://gitee.com/mindspore/mindspore/pulls/288)).
* Executor
* Fix operator selection bugs and make it general ([!300](https://gitee.com/mindspore/mindspore/pulls/300)).
* Fix memory reuse bug for GetNext op ([!291](https://gitee.com/mindspore/mindspore/pulls/291)).
* GPU platform
* Fix memory allocation in multi-graph scenarios ([!444](https://gitee.com/mindspore/mindspore/pulls/444)).
* Fix bias_add_grad under fp16 precision ([!598](https://gitee.com/mindspore/mindspore/pulls/598)).
* Fix support for fp16 kernels on nvidia 1080Ti([!571](https://gitee.com/mindspore/mindspore/pulls/571)).
* Fix parsing of tuple type parameters ([!316](https://gitee.com/mindspore/mindspore/pulls/316)).
* Data processing
* Fix TypeErrors about can't pickle mindspore._c_dataengine.DEPipeline objects([!434](https://gitee.com/mindspore/mindspore/pulls/434)).
* Add TFRecord file verification([!406](https://gitee.com/mindspore/mindspore/pulls/406)).

## Contributors
Thanks goes to these wonderful people:

Alexey_Shevlyakov, Cathy, Chong, Hoai, Jonathan, Junhan, JunhanHu, Peilin, SanjayChan, StrawNoBerry, VectorSL, Wei, WeibiaoYu, Xiaoda, Yanjun, YuJianfeng, ZPaC, Zhang, ZhangQinghua, ZiruiWu, amongo, anthonyaje, anzhengqi, biffex, caifubi, candanzg, caojian05, casgj, cathwong, ch-l, chang, changzherui, chenfei, chengang, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, dengwentao, dinghao, fanglei, fary86, flywind, gaojing, geekun, gengdongjie, ghzl, gong, gongchen, gukecai, guohongzilong, guozhijian, gziyan, h.farahat, hesham, huangdongrun, huanghui, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, jonathan_yan, jonyguo, jzw, kingfo, kisnwang, laiyongqiang, leonwanghui, lianliguang, lichen, lichenever, limingqi107, liubuyu, liuxiao, liyong, liyong126, lizhenyu, lupengcheng, lvliang, maoweiyong, ms_yan, mxm, ougongchang, panfengfeng, panyifeng, pengyanjun, penn, qianlong, seatea, simson, suteng, thlinh, vlne-v1, wangchengke, wanghua, wangnan39, wangqiuliang, wenchunjiang, wenkai, wukesong, xiefangqi, xulei, yanghaitao, yanghaoran, yangjie159, yangzhenzhang, yankai10, yanzhenxiang2020, yao_yf, yoonlee666, zhangbuxue, zhangz0911gm, zhangzheng, zhaojichen, zhaoting, zhaozhenlong, zhongligeng, zhoufeng, zhousiyi, zjun, zyli2020, yuhuijun, limingqi107, lizhenyu, chenweifeng.

Contributions of any kind are welcome!

# Release 0.1.0-alpha

## Main Features


+ 54
- 0
Third_Party_Open_Source_Software_Notice View File

@@ -3042,6 +3042,60 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", AND
Why Three Licenses?
The zlib License could have been used instead of the Modified (3-clause) BSD License, and since the IJG License effectively subsumes the distribution conditions of the zlib License, this would have effectively placed libjpeg-turbo binary distributions under the IJG License. However, the IJG License specifically refers to the Independent JPEG Group and does not extend attribution and endorsement protections to other entities. Thus, it was desirable to choose a license that granted us the same protections for new code that were granted to the IJG for code derived from their software.

Software: libtiff 4.1.0
Copyright notice:
Copyright © 2015 Open Microscopy Environment / University of Dundee
Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 1990-1997 Sam Leffler
Copyright (c) 1991-1997 Silicon Graphics, Inc.
Copyright (c) 1988-1997 Sam Leffler
Copyright (c) 1991-1997 Sam Leffler
Use and Copyright
Copyright (C) 1990, 1995 Frank D. Cringle.
Copyright (c) 1994-1997 Sam Leffler
Copyright (c) 1994-1997 Silicon Graphics, Inc.
Copyright (c) 1997 Greg Ward Larson
Copyright (c) 1997 Silicon Graphics, Inc.
Copyright (c) 2010, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) Joris Van Damme <info@awaresystems.be>
Copyright (c) AWare Systems <http:www.awaresystems.be/>
Copyright (c) 1996-1997 Sam Leffler
Copyright (c) 1996 Pixar
Copyright (c) 1995-1997 Sam Leffler
Copyright (c) 1995-1997 Silicon Graphics, Inc.
Copyright (c) 1988-1996 Sam Leffler
Copyright (c) 1991-1996 Silicon Graphics, Inc.
Copyright (c) 1992-1997 Sam Leffler
Copyright (c) 1992-1997 Silicon Graphics, Inc.
Copyright (c) 2018, Mapbox
Copyright (c) 2017, Planet Labs
Copyright (c) 1990 by Sun Microsystems, Inc.
Copyright 1990 by Digital Equipment Corporation, Maynard, Massachusetts.
Copyright 1991 by Digital Equipment Corporation, Maynard, Massachusetts.
Copyright (c) 2002, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 2003 Ross Finlayson
Additions (c) Richard Nolde 2006-2010
Copyright (c) 2003, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 2000, Frank Warmerdam
Copyright (c) 1987, 1993, 1994
Copyright (c) 1989, 1993
Copyright (c) 2009 Frank Warmerdam
Copyright (c) 1987, 1993
Copyright (c) 2005 The DragonFly Project. All rights reserved.
Copyright (c) 2003 Citrus Project,
All rights reserved.
Copyright (c) 1990, 1993
Copyright (c) 1996 Mike Johnson
Copyright (c) 1996 BancTec AB
Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 2012, Frank Warmerdam <warmerdam@pobox.com>
Copyright (c) 2019, Even Rouault <even.rouault at spatialys.com>
Copyright (c) 2007, Frank Warmerdam <warmerdam@pobox.com>
Copyright (c) 2019, Thomas Bernard <miniupnp@free.fr>
Copyright (c) 2008, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 1999, Frank Warmerdam
Copyright (c) 1991-1996 Sam Leffler
Copyright (c) 1996 USAF Phillips Laboratory

Software: opencv 4.2.0
Copyright notice:


+ 6
- 6
build.bat View File

@@ -14,27 +14,27 @@
@rem ============================================================================
@echo off
@title mindspore_build
SET BASEPATH=%CD%
IF NOT EXIST %BASEPATH%/build (
md "build"
)
cd %BASEPATH%/build
SET BUILD_PATH=%CD%
IF NOT EXIST %BUILD_PATH%/mindspore (
md "mindspore"
)
cd %CD%/mindspore
cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../..
IF NOT %errorlevel% == 0 (
echo "cmake fail."
goto run_fail
)
IF "%1%" == "" (
cmake --build . --target package -- -j6
) ELSE (


+ 2
- 2
build.sh View File

@@ -433,9 +433,9 @@ build_predict()

cd "${BASEPATH}/predict/output/"
if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
fi
echo "success to build predict project!"
}


+ 9
- 10
docker/README.md View File

@@ -4,14 +4,13 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with

### MindSpore docker build command

* CPU
| Hardware Platform | Version | Build Command |
| :---------------- | :------ | :------------ |
| CPU | `x.y.z` | cd mindspore-cpu/x.y.z && docker build . -t mindspore/mindspore-cpu:x.y.z |
| | `devel` | cd mindspore-cpu/devel && docker build . -t mindspore/mindspore-cpu:devel |
| | `runtime` | cd mindspore-cpu/runtime && docker build . -t mindspore/mindspore-cpu:runtime |
| GPU | `x.y.z` | cd mindspore-gpu/x.y.z && docker build . -t mindspore/mindspore-gpu:x.y.z |
| | `devel` | cd mindspore-gpu/devel && docker build . -t mindspore/mindspore-gpu:devel |
| | `runtime` | cd mindspore-gpu/runtime && docker build . -t mindspore/mindspore-gpu:runtime |

```
cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha
```

* GPU

```
cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha
```
> **NOTICE:** The `x.y.z` version shown above should be replaced with the real version number.

+ 1
- 1
docker/mindspore-cpu/0.1.0-alpha/Dockerfile View File

@@ -64,4 +64,4 @@ RUN mkdir -pv /root/.pip \
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

# Install MindSpore cpu whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl

+ 67
- 0
docker/mindspore-cpu/0.2.0-alpha/Dockerfile View File

@@ -0,0 +1,67 @@
# MindSpore 0.2.0-alpha CPU image: Ubuntu 18.04 base, CPython 3.7.5 built
# from source, MindSpore installed from the official release wheel.
FROM ubuntu:18.04

MAINTAINER leonwanghui <leon.wanghui@huawei.com>

# Set env
# Python is installed under PYTHON_ROOT_PATH and symlinked into /usr/local/bin,
# which is placed first on PATH so it shadows the distro python.
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV PATH /usr/local/bin:$PATH

# Install base tools
RUN apt update \
    && DEBIAN_FRONTEND=noninteractive apt install -y \
    vim \
    wget \
    curl \
    xz-utils \
    net-tools \
    openssh-client \
    git \
    ntpdate \
    tzdata \
    tcl \
    sudo \
    bash-completion

# Install compile tools (needed to build CPython below)
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
    gcc \
    g++ \
    zlibc \
    make \
    libgmp-dev \
    patch \
    autoconf \
    libtool \
    automake \
    flex

# Set bash: repoint /bin/sh from dash to bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

# Install python (v3.7.5) from source, then make it the default python/pip
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
    && cd /tmp \
    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
    && tar -xvf v3.7.5.tar.gz \
    && cd /tmp/cpython-3.7.5 \
    && mkdir -p ${PYTHON_ROOT_PATH} \
    && ./configure --prefix=${PYTHON_ROOT_PATH} \
    && make -j4 \
    && make install -j4 \
    && rm -f /usr/local/bin/python \
    && rm -f /usr/local/bin/pip \
    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
    && rm -rf /tmp/cpython-3.7.5 \
    && rm -f /tmp/v3.7.5.tar.gz

# Set pip source
# NOTE(review): index is plain http with trusted-host — consider https mirror.
RUN mkdir -pv /root/.pip \
    && echo "[global]" > /root/.pip/pip.conf \
    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

# Install MindSpore cpu whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl

+ 1
- 1
docker/mindspore-gpu/0.1.0-alpha/Dockerfile View File

@@ -80,4 +80,4 @@ RUN cd /tmp \
&& rm -f /tmp/openmpi-3.1.5.tar.gz

# Install MindSpore cuda-10.1 whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl

+ 83
- 0
docker/mindspore-gpu/0.2.0-alpha/Dockerfile View File

@@ -0,0 +1,83 @@
# MindSpore 0.2.0-alpha GPU image: CUDA 10.1 + cuDNN 7 runtime base,
# CPython 3.7.5 and OpenMPI 3.1.5 built from source, NCCL pinned to the
# CUDA 10.1 build, MindSpore GPU installed from the official release wheel.
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04

MAINTAINER leonwanghui <leon.wanghui@huawei.com>

# Set env
# Custom Python and OpenMPI prefixes; both are put on PATH/LD_LIBRARY_PATH.
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH

# Install base tools
RUN apt update \
    && DEBIAN_FRONTEND=noninteractive apt install -y \
    vim \
    wget \
    curl \
    xz-utils \
    net-tools \
    openssh-client \
    git \
    ntpdate \
    tzdata \
    tcl \
    sudo \
    bash-completion

# Install compile tools and NCCL (versions pinned to the CUDA 10.1 build)
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
    gcc \
    g++ \
    zlibc \
    make \
    libgmp-dev \
    patch \
    autoconf \
    libtool \
    automake \
    flex \
    libnccl2=2.4.8-1+cuda10.1 \
    libnccl-dev=2.4.8-1+cuda10.1

# Set bash: repoint /bin/sh from dash to bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

# Install python (v3.7.5) from source, then make it the default python/pip
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
    && cd /tmp \
    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
    && tar -xvf v3.7.5.tar.gz \
    && cd /tmp/cpython-3.7.5 \
    && mkdir -p ${PYTHON_ROOT_PATH} \
    && ./configure --prefix=${PYTHON_ROOT_PATH} \
    && make -j4 \
    && make install -j4 \
    && rm -f /usr/local/bin/python \
    && rm -f /usr/local/bin/pip \
    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
    && rm -rf /tmp/cpython-3.7.5 \
    && rm -f /tmp/v3.7.5.tar.gz

# Set pip source
# NOTE(review): index is plain http with trusted-host — consider https mirror.
RUN mkdir -pv /root/.pip \
    && echo "[global]" > /root/.pip/pip.conf \
    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

# Install openmpi (v3.1.5) from source (used for multi-device training)
RUN cd /tmp \
    && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
    && tar -xvf openmpi-3.1.5.tar.gz \
    && cd /tmp/openmpi-3.1.5 \
    && mkdir -p ${OMPI_ROOT_PATH} \
    && ./configure --prefix=${OMPI_ROOT_PATH} \
    && make -j4 \
    && make install -j4 \
    && rm -rf /tmp/openmpi-3.1.5 \
    && rm -f /tmp/openmpi-3.1.5.tar.gz

# Install MindSpore cuda-10.1 whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore_gpu-0.2.0-cp37-cp37m-linux_x86_64.whl

+ 2
- 2
example/Bert_NEZHA_cnwiki/README.md View File

@@ -4,8 +4,8 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base](

## Requirements
- Install [MindSpore](https://www.mindspore.cn/install/en).
- Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wiliextractor). Convert the dataset to TFRecord format and move the files to a specified path.
- Download the CLUE dataset from <https://www.cluebenchmarks.com> for fine-tuning and evaluation.
- Download the zhwiki dataset for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). Convert the dataset to TFRecord format and move the files to a specified path.
- Download the CLUE dataset for fine-tuning and evaluation.
> Notes:
If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file.



+ 1
- 1
example/alexnet_cifar10/README.md View File

@@ -10,7 +10,7 @@ This is the simple tutorial for training AlexNet in MindSpore.

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the CIFAR-10 dataset at <http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz>. The directory structure is as follows:
- Download the CIFAR-10 dataset, the directory structure is as follows:

```
├─cifar-10-batches-bin


+ 1
- 1
example/lenet_mnist/README.md View File

@@ -10,7 +10,7 @@ This is the simple and basic tutorial for constructing a network in MindSpore.

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the MNIST dataset at <http://yann.lecun.com/exdb/mnist/>. The directory structure is as follows:
- Download the MNIST dataset, the directory structure is as follows:

```
└─MNIST_Data


+ 101
- 0
example/mobilenetv2_imagenet2012/README.md View File

@@ -0,0 +1,101 @@
# MobileNetV2 Example

## Description

This is an example of training MobileNetV2 with ImageNet2012 dataset in MindSpore.

## Requirements

* Install [MindSpore](https://www.mindspore.cn/install/en).

* Download the ImageNet2012 dataset.

> Unzip the ImageNet2012 dataset to any path you want and the folder structure should be as follows:
> ```
> .
> ├── train # train dataset
> └── val # infer dataset
> ```

## Example structure

``` shell
.
├── config.py # parameter configuration
├── dataset.py # data preprocessing
├── eval.py # infer script
├── launch.py # launcher for distributed training
├── lr_generator.py # generate learning rate for each step
├── run_infer.sh # launch infering
├── run_train.sh # launch training
└── train.py # train script
```

## Parameter configuration

Parameters for both training and inference can be set in 'config.py'.

```
"num_classes": 1000, # dataset class num
"image_height": 224, # image height
"image_width": 224, # image width
"batch_size": 256, # training or infering batch size
"epoch_size": 200, # total training epochs, including warmup_epochs
"warmup_epochs": 4, # warmup epochs
"lr": 0.4, # base learning rate
"momentum": 0.9, # momentum
"weight_decay": 4e-5, # weight decay
"loss_scale": 1024, # loss scale
"save_checkpoint": True, # whether save checkpoint
"save_checkpoint_epochs": 1, # the epoch interval between two checkpoints
"keep_checkpoint_max": 200, # only keep the last keep_checkpoint_max checkpoint
"save_checkpoint_path": "./checkpoint" # path to save checkpoint
```

## Running the example

### Train

#### Usage
Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]

#### Launch

```
# training example
sh run_train.sh 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet
```

#### Result

Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`, as shown below.

```
epoch: [ 0/200], step:[ 624/ 625], loss:[5.258/5.258], time:[140412.236], lr:[0.100]
epoch time: 140522.500, per step time: 224.836, avg loss: 5.258
epoch: [ 1/200], step:[ 624/ 625], loss:[3.917/3.917], time:[138221.250], lr:[0.200]
epoch time: 138331.250, per step time: 221.330, avg loss: 3.917
```

### Infer

#### Usage

Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]

#### Launch

```
# infer example
sh run_infer.sh ~/imagenet ~/train/mobilenet-200_625.ckpt
```

> checkpoint can be produced in training process.

#### Result

Inference result will be stored in the example path, you can find result like the followings in `val.log`.

```
result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
```

+ 35
- 0
example/mobilenetv2_imagenet2012/config.py View File

@@ -0,0 +1,35 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed

# Hyper-parameters shared by train.py and eval.py for MobileNetV2/ImageNet2012.
config = ed({
    "num_classes": 1000,              # number of dataset classes
    "image_height": 224,              # input image height
    "image_width": 224,               # input image width
    "batch_size": 256,                # batch size for training and inference
    "epoch_size": 200,                # total training epochs, including warmup_epochs
    "warmup_epochs": 4,               # epochs of linear learning-rate warmup
    "lr": 0.4,                        # base (maximum) learning rate
    "momentum": 0.9,                  # momentum passed to the Momentum optimizer
    "weight_decay": 4e-5,             # weight decay passed to the optimizer
    "loss_scale": 1024,               # fixed loss scale (FixedLossScaleManager)
    "save_checkpoint": True,          # whether to save checkpoints during training
    "save_checkpoint_epochs": 1,      # epoch interval between two checkpoints
    "keep_checkpoint_max": 200,       # keep at most this many checkpoints
    "save_checkpoint_path": "./checkpoint",  # directory to save checkpoints to
})

+ 84
- 0
example/mobilenetv2_imagenet2012/dataset.py View File

@@ -0,0 +1,84 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from config import config


def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset pipeline.

    Args:
        dataset_path (str): the path of the dataset (ImageFolder layout).
        do_train (bool): True builds the augmented training pipeline
            (random resized crop + horizontal flip); False builds the
            deterministic eval pipeline (resize + center crop).
        repeat_num (int): the repeat times of the dataset. Default: 1.
        batch_size (int): the batch size of the dataset. Default: 32.

    Returns:
        dataset pipeline (decoded, normalized, HWC->CHW, shuffled, batched).
    """
    # FIX: default to single-device values when the launcher did not export
    # the env vars; int(os.getenv(...)) used to raise TypeError on None.
    rank_size = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))

    if rank_size == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True)
    else:
        # Shard the dataset across devices for data-parallel training.
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True,
                                     num_shards=rank_size, shard_id=rank_id)

    resize_height = config.image_height
    resize_width = config.image_width
    rescale = 1.0 / 255.0  # map pixel values from [0, 255] to [0, 1]
    shift = 0.0
    buffer_size = 1000

    # define map operations
    decode_op = C.Decode()
    resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.2, 1.0))
    horizontal_flip_op = C.RandomHorizontalFlip()

    resize_op = C.Resize((256, 256))
    center_crop = C.CenterCrop(resize_width)
    rescale_op = C.Rescale(rescale, shift)
    # ImageNet channel statistics (on the already-rescaled [0, 1] values).
    normalize_op = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    change_swap_op = C.HWC2CHW()

    if do_train:
        trans = [decode_op, resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op]
    else:
        trans = [decode_op, resize_op, center_crop, rescale_op, normalize_op, change_swap_op]

    # Labels must be int32 for SoftmaxCrossEntropyWithLogits(sparse=True).
    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans)
    ds = ds.map(input_columns="label", operations=type_cast_op)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations (drop the trailing partial batch)
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds

+ 56
- 0
example/mobilenetv2_imagenet2012/eval.py View File

@@ -0,0 +1,56 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
from dataset import create_dataset
from config import config
from mindspore import context
from mindspore.model_zoo.mobilenet import mobilenet_v2
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits

# Command line arguments for evaluation.
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# DEVICE_ID is exported by run_infer.sh; int(None) raises if it is missing.
device_id = int(os.getenv('DEVICE_ID'))

# Configure graph-mode execution on a single Ascend device.
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':
    # Single-device evaluation: no HCCL collective communication needed.
    context.set_context(enable_hccl=False)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
    net = mobilenet_v2()

    # Build the eval pipeline (no augmentation; see dataset.create_dataset).
    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # Restore trained weights before evaluating.
    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    net.set_train(False)  # inference mode

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)

+ 143
- 0
example/mobilenetv2_imagenet2012/launch.py View File

@@ -0,0 +1,143 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""launch train script"""
import os
import sys
import json
from argparse import ArgumentParser


def parse_args():
    """
    Parse the launcher's own command line arguments.

    Known options configure the launcher itself; every argument argparse does
    not recognize is collected into ``args.training_script_args`` so it can be
    forwarded verbatim to the training script.

    Returns:
        argparse.Namespace: launcher options plus ``training_script_args``.

    Examples:
        >>> parse_args()
    """
    # FIX: corrected "utilty" -> "utility" in the --help description.
    parser = ArgumentParser(description="mindspore distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node, "
                             "for D training, this is recommended to be set "
                             "to the number of D in your system so that "
                             "each process can be bound to a single D.")
    parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
                        help="will use the visible devices sequentially")
    parser.add_argument("--server_id", type=str, default="",
                        help="server ip")
    parser.add_argument("--training_script", type=str,
                        help="The full path to the single D training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")
    # rest from the training program: keep unknown args for pass-through
    args, unknown = parser.parse_known_args()
    args.training_script_args = unknown
    return args


def main():
    """
    Build the HCCL rank table for the requested devices, write it to the
    working directory, then spawn one background training process per device
    with RANK_SIZE / RANK_ID / DEVICE_ID exported in its environment.
    """
    print("start", __file__)
    args = parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        # FIX: message typo ("pleaser") and exit status -- an aborted launch
        # must not report success (was exit(0)).
        print('please input server ip!!!')
        sys.exit(1)
    print('server_id:{}'.format(args.server_id))

    # construct hccn_table: map device ids to their NIC ips from /etc/hccn.conf
    # FIX: close the config file deterministically (was an unclosed handle).
    with open('/etc/hccn.conf', 'r') as hccn_fp:
        hccn_configs = hccn_fp.readlines()
    device_ips = {}
    for hccn_item in hccn_configs:
        hccn_item = hccn_item.strip()
        if hccn_item.startswith('address_'):
            # lines look like "address_<device_id>=<device_ip>"
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
    hccn_table = {}
    hccn_table['board_id'] = '0x0000'
    hccn_table['chip_info'] = '910'
    hccn_table['deploy_mode'] = 'lab'
    hccn_table['group_count'] = '1'
    hccn_table['group_list'] = []
    instance_list = []
    usable_dev = ''
    # one instance (rank) per launched process, bound to one visible device
    for instance_id in range(args.nproc_per_node):
        instance = {}
        instance['devices'] = []
        device_id = visible_devices[instance_id]
        device_ip = device_ips[device_id]
        usable_dev += str(device_id)
        instance['devices'].append({
            'device_id': device_id,
            'device_ip': device_ip,
        })
        instance['rank_id'] = str(instance_id)
        instance['server_id'] = args.server_id
        instance_list.append(instance)
    hccn_table['group_list'].append({
        'device_num': str(args.nproc_per_node),
        'server_num': '1',
        'group_name': '',
        'instance_count': str(args.nproc_per_node),
        'instance_list': instance_list,
    })
    hccn_table['para_plane_nic_location'] = 'device'
    hccn_table['para_plane_nic_name'] = []
    for instance_id in range(args.nproc_per_node):
        eth_id = visible_devices[instance_id]
        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
    hccn_table['status'] = 'completed'

    # save hccn_table to file in the current working directory
    table_path = os.getcwd()
    if not os.path.exists(table_path):  # getcwd always exists; kept for safety
        os.mkdir(table_path)
    table_fn = os.path.join(table_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # spawn the processes: each rank runs in its own device<rank> directory,
    # detached in the background with output redirected to log<rank>.log
    for rank_id in range(0, args.nproc_per_node):
        device_id = visible_devices[rank_id]
        device_dir = os.path.join(os.getcwd(), 'device{}'.format(rank_id))
        rank_process = 'export RANK_SIZE={} && export RANK_ID={} && export DEVICE_ID={} && '.format(args.nproc_per_node,
                                                                                                    rank_id, device_id)
        if args.nproc_per_node > 1:
            # only multi-device runs need the HCCL rank table
            rank_process += 'export MINDSPORE_HCCL_CONFIG_PATH={} && '.format(table_fn)
            rank_process += 'export RANK_TABLE_FILE={} && '.format(table_fn)
        rank_process += 'rm -rf {dir} && mkdir {dir} && cd {dir} && python {script} '.format(dir=device_dir,
                                                                                            script=args.training_script
                                                                                            )
        rank_process += ' '.join(args.training_script_args) + ' > log{}.log 2>&1 &'.format(rank_id)
        os.system(rank_process)


if __name__ == "__main__":
main()

+ 54
- 0
example/mobilenetv2_imagenet2012/lr_generator.py View File

@@ -0,0 +1,54 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import math
import numpy as np


def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Build the per-step learning rate schedule: linear warmup from ``lr_init``
    to ``lr_max`` over ``warmup_epochs``, then cosine decay down to ``lr_end``
    over the remaining epochs, clamped at zero.

    Args:
        global_step (int): step already completed; the returned array starts here
        lr_init (float): learning rate at step 0 of warmup
        lr_end (float): learning rate at the end of cosine decay
        lr_max (float): peak learning rate reached at the end of warmup
        warmup_epochs (int): number of warmup epochs
        total_epochs (int): total number of training epochs
        steps_per_epoch (int): steps in one epoch

    Returns:
        np.ndarray: float32 learning rate for every remaining step
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    decay_steps = total_steps - warmup_steps

    rates = []
    for step in range(total_steps):
        if step < warmup_steps:
            # linear ramp from lr_init to lr_max
            rate = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            # half-cosine from lr_max down to lr_end
            progress = (step - warmup_steps) / decay_steps
            rate = lr_end + (lr_max - lr_end) * (1. + math.cos(math.pi * progress)) / 2.
        rates.append(max(rate, 0.0))

    schedule = np.array(rates, dtype=np.float32)
    return schedule[global_step:]

+ 33
- 0
example/mobilenetv2_imagenet2012/run_infer.sh View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Launch single-device inference for MobileNetV2.
# Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
if [ $# != 2 ]
then
    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
fi

# FIX: quote positional parameters so paths containing spaces do not
# word-split inside the test expressions.
if [ ! -d "$1" ]
then
    echo "error: DATASET_PATH=$1 is not a directory"
    exit 1
fi

if [ ! -f "$2" ]
then
    echo "error: CHECKPOINT_PATH=$2 is not a file"
    exit 1
fi

# Resolve the example directory so eval.py can be found on PYTHONPATH.
BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
# Single-device environment expected by eval.py / dataset.py.
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "eval" ];
then
    rm -rf ./eval
fi
mkdir ./eval
cd ./eval || exit
# Run in the background; output goes to infer.log.
python ${BASEPATH}/eval.py \
    --checkpoint_path="$2" \
    --dataset_path="$1" &> infer.log &  # dataset val folder path

+ 33
- 0
example/mobilenetv2_imagenet2012/run_train.sh View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Launch (possibly distributed) training for MobileNetV2 via launch.py.
# Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
if [ $# != 4 ]
then
    echo "Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]"
    exit 1
fi

# FIX: the range check used '&&', which can never be true (a value cannot be
# both < 1 and > 8), so invalid DEVICE_NUM was silently accepted. Use '||'.
if [ "$1" -lt 1 ] || [ "$1" -gt 8 ]
then
    echo "error: DEVICE_NUM=$1 is not in (1-8)"
    exit 1
fi

# FIX: quote the path so directories with spaces do not word-split.
if [ ! -d "$4" ]
then
    echo "error: DATASET_PATH=$4 is not a directory"
    exit 1
fi

# Resolve the example directory so train.py and its modules are importable.
BASEPATH=$(cd "$(dirname "$0")" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "train" ];
then
    rm -rf ./train
fi
mkdir ./train
cd ./train || exit
# launch.py builds the rank table and spawns one process per device.
python ${BASEPATH}/launch.py \
    --nproc_per_node="$1" \
    --visible_devices="$3" \
    --server_id="$2" \
    --training_script=${BASEPATH}/train.py \
    --dataset_path="$4" &> train.log &  # dataset train folder

+ 148
- 0
example/mobilenetv2_imagenet2012/train.py View File

@@ -0,0 +1,148 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train_imagenet."""
import os
import time
import argparse
import random
import numpy as np
from dataset import create_dataset
from lr_generator import get_lr
from config import config
from mindspore import context
from mindspore import Tensor
from mindspore.model_zoo.mobilenet import mobilenet_v2
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.nn.optim.momentum import Momentum
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits

from mindspore.train.model import Model, ParallelMode

from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
from mindspore.train.loss_scale_manager import FixedLossScaleManager
import mindspore.dataset.engine as de
from mindspore.communication.management import init

# Fix random seeds for reproducible data ordering and augmentation.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

# Command line arguments for training.
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# DEVICE_ID / RANK_ID / RANK_SIZE are exported by launch.py (via run_train.sh);
# int(None) raises if they are missing.
device_id = int(os.getenv('DEVICE_ID'))
rank_id = int(os.getenv('RANK_ID'))
rank_size = int(os.getenv('RANK_SIZE'))
run_distribute = rank_size > 1  # more than one device => data-parallel run

# Configure graph-mode execution on the assigned Ascend device.
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)


class Monitor(Callback):
    """
    Training callback that reports loss and timing per step and per epoch.

    Args:
        lr_init (numpy array): learning rate value for every training step

    Returns:
        None.

    Examples:
        >>> Monitor(lr_init=Tensor([0.05] * 100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)

    def epoch_begin(self, run_context):
        # Reset per-epoch statistics.
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()
        elapsed_ms = (time.time() - self.epoch_time) * 1000
        avg_step_ms = elapsed_ms / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(
            elapsed_ms, avg_step_ms, np.mean(self.losses)), flush=True)

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_ms = (time.time() - self.step_time) * 1000

        # net_outputs may be a Tensor or a (loss, ...) tuple/list of Tensors.
        loss = cb_params.net_outputs
        if isinstance(loss, (tuple, list)) and isinstance(loss[0], Tensor):
            loss = loss[0]
        if isinstance(loss, Tensor):
            loss = np.mean(loss.asnumpy())
        self.losses.append(loss)

        step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
            cb_params.cur_epoch_num - 1, cb_params.epoch_num, step_in_epoch, cb_params.batch_num, loss,
            np.mean(self.losses), step_ms, self.lr_init[cb_params.cur_step_num - 1]), flush=True)


if __name__ == '__main__':
    if run_distribute:
        # Data-parallel training across rank_size devices: enable HCCL,
        # broadcast initial parameters, average gradients across devices.
        context.set_context(enable_hccl=True)
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          parameter_broadcast=True, mirror_mean=True)
        # NOTE(review): fusion split index 140 looks tuned for this network -- confirm.
        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
        init()
    else:
        context.set_context(enable_hccl=False)

    epoch_size = config.epoch_size
    net = mobilenet_v2(num_classes=config.num_classes)
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')

    print("train args: ", args_opt, "\ncfg: ", config,
          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

    # Dataset repeats epoch_size times and Model.train below runs epoch_size
    # epochs; presumably the sink-mode convention on Ascend -- confirm.
    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
                             repeat_num=epoch_size, batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # Fixed loss scale; weights are still updated even if overflow occurs.
    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    # Warmup + cosine-decay schedule from lr_generator.get_lr.
    lr = Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size))
    # Optimize only trainable parameters.
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   config.weight_decay, config.loss_scale)

    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)

    # Only rank 0 logs progress and saves checkpoints; other ranks train silently.
    cb = None
    if rank_id == 0:
        cb = [Monitor(lr_init=lr.asnumpy())]
        if config.save_checkpoint:
            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
                                         keep_checkpoint_max=config.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(prefix="mobilenet", directory=config.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=cb)

+ 135
- 0
example/resnet101_imagenet2012/README.md View File

@@ -0,0 +1,135 @@
# ResNet101 Example
## Description
This is an example of training ResNet101 with ImageNet dataset in MindSpore.

## Requirements

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the dataset ImageNet2012.
> Unzip the ImageNet2012 dataset to any path you want, the folder should include train and eval dataset as follows:
```
.
└─dataset
├─ilsvrc
└─validation_preprocess
```

## Example structure
```shell
.
├── crossentropy.py # CrossEntropy loss function
├── config.py # parameter configuration
├── dataset.py # data preprocessing
├── eval.py # eval net
├── lr_generator.py # generate learning rate
├── run_distribute_train.sh # launch distributed training(8p)
├── run_infer.sh # launch evaluating
├── run_standalone_train.sh # launch standalone training(1p)
└── train.py # train net
```
## Parameter configuration
Parameters for both training and evaluating can be set in config.py.
```
"class_num": 1001, # dataset class number
"batch_size": 32, # batch size of input tensor
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum optimizer
"weight_decay": 1e-4, # weight decay
"epoch_size": 120, # epoch sizes for training
"buffer_size": 1000, # number of queue size in data preprocessing
"image_height": 224, # image height
"image_width": 224, # image width
"save_checkpoint": True, # whether save checkpoint or not
"save_checkpoint_steps": 500, # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
"keep_checkpoint_max": 10, # only keep the last keep_checkpoint_max checkpoint
"save_checkpoint_path": "./", # path to save checkpoint relative to the executed path
"warmup_epochs": 0, # number of warmup epoch
"lr_decay_mode": "cosine",            # decay mode for generating learning rate
"label_smooth": 1, # label_smooth
"label_smooth_factor": 0.1, # label_smooth_factor
"lr": 0.1 # base learning rate
```

## Running the example

### Train
#### Usage

```
# distributed training
sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]
# standalone training
sh run_standalone_train.sh [DATASET_PATH]
```
#### Launch
```bash
# distributed training example(8p)
sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc
# standalone training example(1p)
sh run_standalone_train.sh dataset/ilsvrc
```
> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).

#### Result
Training result will be stored in the example path, whose folder name begins with "train" or "train_parallel". You can find checkpoint file together with result like the followings in log.

```
# distribute training result(8p)
epoch: 1 step: 5004, loss is 4.805483
epoch: 2 step: 5004, loss is 3.2121816
epoch: 3 step: 5004, loss is 3.429647
epoch: 4 step: 5004, loss is 3.3667371
epoch: 5 step: 5004, loss is 3.1718972
...
epoch: 67 step: 5004, loss is 2.2768745
epoch: 68 step: 5004, loss is 1.7223864
epoch: 69 step: 5004, loss is 2.0665488
epoch: 70 step: 5004, loss is 1.8717369
...
```

### Infer
#### Usage
```
# infer
sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH]
```
#### Launch
```bash
# infer with checkpoint
sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt

```
> checkpoint can be produced in training process.

#### Result
Inference result will be stored in the example path, whose folder name is "infer". Under this, you can find result like the followings in log.
```
result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt
```

+ 39
- 0
example/resnet101_imagenet2012/config.py View File

@@ -0,0 +1,39 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed

# Network/training hyper-parameters, consumed by train.py and eval.py.
config = ed({
    "class_num": 1001,                # number of output classes
    "batch_size": 32,                 # batch size per device
    "loss_scale": 1024,               # fixed loss scale (see FixedLossScaleManager in train.py)
    "momentum": 0.9,                  # momentum for the Momentum optimizer
    "weight_decay": 1e-4,             # weight decay passed to the optimizer
    "epoch_size": 120,                # total number of training epochs
    "buffer_size": 1000,              # shuffle buffer size for the dataset
    "image_height": 224,              # input image height
    "image_width": 224,               # input image width
    "save_checkpoint": True,          # whether to save checkpoints during training
    "save_checkpoint_steps": 500,     # interval (in steps) between checkpoints
    "keep_checkpoint_max": 10,        # maximum number of checkpoint files to keep
    "save_checkpoint_path": "./",     # directory where checkpoints are written
    "warmup_epochs": 0,               # number of warmup epochs
    "lr_decay_mode": "cosine",        # decay mode for generating the learning rate
    "label_smooth": 1,                # enable label smoothing (non-zero = on)
    "label_smooth_factor": 0.1,       # label smoothing factor
    "lr": 0.1                         # base learning rate
})

+ 36
- 0
example/resnet101_imagenet2012/crossentropy.py View File

@@ -0,0 +1,36 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""define loss function for network"""
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore import Tensor
from mindspore.common import dtype as mstype
import mindspore.nn as nn

class CrossEntropy(_Loss):
    """the redefined loss function with SoftmaxCrossEntropyWithLogits

    Expands sparse labels to label-smoothed one-hot targets, applies
    SoftmaxCrossEntropyWithLogits, then reduces the per-sample losses
    to a scalar with ReduceMean.
    """
    def __init__(self, smooth_factor=0., num_classes=1001):
        super(CrossEntropy, self).__init__()
        self.onehot = P.OneHot()
        # Smoothed one-hot targets: the true class gets 1 - smooth_factor,
        # the remaining probability mass is spread evenly over the others.
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        # ReduceMean(False): keep_dims disabled, so the result is a scalar.
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        # Width of the one-hot vector is taken from the logit's class axis.
        one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, one_hot_label)
        # Average the per-sample loss vector over axis 0.
        loss = self.mean(loss, 0)
        return loss

+ 89
- 0
example/resnet101_imagenet2012/dataset.py View File

@@ -0,0 +1,89 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from config import config

def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    create a train or evaluate dataset
    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    # Default to single-device settings when the launch scripts did not
    # export RANK_SIZE / RANK_ID (e.g. an interactive run); previously an
    # unset variable crashed with int(None).
    device_num = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))

    if device_num == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        # Shard the dataset across devices for data-parallel training.
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)
    resize_height = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    decode_op = C.Decode()

    random_resize_crop_op = C.RandomResizedCrop(resize_height, (0.08, 1.0), (0.75, 1.33), max_attempts=100)
    # Bug fix: the flip probability used to be rank_id / (rank_id + 1), which
    # disables flipping entirely on rank 0 and gives every rank a different
    # augmentation distribution. Use the conventional 0.5 on all ranks.
    horizontal_flip_op = C.RandomHorizontalFlip(0.5)
    resize_op_256 = C.Resize((256, 256))
    center_crop = C.CenterCrop(224)
    rescale_op = C.Rescale(rescale, shift)
    # Per-channel mean/std normalization (values in the 0-1 rescaled range).
    normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278))
    changeswap_op = C.HWC2CHW()

    if do_train:
        # Training: random crop + flip augmentation.
        trans = [decode_op,
                 random_resize_crop_op,
                 horizontal_flip_op,
                 rescale_op,
                 normalize_op,
                 changeswap_op]
    else:
        # Evaluation: deterministic resize + center crop.
        trans = [decode_op,
                 resize_op_256,
                 center_crop,
                 rescale_op,
                 normalize_op,
                 changeswap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=config.buffer_size)
    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds

+ 84
- 0
example/resnet101_imagenet2012/eval.py View File

@@ -0,0 +1,84 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
import random
import numpy as np
from dataset import create_dataset
from config import config
from mindspore import context
from mindspore.model_zoo.resnet import resnet101
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.train.model import Model, ParallelMode
from mindspore.train.serialization import load_checkpoint, load_param_into_net
import mindspore.dataset.engine as de
from mindspore.communication.management import init
from crossentropy import CrossEntropy

# Fix random seeds so evaluation is reproducible.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# DEVICE_ID is exported by the launch shell scripts.
device_id = int(os.getenv('DEVICE_ID'))

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':
    if args_opt.do_eval:
        # Plain single-device evaluation: no HCCL communication needed.
        context.set_context(enable_hccl=False)
    else:
        if args_opt.run_distribute:
            # Multi-device data-parallel setup over HCCL.
            context.set_context(enable_hccl=True)
            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                              mirror_mean=True, parameter_broadcast=True)
            auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313])
            init()
        else:
            context.set_context(enable_hccl=False)

    epoch_size = config.epoch_size
    net = resnet101(class_num=config.class_num)

    # Label smoothing is only applied when enabled in the config.
    if not config.label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)

    if args_opt.do_eval:
        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
        step_size = dataset.get_dataset_size()

        # Load the trained weights and switch the network to inference mode.
        if args_opt.checkpoint_path:
            param_dict = load_checkpoint(args_opt.checkpoint_path)
            load_param_into_net(net, param_dict)
        net.set_train(False)

        model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})
        res = model.eval(dataset)
        print("result:", res, "ckpt=", args_opt.checkpoint_path)

+ 52
- 0
example/resnet101_imagenet2012/lr_generator.py View File

@@ -0,0 +1,52 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import math
import numpy as np

def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Return the warmup learning rate at *current_step*.

    The rate climbs from ``init_lr`` towards ``base_lr`` in
    ``warmup_steps`` equal increments.
    """
    per_step_increase = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    return float(init_lr) + per_step_increase * current_step

def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
    """
    generate learning rate array with cosine

    Args:
        lr(float): base learning rate
        steps_per_epoch(int): steps size of one epoch
        warmup_epochs(int): number of warmup epochs
        max_epoch(int): total epochs of training
    Returns:
        np.array, learning rate array
    """
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    decay_steps = total_steps - warmup_steps

    def _rate_at(step):
        # Warmup phase: linear ramp from 0 up to the base rate.
        if step < warmup_steps:
            return linear_warmup_lr(step + 1, warmup_steps, lr, 0)
        # Decay phase: cosine term scaled by a linear falloff, plus a small
        # additive floor so the rate never reaches exactly zero.
        linear_part = (total_steps - step) / decay_steps
        cosine_part = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * step / decay_steps))
        return lr * (linear_part * cosine_part + 0.00001)

    return np.array([_rate_at(s) for s in range(total_steps)]).astype(np.float32)

+ 66
- 0
example/resnet101_imagenet2012/run_distribute_train.sh View File

@@ -0,0 +1,66 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Launch 8-device distributed training: one background train.py process per
# device, each in its own ./train_parallel<i> working directory.

if [ $# != 2 ]
then
    echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]"
    exit 1
fi

# Resolve a possibly-relative path to an absolute one.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        echo "$(realpath -m $PWD/$1)"
    fi
}
PATH1=$(get_real_path $1)   # HCCL rank-table json file
PATH2=$(get_real_path $2)   # dataset root directory
echo $PATH1
echo $PATH2

if [ ! -f $PATH1 ]
then
    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
    exit 1
fi

if [ ! -d $PATH2 ]
then
    echo "error: DATASET_PATH=$PATH2 is not a directory"
    exit 1
fi

# Lift the process limit and describe the 8-device topology to HCCL.
ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
export RANK_TABLE_FILE=$PATH1

for((i=0; i<${DEVICE_NUM}; i++))
do
    export DEVICE_ID=$i
    export RANK_ID=$i
    # Fresh per-rank working directory with copies of the scripts.
    rm -rf ./train_parallel$i
    mkdir ./train_parallel$i
    cp *.py ./train_parallel$i
    cp *.sh ./train_parallel$i
    cd ./train_parallel$i || exit
    echo "start training for rank $RANK_ID, device $DEVICE_ID"
    env > env.log
    # Run in the background; output goes to ./train_parallel$i/log.
    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
    cd ..
done

+ 64
- 0
example/resnet101_imagenet2012/run_infer.sh View File

@@ -0,0 +1,64 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Evaluate a trained checkpoint on a single device inside ./infer.

if [ $# != 2 ]
then
    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
fi

# Resolve a possibly-relative path to an absolute one.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        echo "$(realpath -m $PWD/$1)"
    fi
}
PATH1=$(get_real_path $1)   # validation dataset directory
PATH2=$(get_real_path $2)   # checkpoint file
echo $PATH1
echo $PATH2

if [ ! -d $PATH1 ]
then
    echo "error: DATASET_PATH=$PATH1 is not a directory"
    exit 1
fi

if [ ! -f $PATH2 ]
then
    echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
    exit 1
fi

# Single-device environment.
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_SIZE=$DEVICE_NUM
export RANK_ID=0

# Fresh working directory with copies of the scripts.
if [ -d "infer" ];
then
    rm -rf ./infer
fi
mkdir ./infer
cp *.py ./infer
cp *.sh ./infer
cd ./infer || exit
env > env.log
echo "start infering for device $DEVICE_ID"
# Run in the background; output goes to ./infer/log.
python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
cd ..

+ 56
- 0
example/resnet101_imagenet2012/run_standalone_train.sh View File

@@ -0,0 +1,56 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Launch single-device training inside ./train.

if [ $# != 1 ]
then
    echo "Usage: sh run_standalone_train.sh [DATASET_PATH]"
    exit 1
fi

# Resolve a possibly-relative path to an absolute one.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        echo "$(realpath -m $PWD/$1)"
    fi
}
PATH1=$(get_real_path $1)   # dataset root directory
echo $PATH1

if [ ! -d $PATH1 ]
then
    echo "error: DATASET_PATH=$PATH1 is not a directory"
    exit 1
fi

# Single-device environment.
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1

# Fresh working directory with copies of the scripts.
if [ -d "train" ];
then
    rm -rf ./train
fi
mkdir ./train
cp *.py ./train
cp *.sh ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"
env > env.log
# Run in the background; output goes to ./train/log.
python train.py --do_train=True --dataset_path=$PATH1 &> log &
cd ..

+ 103
- 0
example/resnet101_imagenet2012/train.py View File

@@ -0,0 +1,103 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train_imagenet."""
import os
import argparse
import random
import numpy as np
from dataset import create_dataset
from lr_generator import warmup_cosine_annealing_lr
from config import config
from mindspore import context
from mindspore import Tensor
from mindspore.model_zoo.resnet import resnet101
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.model import Model, ParallelMode
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train.loss_scale_manager import FixedLossScaleManager
import mindspore.dataset.engine as de
from mindspore.communication.management import init
import mindspore.nn as nn
import mindspore.common.initializer as weight_init
from crossentropy import CrossEntropy

# Fix random seeds so training runs are reproducible.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.')
parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# DEVICE_ID is exported by the launch shell scripts.
device_id = int(os.getenv('DEVICE_ID'))

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':
    if args_opt.do_eval:
        context.set_context(enable_hccl=False)
    else:
        if args_opt.run_distribute:
            # Multi-device data-parallel training over HCCL.
            context.set_context(enable_hccl=True)
            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                              mirror_mean=True, parameter_broadcast=True)
            # Split the gradient all-reduce into fused segments at these
            # parameter indices so communication overlaps with computation.
            auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313])
            init()
        else:
            context.set_context(enable_hccl=False)

    epoch_size = config.epoch_size
    net = resnet101(class_num=config.class_num)
    # weight init
    # Re-initialize conv weights (Xavier uniform) and dense weights
    # (truncated normal) in place, preserving each parameter's shape/dtype.
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Conv2d):
            cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
                                                                cell.weight.default_input.shape(),
                                                                cell.weight.default_input.dtype())
        if isinstance(cell, nn.Dense):
            cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
                                                                cell.weight.default_input.shape(),
                                                                cell.weight.default_input.dtype())
    # Label smoothing is only applied when enabled in the config.
    if not config.label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
    if args_opt.do_train:
        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
                                 repeat_num=epoch_size, batch_size=config.batch_size)
        step_size = dataset.get_dataset_size()
        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

        # learning rate strategy with cosine
        lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size))
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                       config.weight_decay, config.loss_scale)
        model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
        # Callbacks: per-step timing, loss logging, and (optionally) checkpoints.
        time_cb = TimeMonitor(data_size=step_size)
        loss_cb = LossMonitor()
        cb = [time_cb, loss_cb]
        if config.save_checkpoint:
            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
                                         keep_checkpoint_max=config.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
        model.train(epoch_size, dataset, callbacks=cb)

+ 1
- 1
example/resnet50_cifar10/README.md View File

@@ -8,7 +8,7 @@ This is an example of training ResNet-50 with CIFAR-10 dataset in MindSpore.

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz).
- Download the dataset CIFAR-10.

> Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows:
> ```


+ 4
- 7
example/resnet50_cifar10/dataset.py View File

@@ -40,9 +40,9 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
rank_id = int(os.getenv("RANK_ID"))

if device_num == 1:
ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=True)
ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
else:
ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=True,
ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id)

resize_height = config.image_height
@@ -68,11 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):

type_cast_op = C2.TypeCast(mstype.int32)

ds = ds.map(input_columns="label", operations=type_cast_op)
ds = ds.map(input_columns="image", operations=trans)

# apply shuffle operations
ds = ds.shuffle(buffer_size=config.buffer_size)
ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)

# apply batch operations
ds = ds.batch(batch_size, drop_remainder=True)


+ 2
- 1
example/resnet50_cifar10/run_distribute_train.sh View File

@@ -22,7 +22,7 @@ fi

if [ ! -f $1 ]
then
echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
exit 1
fi

@@ -36,6 +36,7 @@ ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$1
export RANK_TABLE_FILE=$1

for((i=0; i<${DEVICE_NUM}; i++))
do


+ 2
- 2
example/resnet50_cifar10/train.py View File

@@ -61,14 +61,14 @@ if __name__ == '__main__':
context.set_context(enable_hccl=True)
context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
mirror_mean=True)
auto_parallel_context().set_all_reduce_fusion_split_indices([140])
auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
init()
else:
context.set_context(enable_hccl=False)

epoch_size = config.epoch_size
net = resnet50(class_num=config.class_num)
loss = SoftmaxCrossEntropyWithLogits(sparse=True)
loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")


if args_opt.do_train:


+ 30
- 2
example/vgg16_cifar10/README.md View File

@@ -8,7 +8,7 @@ This example is for VGG16 model training and evaluation.

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz).
- Download the CIFAR-10 binary version dataset.

> Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows:
> ```
@@ -49,6 +49,24 @@ You will get the accuracy as following:
result: {'acc': 0.92}
```

### Distribute Training
```
sh run_distribute_train.sh rank_table.json your_data_path
```
The above shell script will run distribute training in the background, you can view the results through the file `train_parallel[X]/log`.

You will get the loss value as following:
```
# grep "result: " train_parallel*/log
train_parallel0/log:epoch: 1 step: 97, loss is 1.9060308
train_parallel0/log:epoch: 2 step: 97, loss is 1.6003821
...
train_parallel1/log:epoch: 1 step: 97, loss is 1.7095519
train_parallel1/log:epoch: 2 step: 97, loss is 1.7133579
...
...
```
> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).

## Usage:

@@ -75,4 +93,14 @@ parameters/options:
--data_path the storage path of dataset
--device_id the device which used to evaluate model.
--checkpoint_path the checkpoint file path used to evaluate model.
```
```

### Distribute Training

```
Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]

parameters/options:
MINDSPORE_HCCL_CONFIG_PATH HCCL configuration file path.
DATA_PATH the storage path of dataset.
```

+ 5
- 1
example/vgg16_cifar10/dataset.py View File

@@ -28,7 +28,11 @@ def create_dataset(data_home, repeat_num=1, training=True):
data_dir = os.path.join(data_home, "cifar-10-batches-bin")
if not training:
data_dir = os.path.join(data_home, "cifar-10-verify-bin")
data_set = ds.Cifar10Dataset(data_dir)

rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None
rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None
data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id)

resize_height = cfg.image_height
resize_width = cfg.image_width
rescale = 1.0 / 255.0


+ 54
- 0
example/vgg16_cifar10/run_distribute_train.sh View File

@@ -0,0 +1,54 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Launch 8-device distributed VGG16 training: one background train.py
# process per device, each in its own ./train_parallel<i> directory.

if [ $# != 2 ]
then
    echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]"
    exit 1
fi

if [ ! -f $1 ]
then
    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
    exit 1
fi

if [ ! -d $2 ]
then
    echo "error: DATA_PATH=$2 is not a directory"
    exit 1
fi

# Lift the process limit and describe the 8-device topology to HCCL.
ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$1
export RANK_TABLE_FILE=$1

for((i=0; i<${DEVICE_NUM}; i++))
do
    export DEVICE_ID=$i
    export RANK_ID=$i
    # Fresh per-rank working directory with copies of the scripts.
    rm -rf ./train_parallel$i
    mkdir ./train_parallel$i
    cp *.py ./train_parallel$i
    cp *.sh ./train_parallel$i
    cd ./train_parallel$i || exit
    echo "start training for rank $RANK_ID, device $DEVICE_ID"
    env > env.log
    # Run in the background; output goes to ./train_parallel$i/log.
    python train.py --data_path=$2 --device_id=$i &> log &
    cd ..
done

+ 22
- 7
example/vgg16_cifar10/train.py View File

@@ -17,16 +17,18 @@
python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID
"""
import argparse
import os
import random
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.communication.management import init
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.model import Model
from mindspore.train.model import Model, ParallelMode
from mindspore import context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.model_zoo.vgg import vgg16
import dataset
from dataset import create_dataset
from config import cifar_cfg as cfg
random.seed(1)
np.random.seed(1)
@@ -62,17 +64,30 @@ if __name__ == '__main__':

context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
context.set_context(device_id=args_opt.device_id)
context.set_context(enable_task_sink=True)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True, enable_hccl=False)

device_num = int(os.environ.get("DEVICE_NUM", 1))
if device_num > 1:
context.reset_auto_parallel_context()
context.set_context(enable_hccl=True)
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
mirror_mean=True)
init()

dataset = create_dataset(args_opt.data_path, cfg.epoch_size)
batch_num = dataset.get_dataset_size()

net = vgg16(num_classes=cfg.num_classes)
lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size)
lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

dataset = dataset.create_dataset(args_opt.data_path, cfg.epoch_size)
batch_num = dataset.get_dataset_size()
config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=cfg.keep_checkpoint_max)
time_cb = TimeMonitor(data_size=batch_num)
ckpoint_cb = ModelCheckpoint(prefix="train_vgg_cifar10", directory="./", config=config_ck)
loss_cb = LossMonitor()
model.train(cfg.epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])
model.train(cfg.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, loss_cb])
print("train success")

+ 94
- 0
example/yolov3_coco2017/README.md View File

@@ -0,0 +1,94 @@
# YOLOv3 Example

## Description

YOLOv3 network based on ResNet-18, with support for training and evaluation.

## Requirements

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Dataset

We use coco2017 as training dataset.

1. The directory structure is as follows:
> ```
> .
> ├── annotations # annotation jsons
> ├── train2017 # train dataset
> └── val2017 # infer dataset
> ```

2. Organize the dataset information into a TXT file, each row in the file is as follows:

```
train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2
```

Each row is an image annotation split by spaces: the first column is a relative path to the image, and the others are box and class information of the format [xmin,ymin,xmax,ymax,class]. `dataset.py` is the parsing script; we read an image from the path joined from `image_dir` (dataset directory) and the relative path in `anno_path` (the TXT file path). `image_dir` and `anno_path` are external inputs.


## Running the Example

### Training

To train the model, run `train.py` with the dataset `image_dir`, `anno_path` and `mindrecord_dir`. If the `mindrecord_dir` is empty, it will generate a [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) file from `image_dir` and `anno_path` (the absolute image path is joined from `image_dir` and the relative path in `anno_path`). **Note that if `mindrecord_dir` isn't empty, it will use `mindrecord_dir` rather than `image_dir` and `anno_path`.**

- Stand alone mode

```
sh run_standalone_train.sh 0 50 ./Mindrecord_train ./dataset ./dataset/train.txt

```

The input variables are device id, epoch size, mindrecord directory path, dataset directory path and train TXT file path.


- Distributed mode

```
sh run_distribute_train.sh 8 150 /data/Mindrecord_train /data /data/train.txt /data/hccl.json
```

The input variables are device numbers, epoch size, mindrecord directory path, dataset directory path, train TXT file path and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.**

You will get the loss value and time of each step as following:

```
epoch: 145 step: 156, loss is 12.202981
epoch time: 25599.22742843628, per step time: 164.0976117207454
epoch: 146 step: 156, loss is 16.91706
epoch time: 23199.971675872803, per step time: 148.7177671530308
epoch: 147 step: 156, loss is 13.04007
epoch time: 23801.95164680481, per step time: 152.57661312054364
epoch: 148 step: 156, loss is 10.431475
epoch time: 23634.241580963135, per step time: 151.50154859591754
epoch: 149 step: 156, loss is 14.665991
epoch time: 24118.8325881958, per step time: 154.60790120638333
epoch: 150 step: 156, loss is 10.779521
epoch time: 25319.57221031189, per step time: 162.30495006610187
```

Note that the results above are for two-class detection (person and face) using our own annotations with coco2017; you can change `num_classes` in `config.py` to train on your own dataset. We will support all 80 coco2017 classes in the near future.

### Evaluation

To eval, run `eval.py` with the dataset `image_dir`, `anno_path`(eval txt), `mindrecord_dir` and `ckpt_path`. `ckpt_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file.

```
sh run_eval.sh 0 yolo.ckpt ./Mindrecord_eval ./dataset ./dataset/eval.txt
```

The input variables are device id, checkpoint path, mindrecord directory path, dataset directory path and train TXT file path.

You will get the precision and recall value of each class:

```
class 0 precision is 88.18%, recall is 66.00%
class 1 precision is 85.34%, recall is 79.13%
```

Note that the precision and recall values are results of the two-class model (person and face) trained with our own annotations on COCO 2017.



+ 1
- 46
mindspore/_akg/__init__.py View File

@@ -13,51 +13,6 @@
# limitations under the License.

"""__init__"""
from __future__ import absolute_import as _abs
import sys
import os

def AKGAddPath():
    """Ensure this package's directory is the first entry in sys.path.

    Moving the path to the front makes the bundled tvm/topi shadow any
    other installed copy when the loader re-imports them.
    """
    pwd = os.path.dirname(os.path.realpath(__file__))
    tvm_path = os.path.realpath(pwd)
    if tvm_path not in sys.path:
        sys.path.insert(0, tvm_path)
    else:
        # Already present: move it to the front so it takes precedence.
        sys.path.remove(tvm_path)
        sys.path.insert(0, tvm_path)


class AKGMetaPathFinder:
    """Meta-path finder that redirects "_akg.tvm" / "_akg.topi" imports.

    Strips the "_akg." prefix and hands the remaining module name to
    AKGMetaPathLoader, which imports the real package.
    """

    def find_module(self, fullname, path=None):
        """Return a loader for "_akg.tvm*" / "_akg.topi*" names, else None."""
        if fullname.startswith("_akg.tvm"):
            rname = fullname[5:]  # drop the leading "_akg."
            return AKGMetaPathLoader(rname)
        if fullname.startswith("_akg.topi"):
            rname = fullname[5:]
            return AKGMetaPathLoader(rname)
        return None


class AKGMetaPathLoader:
    """Loader that imports the real module and aliases it under the "_akg." name."""

    def __init__(self, rname):
        # Real (prefix-stripped) module name, e.g. "tvm" or "topi".
        self.__rname = rname

    def load_module(self, fullname):
        """Freshly import self.__rname and register it in sys.modules as `fullname`."""
        if self.__rname in sys.modules:
            # Drop any cached copy so the import resolves via the path that
            # AKGAddPath puts at the front of sys.path.
            sys.modules.pop(self.__rname)
        AKGAddPath()
        __import__(self.__rname, globals(), locals())
        self.__target_module = sys.modules[self.__rname]
        sys.modules[fullname] = self.__target_module
        return self.__target_module


# Install the finder first so "_akg.tvm" / "_akg.topi" imports below and in
# client code are redirected through AKGMetaPathLoader.
sys.meta_path.insert(0, AKGMetaPathFinder())

from . import add_path
from .op_build import op_build
from .message import compilewithjson

+ 61
- 0
mindspore/_akg/add_path.py View File

@@ -0,0 +1,61 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""add tvm path"""
import sys
import os


def AKGAddPath():
    """Ensure this package's directory is the first entry in sys.path.

    If the directory is already on sys.path it is moved to the front, so the
    bundled tvm/topi shadow any other installed copy.
    """
    pwd = os.path.dirname(os.path.realpath(__file__))
    tvm_path = os.path.realpath(pwd)
    if tvm_path in sys.path:
        # Already present: relocate it to the front.
        sys.path.remove(tvm_path)
    sys.path.insert(0, tvm_path)


class AKGMetaPathFinder:
    """Meta-path finder that redirects "_akg.tvm" / "_akg.topi" imports.

    Module names under either prefix have the leading "_akg." stripped and
    are delegated to AKGMetaPathLoader, which imports the real package.
    """

    def find_module(self, fullname, path=None):
        """Return an AKGMetaPathLoader for "_akg.tvm*" / "_akg.topi*" names, else None."""
        for prefix in ("_akg.tvm", "_akg.topi"):
            if fullname.startswith(prefix):
                # fullname[5:] drops the leading "_akg." package prefix.
                return AKGMetaPathLoader(fullname[5:])
        return None


class AKGMetaPathLoader:
    """Loader that imports the real module and aliases it under the "_akg." name."""

    def __init__(self, rname):
        # Real (prefix-stripped) module name, e.g. "tvm" or "topi".
        self.__rname = rname

    def load_module(self, fullname):
        """Freshly import the real module and register it in sys.modules as `fullname`."""
        # Drop any cached copy so the import resolves via the path that
        # AKGAddPath puts at the front of sys.path.
        sys.modules.pop(self.__rname, None)
        AKGAddPath()
        __import__(self.__rname, globals(), locals())
        self.__target_module = sys.modules[self.__rname]
        sys.modules[fullname] = self.__target_module
        return self.__target_module


# Install the finder so "_akg.tvm" / "_akg.topi" imports are redirected
# through AKGMetaPathLoader.
sys.meta_path.insert(0, AKGMetaPathFinder())

+ 6
- 4
mindspore/_extends/parallel_compile/tbe_compiler/common.py View File

@@ -122,10 +122,12 @@ def get_args(op_info, arg_type):

elif arg_type == 'attrs':
for item in op_info[arg_type]:
if 'value' not in item:
raise ValueError("Json string Errors, attr key:value not found.")
if item["name"] != "isRef":
args.append(item['value'])
if item["valid"]:
if 'value' not in item:
raise ValueError("Json string Errors, attr key:value not found.")
if item["name"] != "isRef":
args.append(item['value'])

return args




+ 8
- 0
mindspore/ccsrc/debug/anf_ir_dump.cc View File

@@ -91,6 +91,14 @@ void PrintNodeInputType(std::ostringstream &buffer, const AnfNodePtr &nd) {
}
}

// Append the node's inferred input and output types to `buffer` in the
// form " : (<input types>) -> (<output types>)".
void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd) {
  buffer << " : (";
  PrintNodeInputType(buffer, nd);
  buffer << ") -> (";
  PrintNodeOutputType(buffer, nd);
  buffer << ")";
}

struct SubGraphIRInfo {
int32_t local_var;
std::ostringstream buffer;


+ 3
- 1
mindspore/ccsrc/debug/anf_ir_dump.h View File

@@ -18,12 +18,14 @@

#include <string>
#include <vector>
#include "ir/dtype/type.h"
#include "ir/anf.h"

namespace mindspore {
constexpr char PARALLEL_STRATEGY[] = "strategy";
void DumpIR(const std::string &filename, const FuncGraphPtr &func_graph, bool dump_full_name = false);

void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd);
const std::string ToShortString(const TypeId &typeId);
} // namespace mindspore

#endif // MINDSPORE_CCSRC_DEBUG_ANF_IR_DUMP_H_

+ 1
- 1
mindspore/ccsrc/debug/info.h View File

@@ -134,7 +134,7 @@ class DebugInfo : public Base {

explicit DebugInfo(const LocationPtr &loc);

virtual ~DebugInfo() = default;
~DebugInfo() override = default;
MS_DECLARE_PARENT(DebugInfo, Base);
int64_t debug_id();
int64_t unique_id() const { return unique_id_; }


+ 1
- 1
mindspore/ccsrc/debug/trace.cc View File

@@ -231,10 +231,10 @@ std::string AnalyzedFuncGraphExporter::GetNodeType(const AnfNodePtr &node) {
auto engine = node_cfg_->engine();
auto cfg = engine->MakeConfig(node, ctx);
auto abs = engine->cache().GetValue(cfg);

if (abs == nullptr) {
return "Undefined";
}

auto dtype = abs->BuildType();
auto shape = abs->BuildShape();
std::ostringstream oss;


+ 1
- 1
mindspore/ccsrc/debug/trace_info.h View File

@@ -321,7 +321,7 @@ class TraceTransform : public TraceInfo {

std::string full_name() override { return full_name_ + transform_name_; }
MS_DECLARE_PARENT(TraceTransform, TraceInfo);
virtual std::string symbol() {
std::string symbol() override {
if (transform_name_.empty()) {
return "";
}


+ 131
- 151
mindspore/ccsrc/device/ascend/kernel_select_ascend.cc View File

@@ -18,14 +18,15 @@
#include <string>
#include <vector>
#include <memory>
#include <set>
#include <unordered_map>
#include <utility>
#include <map>
#include "kernel/oplib/oplib.h"
#include "kernel/kernel_query.h"
#include "session/anf_runtime_algorithm.h"
#include "kernel/kernel_build_info.h"
#include "utils/context/ms_context.h"
#include "operator/ops.h"
#include "debug/anf_ir_dump.h"

namespace mindspore {
namespace device {
@@ -180,6 +181,7 @@ void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, co
}

void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *support_index) {
MS_EXCEPTION_IF_NULL(support_index);
int index = kUnSupportMixedDataTypeIndex;
switch (data_type) {
case kNumberTypeFloat16:
@@ -197,6 +199,7 @@ void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *s

void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t input_index,
std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) {
MS_EXCEPTION_IF_NULL(support_datatype);
auto data_type = kernel_build_info.GetInputDeviceType(input_index);
support_datatype->push_back(data_type);
AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index);
@@ -204,6 +207,7 @@ void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_i

void AddKernelOutputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t output_index,
std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) {
MS_EXCEPTION_IF_NULL(support_datatype);
auto data_type = kernel_build_info.GetOutputDeviceType(output_index);
support_datatype->push_back(data_type);
AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index);
@@ -214,16 +218,7 @@ void AddNodeInputDataType(const CNodePtr &kernel_node, size_t input_index,
std::vector<TypeId> *node_mix_precision_datatype) {
AnfNodePtr cur_input = AnfAlgo::GetInputNode(kernel_node, input_index);
MS_EXCEPTION_IF_NULL(cur_input);
TypeId input_origin_type;
if (cur_input->isa<Parameter>() && AnfAlgo::IsParameterWeight(cur_input->cast<ParameterPtr>())) {
// weight
input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0);
} else if (cur_input->isa<ValueNode>()) {
input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0);
} else {
// feature map
input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index);
}
TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index);
AddSupportMixedPrecisionDataTypeIndex(input_origin_type, node_mix_precision_datatype_index);
node_mix_precision_datatype->push_back(input_origin_type);
}
@@ -238,8 +233,8 @@ void AddNodeOutputDataType(const CNodePtr &kernel_node, size_t output_index,

void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_index,
const std::vector<TypeId> &node_mix_precision_datatype,
const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
if (node_mix_precision_datatype_index.size() != node_mix_precision_datatype.size()) {
MS_LOG(EXCEPTION) << "node datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size "
<< node_mix_precision_datatype.size();
@@ -251,10 +246,11 @@ void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_ind
}
}

int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index,
const std::vector<TypeId> &node_mix_precision_datatype,
const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
bool RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index,
const std::vector<TypeId> &node_mix_precision_datatype,
const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx);
CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes,
kernel_match_datatype_idx);
for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) {
@@ -289,40 +285,22 @@ int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_data
}
}
}

if (kernel_match_datatype_idx->size() >= 1) {
return SizeToInt(kernel_match_datatype_idx->begin()->first);
}
return -1;
return !kernel_match_datatype_idx->empty();
}

int GetMinReducePrecisionCountIndex(std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx,
const std::unordered_map<size_t, size_t> &precision_reduce_count) {
int selected_index = -1;
size_t min_reduce_precision_count = kMaxCount;
auto iter = kernel_match_datatype_idx->begin();
while (iter != kernel_match_datatype_idx->end()) {
auto find_iter = precision_reduce_count.find(iter->first);
if (find_iter == precision_reduce_count.end()) {
continue;
}
if (min_reduce_precision_count > find_iter->second) {
selected_index = SizeToInt(iter->first);
min_reduce_precision_count = find_iter->second;
}
++iter;
}
return selected_index;
bool CanDataTypeReduce(const std::vector<int> &datatype_indexes, int check_index,
const std::vector<int> &node_mix_precision_datatype_index) {
return datatype_indexes[check_index] != kUnSupportMixedDataTypeIndex &&
datatype_indexes[check_index] <= node_mix_precision_datatype_index[check_index];
}

int RaiseOrReduceDataTypePrecisionSelect(
const std::vector<int> &node_mix_precision_datatype_index, const std::vector<TypeId> &node_mix_precision_datatype,
const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
bool RaiseOrReduceDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index,
const std::vector<TypeId> &node_mix_precision_datatype,
const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx);
CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes,
kernel_match_datatype_idx);
// reduce / raise
std::unordered_map<size_t, size_t> precision_reduce_count;
for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) {
if (node_mix_precision_datatype[i] == kTypeUnknown) {
continue;
@@ -348,29 +326,21 @@ int RaiseOrReduceDataTypePrecisionSelect(
if (i >= datatype_indexes.size()) {
MS_LOG(EXCEPTION) << "index " << i << "> kernel datatype indexes size " << datatype_indexes.size();
}
if (datatype_indexes[i] == kUnSupportMixedDataTypeIndex) {
if (!CanDataTypeReduce(datatype_indexes, i, node_mix_precision_datatype_index)) {
iter = kernel_match_datatype_idx->erase(iter);
} else {
if (datatype_indexes[i] < node_mix_precision_datatype_index[i]) {
auto count_iter = precision_reduce_count.find(iter->first);
if (count_iter != precision_reduce_count.end()) {
count_iter->second++;
} else {
precision_reduce_count[iter->first] = 1;
}
}
++iter;
}
}
}

return GetMinReducePrecisionCountIndex(kernel_match_datatype_idx, precision_reduce_count);
return !kernel_match_datatype_idx->empty();
}

void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelBuildInfo &kernel_build_info,
std::vector<int> *support_indexes, std::vector<TypeId> *node_mix_precision_datatype,
std::vector<TypeId> *support_datatypes,
std::vector<int> *node_mix_precision_datatype_index) {
MS_EXCEPTION_IF_NULL(node_mix_precision_datatype);
bool add_node_datatype_flag = false;
if (node_mix_precision_datatype->size() == 0) {
add_node_datatype_flag = true;
@@ -390,104 +360,59 @@ void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelB
}
}

int PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index,
const std::vector<TypeId> &node_mix_precision_datatype,
const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatype,
std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) {
void PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index,
const std::vector<TypeId> &node_mix_precision_datatype,
const std::map<size_t, std::vector<TypeId>> &kernel_support_datatype,
std::map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) {
MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
MS_EXCEPTION_IF_NULL(precision_reduce);
std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx;
std::map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx;
// raise precision
int selected_index = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
kernel_support_datatype, kernel_match_datatype_idx);
if (selected_index != -1) {
int max_match = 0;
auto iter = kernel_match_datatype_idx->begin();
int match_count = 0;
while (iter != kernel_match_datatype_idx->end()) {
auto kernel_datatypes = kernel_support_datatype.find(iter->first);
if (kernel_datatypes == kernel_support_datatype.end()) {
MS_LOG(EXCEPTION) << "Can not find kernel index" << iter->first << "'s datatype.";
}
if (kernel_datatypes->second.size() < node_mix_precision_datatype.size()) {
MS_LOG(EXCEPTION) << "Kernel datatype size is not equal to node datatype size!";
}
for (size_t i = 0; i < node_mix_precision_datatype.size(); ++i) {
if (node_mix_precision_datatype[i] == kernel_datatypes->second[i]) {
++match_count;
}
}
if (match_count > max_match) {
selected_index = SizeToInt(iter->first);
}
++iter;
}
bool selected_ret = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
kernel_support_datatype, kernel_match_datatype_idx);
if (selected_ret) {
*precision_reduce = false;
return;
}
if (selected_index == -1 && context_ptr->enable_reduce_precision()) {
selected_index =
RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
kernel_support_datatype, &kernel_match_datatype_idx_copy);
if (selected_index != -1) {
*precision_reduce = true;
}
if (context_ptr->enable_reduce_precision()) {
selected_ret = RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
kernel_support_datatype, &kernel_match_datatype_idx_copy);
}
if (selected_ret) {
*precision_reduce = true;
*kernel_match_datatype_idx = kernel_match_datatype_idx_copy;
}
return selected_index;
}

void SelectKernel(const CNodePtr &kernel_node, bool precision_reduce, const std::vector<TypeId> &node_datatype,
const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_info_ptr) {
MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr);
void PrintRaiseOrReducePrecisionSelectedInfo(const CNodePtr &cnode,
const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_build_info,
bool precision_reduce) {
MS_EXCEPTION_IF_NULL(selected_kernel_build_info);
MS_EXCEPTION_IF_NULL(cnode);
std::ostringstream buffer;
buffer << cnode->DebugString();
if (precision_reduce) {
std::ostringstream datatype;
size_t input_num = selected_kernel_info_ptr->GetInputNum();
size_t i = 0;
datatype << "(";
for (; i < input_num && i < node_datatype.size(); ++i) {
datatype << static_cast<int>(node_datatype[i]);
if (i < input_num - 1) {
datatype << ", ";
}
}
datatype << ") -> (";
for (; i < node_datatype.size(); ++i) {
datatype << static_cast<int>(node_datatype[i]);
if (i < node_datatype.size() - 1) {
datatype << ", ";
}
}
datatype << ")";
MS_LOG(WARNING) << kernel_node->DebugString() << " reduce precision, node datatype: " << datatype.str()
<< ", select kernel: %s" << selected_kernel_info_ptr->ToString();
buffer << " reduce precision, node datatype: ";
} else {
buffer << " raise precision, node datatype: ";
}
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, kernel_node.get());
// Set format and data type for input tensor.
SetTensorDeviceInfo(*selected_kernel_info_ptr, kernel_node);
PrintInputAndOutputInferType(buffer, cnode);
buffer << ", select kernel:" << selected_kernel_build_info->ToString();
MS_LOG(INFO) << buffer.str();
}
} // namespace

void SelectKernelInfo(const CNodePtr &kernel_node) {
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
MS_EXCEPTION_IF_NULL(kernel_node);
kernel::KernelQuery(kernel_node, &kernel_info_list);
std::shared_ptr<kernel::KernelBuildInfo> ChooseMatchedKernelInfo(
const CNodePtr &kernel_node, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
if (kernel_info_list.empty()) {
return nullptr;
}
std::vector<int> most_match_counts = {-1, -1, -1, -1};
int selected_index = -1;
std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx;
std::unordered_map<size_t, std::vector<TypeId>> kernel_support_datatype;
std::vector<int> node_mix_precision_datatype_index;
std::vector<TypeId> node_mix_precision_datatype;
size_t selected_index = 0;
for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) {
std::vector<int> cur_kernel_info_match_counts = {0, 0, 0, 0};
auto kernel_build_info = *(kernel_info_list[info_index]);
std::vector<int> support_indexes;
std::vector<TypeId> support_datatypes;
AddNodeAndKernelDataType(kernel_node, kernel_build_info, &support_indexes, &node_mix_precision_datatype,
&support_datatypes, &node_mix_precision_datatype_index);
kernel_match_datatype_idx[info_index] = support_indexes;
kernel_support_datatype[info_index] = support_datatypes;
if (!MatchInferOutputDataType(kernel_node, kernel_build_info)) {
continue;
}
std::shared_ptr<kernel::KernelBuildInfo> kernel_info_ptr = kernel_info_list[info_index];
UpdateCurMatchCounts(*kernel_info_ptr, kernel_node, &cur_kernel_info_match_counts);
// Currently the selection policy is the match format count first, and then is datatype counts.
@@ -495,22 +420,77 @@ void SelectKernelInfo(const CNodePtr &kernel_node) {
selected_index = SizeToInt(info_index);
}
}
return kernel_info_list[selected_index];
}

bool precision_reduce = false;
if (selected_index == -1) {
selected_index = PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype,
kernel_support_datatype, &kernel_match_datatype_idx, &precision_reduce);
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> GetAllMatchedFilteredKernelInfo(
const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> result;
for (const auto &kernel_build_info : kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_build_info);
if (!MatchInferOutputDataType(cnode, *kernel_build_info)) {
continue;
}
result.push_back(kernel_build_info);
}
if (selected_index == -1) {
MS_LOG(EXCEPTION) << kernel_node->DebugString() << "Cannot find valid kernel Info !";
return result;
}

std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecisionMatchedKernelInfo(
const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list,
bool *precision_reduce) {
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_kernel_info_list;
std::map<size_t, std::vector<int>> kernel_match_datatype_idx;
std::map<size_t, std::vector<TypeId>> kernel_support_datatype;
std::vector<int> node_mix_precision_datatype_index;
std::vector<TypeId> node_mix_precision_datatype;
for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) {
std::vector<int> support_indexes;
std::vector<TypeId> support_datatypes;
MS_EXCEPTION_IF_NULL(kernel_info_list[info_index]);
AddNodeAndKernelDataType(cnode, *kernel_info_list[info_index], &support_indexes, &node_mix_precision_datatype,
&support_datatypes, &node_mix_precision_datatype_index);
kernel_match_datatype_idx[info_index] = support_indexes;
kernel_support_datatype[info_index] = support_datatypes;
}
auto index = IntToSize(selected_index);
if (index >= kernel_info_list.size()) {
MS_LOG(EXCEPTION) << "index outof range";
PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatype,
&kernel_match_datatype_idx, precision_reduce);
std::transform(
kernel_match_datatype_idx.begin(), kernel_match_datatype_idx.end(), std::back_inserter(filtered_kernel_info_list),
[&](const std::pair<size_t, std::vector<int>> &matched_idx) -> std::shared_ptr<kernel::KernelBuildInfo> {
return kernel_info_list[matched_idx.first];
});
return filtered_kernel_info_list;
}
} // namespace

void SelectKernelInfo(const CNodePtr &kernel_node) {
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
MS_EXCEPTION_IF_NULL(kernel_node);
bool precision_reduce = false;
std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr;
kernel::KernelQuery(kernel_node, &kernel_info_list);
// filter kernel info matched with me infered type
auto filtered_kernel_info_list = GetAllMatchedFilteredKernelInfo(kernel_node, kernel_info_list);
if (!filtered_kernel_info_list.empty()) {
selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list);
} else {
// selected kernel info using raised precision or reduce precision
filtered_kernel_info_list =
FilterRaisedOrReducePrecisionMatchedKernelInfo(kernel_node, kernel_info_list, &precision_reduce);
selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list);
if (selected_kernel_info == nullptr) {
std::ostringstream buffer;
PrintInputAndOutputInferType(buffer, kernel_node);
MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString()
<< "] cannot find valid kernel info, not supported the type" << buffer.str();
} else {
PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce);
}
}
std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info_ptr = kernel_info_list[index];
MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr);
SelectKernel(kernel_node, precision_reduce, node_mix_precision_datatype, selected_kernel_info_ptr);
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get());
// Set format and data type for input tensor.
SetTensorDeviceInfo(*selected_kernel_info, kernel_node);
}

bool CheckKernelAccuracySupported(const CNodePtr &kernel_node,


+ 17
- 6
mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc View File

@@ -148,18 +148,29 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe
}

if (bp_end_str.empty()) {
auto last_cnode = cnode_exec_order.back();
MS_EXCEPTION_IF_NULL(last_cnode);
bp_end_str = last_cnode->fullname_with_scope();
bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order);
}
return bp_end_str;
}

// Return the full scope name of the last TBE kernel in execution order,
// or an empty string (after logging a warning) when the graph has none.
std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) {
  std::string last_tbe_kernel_name = "";
  // find last tbe_kernel: scan the execution order from the back.
  for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) {
    if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) {
      last_tbe_kernel_name = (*iter)->fullname_with_scope();
      break;
    }
  }
  if (last_tbe_kernel_name.empty()) {
    MS_LOG(WARNING) << "tbe kernel not found in graph";
  }
  return last_tbe_kernel_name;
}

std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) {
const char *trace_netoutput = std::getenv(kIterEndNode);
auto &last_cnode = cnode_exec_order.back();
MS_EXCEPTION_IF_NULL(last_cnode);
return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput);
return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput);
}

NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content,


+ 1
- 0
mindspore/ccsrc/device/ascend/profiling/profiling_utils.h View File

@@ -114,6 +114,7 @@ class ProfilingUtils {
static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order);
static std::string GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order);
static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
static std::string GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order);
static void GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
NotNull<ProfilingTraceInfo *> profiling_trace);
static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,


+ 6
- 0
mindspore/ccsrc/ir/dtype/type.cc View File

@@ -87,6 +87,12 @@ const char *MetaIdLabel(const TypeId &v) {
return "kMetaTypeExternal";
case kMetaTypeNone:
return "kMetaTypeNone";
case kMetaTypeNull:
return "kMetaTypeNull";
case kMetaTypeEllipsis:
return "kMetaTypeEllipsis";
case kMetaTypeEnd:
return "kMetaTypeEnd";
default:
return "[Unknown Type Id]";
}


+ 0
- 3
mindspore/ccsrc/ir/meta_tensor.cc View File

@@ -166,9 +166,6 @@ Tensor::Tensor(const py::int_ &input, const TypePtr &data_type) { init(py::array
Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type)
: MetaTensor(tensor), dirty_(tensor.dirty_), device_address_(tensor.device_address_) {
init(tensor.data_, data_type);
if (device_address_ != nullptr) {
(void)data_sync();
}
}

Tensor &Tensor::operator=(const Tensor &tensor) {


+ 3
- 2
mindspore/ccsrc/kernel/kernel_build_info.cc View File

@@ -17,6 +17,7 @@
#include "kernel/kernel_build_info.h"
#include <algorithm>
#include "utils/log_adapter.h"
#include "debug/anf_ir_dump.h"
namespace mindspore {
namespace kernel {
std::string KernelBuildInfo::GetInputFormat(size_t input_index) const {
@@ -82,14 +83,14 @@ std::string KernelBuildInfo::ToString() const {
if (index != 0) {
output_buffer << ", ";
}
output_buffer << "<" << static_cast<int>(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">";
output_buffer << "<" << ToShortString(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">";
}
output_buffer << ") -> (";
for (size_t index = 0; index < GetOutputNum(); ++index) {
if (index != 0) {
output_buffer << ", ";
}
output_buffer << "<" << static_cast<int>(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">";
output_buffer << "<" << ToShortString(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">";
}
output_buffer << ")";
return output_buffer.str();


+ 2
- 1
mindspore/ccsrc/kernel/kernel_fusion.cc View File

@@ -108,7 +108,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
}

if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) {
MS_LOG(DEBUG) << "fuison op build failed, err log: " << task_result << " change to single op build.";
MS_LOG(INFO) << "Fusion warning: Fuison op build failed, err log: " << task_result
<< " change to single op build.";
build_failed_num++;
}
auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false);


+ 46
- 0
mindspore/ccsrc/kernel/tbe/tbe_adapter.cc View File

@@ -153,6 +153,52 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vector<std::vec
}
}

// Copy `inputs_list` into `inputs_json`, reordering inputs for ops listed in
// input_order_adjusted_ops: MinimumGrad/MaximumGrad emit (2, 0, 1, rest);
// every other adjusted op swaps the first two inputs (1, 0, rest); any op
// not in the set keeps the original order.
void TbeAdapter::FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list,
                                      std::vector<nlohmann::json> *inputs_json) {
  MS_EXCEPTION_IF_NULL(inputs_json);
  if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) {
    // Not an order-adjusted op: keep the original input order.
    (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json)));
  } else {
    if (op_name == "MinimumGrad" || op_name == "MaximumGrad") {
      inputs_json->emplace_back(inputs_list[2]);
      inputs_json->emplace_back(inputs_list[0]);
      inputs_json->emplace_back(inputs_list[1]);
      for (size_t i = 3; i < inputs_list.size(); ++i) {
        inputs_json->emplace_back(inputs_list[i]);
      }
    } else {
      inputs_json->emplace_back(inputs_list[1]);
      inputs_json->emplace_back(inputs_list[0]);
      for (size_t i = 2; i < inputs_list.size(); ++i) {
        inputs_json->emplace_back(inputs_list[i]);
      }
    }
  }
}

// Copy `data_layer` into `reorder_data_layer`, applying the same reordering
// rule as the inputs pass: MinimumGrad/MaximumGrad emit (2, 0, 1, rest);
// every other adjusted op swaps the first two entries (1, 0, rest); any op
// not in input_order_adjusted_ops keeps the original order.
void TbeAdapter::FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer,
                                     std::vector<AnfNodePtr> *reorder_data_layer) {
  MS_EXCEPTION_IF_NULL(reorder_data_layer);
  if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) {
    // Not an order-adjusted op: keep the original layer order.
    (void)std::copy(data_layer.begin(), data_layer.end(), std::back_inserter((*reorder_data_layer)));
  } else {
    if (op_name == "MinimumGrad" || op_name == "MaximumGrad") {
      reorder_data_layer->emplace_back(data_layer[2]);
      reorder_data_layer->emplace_back(data_layer[0]);
      reorder_data_layer->emplace_back(data_layer[1]);
      for (size_t i = 3; i < data_layer.size(); ++i) {
        reorder_data_layer->emplace_back(data_layer[i]);
      }
    } else {
      reorder_data_layer->emplace_back(data_layer[1]);
      reorder_data_layer->emplace_back(data_layer[0]);
      for (size_t i = 2; i < data_layer.size(); ++i) {
        reorder_data_layer->emplace_back(data_layer[i]);
      }
    }
  }
}

std::map<std::string, FAttrsPass> TbeAdapter::build_json_attr_pass_map_ = {
{"MaximumGrad", TbeAdapter::MaximumGradAttrJsonPass},
{"MinimumGrad", TbeAdapter::MinimumGradAttrJsonPass},


+ 5
- 8
mindspore/ccsrc/kernel/tbe/tbe_adapter.h View File

@@ -44,15 +44,12 @@ class TbeAdapter {
static void GenTopKV2IndicesTensorInfo(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
std::vector<nlohmann::json> *input_list, kCreaterType creater_type);

static void FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list,
std::vector<nlohmann::json> *inputs_json);
static void FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer,
std::vector<AnfNodePtr> *reorder_data_layer);

private:
static void Conv2DAttrJsonPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);
static void Conv2DBackpropFilterAttrJsonPass(const AnfNodePtr &anf_node,
const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);
static void Conv2DBackpropInputAttrJsonPass(const AnfNodePtr &anf_node,
const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);
static void MaximumGradAttrJsonPass(const AnfNodePtr &anf_node,
const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);


+ 109
- 51
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc View File

@@ -375,20 +375,26 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no
MS_EXCEPTION_IF_NULL(primitive);
for (const auto &attr_ptr : attrs_ptr) {
std::string attr_name = attr_ptr->name();
nlohmann::json attr_obj;
attr_obj["name"] = attr_name;
if (primitive->GetAttr(attr_name) != nullptr) {
nlohmann::json attr_obj;
auto value = primitive->GetAttr(attr_name);
std::string type = attr_ptr->type();
ParseAttrValue(type, value, &attr_obj);
attr_obj["name"] = attr_name;
attr_obj["valid"] = true;
(*attrs_json).push_back(attr_obj);
} else {
if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD && op_info->impl_path() != "") {
MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name
<< " is required, but not set.";
if (op_info->impl_path().empty()) {
attr_obj["valid"] = false;
} else {
if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD) {
MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name
<< " is required, but not set.";
} else {
attr_obj["valid"] = false;
}
}
}
(*attrs_json).push_back(attr_obj);
}
return true;
}
@@ -484,7 +490,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
MS_EXCEPTION_IF_NULL(fusion_kernel);
// get input layer info
std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
if (!GetInputLayers(input_nodes, compute_nodes, &input_layers)) {
std::map<const AnfNodePtr, FusionDataType> spec_data_input;
if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
return false;
}
// gen fusion scopre_op jsom
@@ -505,8 +512,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
for (const auto &layer : input_layers) {
for (const auto &data_input : layer) {
nlohmann::json data_str;
if (!GenFusionDataInputJson(data_input, &data_str, &index)) {
MS_LOG(DEBUG) << "GenFusionDataInputJson faild.";
if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) {
MS_LOG(INFO) << "Fusion error: gen fusion datainput json faild.";
return false;
}
data_list.push_back(data_str);
@@ -519,7 +526,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
}

void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc) {
size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
std::string output_desc_name = anf_node->fullname_with_scope();
if (node_out_idx > 0) {
output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
@@ -539,58 +546,109 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
(*output_desc)["shape"] = shape;
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
if (format == kOpFormat_DEFAULT) {
if (ori_shape.size() == 4) {
format = kOpFormat_NCHW;
} else {
format = kOpFormat_ND;
}
format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND;
}
(*output_desc)["format"] = format;
(*output_desc)["ori_format"] = kOpFormat_NCHW;
(*output_desc)["output_index"] = desc_output_idx;
if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) {
std::vector<size_t> spec_shape = {};
spec_shape.emplace_back(shape[0]);
spec_shape.emplace_back(shape[1]);
spec_shape.emplace_back(shape[2] * shape[3]);
spec_shape.emplace_back(shape[4]);
(*output_desc)["shape"] = spec_shape;
} else if (fusion_data_type == kFusionReLUGradV2 && (*output_desc)["data_type"] == "uint8") {
std::vector<size_t> spec_shape = {};
spec_shape.emplace_back(shape[0]);
spec_shape.emplace_back(shape[1]);
spec_shape.emplace_back(shape[2] * shape[3]);
spec_shape.emplace_back(16);
(*output_desc)["shape"] = spec_shape;
(*output_desc)["data_type"] = "bool";
}
}

void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc) {
std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
(*output_desc)["name"] = NormalizeFullScopeName(output_desc_name);
(*output_desc)["data_type"] = tbe::TypeIdToString(kNumberTypeFloat32);
(*output_desc)["output_index"] = output_index;
std::vector<size_t> shape;
(*output_desc)["shape"] = shape;
}

bool TbeKernelBuild::GetInputLayers(const vector<mindspore::AnfNodePtr> &input_nodes,
const vector<mindspore::AnfNodePtr> &compute_nodes,
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers) {
bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
const std::vector<mindspore::AnfNodePtr> &reorder_layer,
std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
if ((op_name == kReluGradV2OpName || op_name == kAddNOpName) && reorder_layer.empty()) {
MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. ";
return false;
}
MS_LOG(INFO) << "Fusion info: op_name: " << op_name << "input layer size: " << reorder_layer.size();
if (op_name == kReluGradV2OpName) {
(*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2;
} else if (op_name == kAddNOpName) {
for (const auto &it : reorder_layer) {
(*spec_data_input)[it] = kFusionAddN;
}
}
return true;
}

bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) {
auto op_name = AnfAlgo::GetCNodeName(it);
return op_name == kConv2DBackpropInputOpName;
});
bool need_spec = (result != compute_nodes.end());
size_t input_size = 0;
for (const auto &compute_node : compute_nodes) {
std::vector<mindspore::AnfNodePtr> layer;
std::vector<mindspore::AnfNodePtr> layer = {};
std::vector<mindspore::AnfNodePtr> reorder_layer = {};
MS_EXCEPTION_IF_NULL(compute_node);
auto op_name = AnfAlgo::GetCNodeName(compute_node);
auto ccompute_node = compute_node->cast<CNodePtr>();
if (ccompute_node == nullptr) {
MS_LOG(DEBUG) << "fusion compute node must be cnode";
MS_LOG(INFO) << "Fusion error: fusion compute node must be cnode";
return false;
}
MS_LOG(INFO) << "Fusion info: compute name: " << compute_node->fullname_with_scope();
for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) {
auto input = ccompute_node->input(i);
auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input);
if (find_iter != input_nodes.end()) {
MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope();
layer.emplace_back((*find_iter));
} else {
MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
<< ") node's output.";
}
}
TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer);
if (need_spec) {
MS_LOG(INFO) << "Fusion info: match conv2d backprop input + ... patten.";
if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) {
return false;
}
}
input_size += layer.size();
input_layers->emplace_back(layer);
input_size += reorder_layer.size();
input_layers->emplace_back(reorder_layer);
}
if (input_nodes.size() != input_size) {
MS_LOG(DEBUG) << "fusion scope error, layer input:" << input_size << ", input_node:" << input_nodes.size();
MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size
<< ", input_node:" << input_nodes.size();
return false;
}
return true;
}

bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str,
size_t *index) {
bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input,
const std::map<const AnfNodePtr, FusionDataType> &spec_data_input,
nlohmann::json *data_str, size_t *index) {
MS_EXCEPTION_IF_NULL(data_str);
MS_EXCEPTION_IF_NULL(index);
std::vector<nlohmann::json> output_desc_list;
@@ -604,13 +662,17 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode>
output_desc_list.push_back(output_desc);
(*index)++;
} else {
FusionDataType fusion_data_type = kFusionNormal;
if (spec_data_input.find(data_input) != spec_data_input.end()) {
fusion_data_type = spec_data_input.at(data_input);
}
auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx;
// "output_desc"
nlohmann::json output_desc;
GenDescJson(real_node, real_idx, real_idx, &output_desc);
GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
output_desc_list.push_back(output_desc);
(*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope());
}
@@ -632,11 +694,12 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
auto real_input_size = cnode->inputs().size() - 1;
auto dyn_input_size = dyn_input_sizes.size();
if (dyn_input_size != 1) {
MS_LOG(DEBUG) << "fusion build not support dyn_input_sizes > 1";
MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1";
return ret;
}
if (IntToSize(dyn_input_sizes[0]) != real_input_size) {
MS_LOG(DEBUG) << " dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" << real_input_size;
MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size"
<< real_input_size;
return ret;
}
ret = true;
@@ -663,6 +726,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(input_desc_list);
std::vector<nlohmann::json> input_desc_list_tmp = {};
bool is_dynamic_input = IsDynamicInput(cnode);
for (size_t i = 1; i < cnode->inputs().size(); ++i) {
auto input = cnode->input(i);
@@ -676,7 +740,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
MS_LOG(INFO) << "node has dynamic input.";
input_desc["dyn_index"] = (i - 1);
}
(*input_desc_list).emplace_back(input_desc);
input_desc_list_tmp.emplace_back(input_desc);
}
size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
if (optional_num > 0) {
@@ -686,35 +750,24 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index);
(*index)++;
(*layer_iter)->emplace_back(nullptr);
(*input_desc_list).emplace_back(optional_input_desc);
input_desc_list_tmp.emplace_back(optional_input_desc);
}
}
auto op_name = AnfAlgo::GetCNodeName(cnode);
TbeAdapter::FusionInputOrderPass(op_name, input_desc_list_tmp, input_desc_list);
return true;
}

std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) {
std::vector<size_t> desc_output_index = {};
bool find_reused = false;
size_t reused_num = 0;
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
auto output_use_num_item = output_used_nums[idx];
MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item;
if (output_use_num_item == 1 || output_use_num_item == 0) {
desc_output_index.emplace_back(idx);
if (output_use_num_item > 1) {
desc_output_index.emplace_back(idx);
} else {
if (!find_reused) {
desc_output_index.emplace_back(idx);
} else {
desc_output_index.emplace_back(desc_output_index[idx - 1]);
}
reused_num += (output_use_num_item - 1);
find_reused = true;
}
}
auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1;
for (size_t i = 0; i < reused_num; ++i) {
desc_output_index.emplace_back(pad_value);
}
return desc_output_index;
}

@@ -722,8 +775,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
std::vector<nlohmann::json> *output_desc_list) {
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
// wait anther pr: auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
auto output_used_nums = {SizeToInt(AnfAlgo::GetNodeAttr<std::size_t>(cnode, kAttrOutputUsedNum))};
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
if (output_used_nums.size() != output_size) {
MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
@@ -812,6 +864,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto
}
auto ret = GetIOSizeImpl(data_output);
input_size_list->push_back(ret);
MS_LOG(INFO) << "Fusion info: scope input name: " << op["name"] << ", size: " << ret;
}
}
}
@@ -820,26 +873,31 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto
auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx;
for (const auto &op : fusion_op_list) {
auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
if (op["name"] == normal_name) {
auto op_output_desces = op["output_desc"];
if (output_node != real_node) {
// tuple_get item
MS_LOG(DEBUG) << "output is a tuple getitem node";
MS_LOG(INFO) << "output is a tuple getitem node";
auto output_desc = op_output_desces[real_idx];
if (output_desc["shape"].empty()) {
continue;
MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
return false;
}
auto ret = GetIOSizeImpl(output_desc);
output_size_list->push_back(ret);
MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret;
} else {
for (const auto &output_desc : op_output_desces) {
if (output_desc["shape"].empty()) {
MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output";
continue;
}
auto ret = GetIOSizeImpl(output_desc);
output_size_list->push_back(ret);
MS_LOG(INFO) << "Fusion info: scope output size: " << ret;
}
}
}


+ 11
- 4
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h View File

@@ -35,6 +35,8 @@ namespace kernel {
// kernel operate type used for generate json

class TbeKernelBuild {
enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2 };

public:
static bool GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
std::vector<size_t> *output_size_list);
@@ -48,8 +50,9 @@ class TbeKernelBuild {
private:
TbeKernelBuild() = default;
~TbeKernelBuild() = default;
static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str,
size_t *index);
static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input,
const std::map<const AnfNodePtr, FusionDataType> &spec_data_input,
nlohmann::json *data_str, size_t *index);
static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index);
@@ -60,13 +63,17 @@ class TbeKernelBuild {
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list);
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc);
size_t desc_output_idx, nlohmann::json *output_desc,
FusionDataType fusion_data_type = kFusionNormal);
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc);
static size_t GetIOSizeImpl(const nlohmann::json &desc);
static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer,
std::map<const AnfNodePtr, FusionDataType> *spec_data_input);
static bool GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers);
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
std::map<const AnfNodePtr, FusionDataType> *spec_data_input);
static bool IsDynamicInput(const CNodePtr &cnode);
static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
};


+ 2
- 1
mindspore/ccsrc/mindrecord/io/shard_reader.cc View File

@@ -346,7 +346,8 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string
MS_LOG(ERROR) << "Error in select sql statement, sql:" << common::SafeCStr(sql) << ", error: " << errmsg;
return;
}
MS_LOG(INFO) << "Get" << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index.";
MS_LOG(INFO) << "Get " << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index.";
std::lock_guard<std::mutex> lck(shard_locker_);
for (int i = 0; i < static_cast<int>(columns.size()); ++i) {
categories.emplace(columns[i][0]);
}


+ 8
- 5
mindspore/ccsrc/operator/composite/composite.cc View File

@@ -1084,6 +1084,7 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple,
std::vector<unsigned int> shrink;
auto slice_tuple_eles = slice_tuple->elements();
size_t ellipsis_num = 0;

for (size_t index = 0; index < slice_tuple_size; index++) {
if (slice_tuple_eles[index]->isa<AbstractSlice>()) {
AbstractSlicePtr slice = dyn_cast<AbstractSlice>(slice_tuple_eles[index]);
@@ -1118,12 +1119,13 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple,
<< slice_tuple_eles[index]->ToString();
}

for (size_t index = slice_tuple_size; index < shape_size; index++) {
begin->push_back(0);
end->push_back(shape[index]);
strides->push_back(1);
if (ellipsis_num == 0) {
for (size_t index = slice_tuple_size; index < shape_size; index++) {
begin->push_back(0);
end->push_back(shape[index]);
strides->push_back(1);
}
}

return ConvertBinaryToDecimal(shrink);
}

@@ -1199,6 +1201,7 @@ FuncGraphPtr TensorSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec
if (scalar_ptr->BuildValue()->cast<BoolImmPtr>()->value()) {
return ExpandADim(ret_graph, tensor_node);
}
MS_LOG(EXCEPTION) << "TensorSlice not support the index is False.";
}
shrink_axis_mask = GenerateStridedSliceParametersFromNumber(scalar_ptr, shape, &begin, &end, &strides);
} else if (args_spec_list[1]->isa<AbstractEllipsis>()) {


+ 0
- 2
mindspore/ccsrc/operator/composite/unpack_call.h View File

@@ -35,7 +35,6 @@
namespace mindspore {
// namespace to support composite operators definition
namespace prim {

// Expand the tuple and dict parameters generated when parsing the function call,
// and generate positional parameters and key-value pairs for function.
class UnpackCall : public MetaFuncGraph {
@@ -47,7 +46,6 @@ class UnpackCall : public MetaFuncGraph {
friend bool operator==(const UnpackCall &lhs, const UnpackCall &rhs) { return lhs.name_ == rhs.name_; }
};
using UnpackCallPtr = std::shared_ptr<UnpackCall>;

} // namespace prim
} // namespace mindspore



+ 0
- 1
mindspore/ccsrc/optimizer/irpass.cc View File

@@ -133,7 +133,6 @@ ResolveIRPassLib::ResolveIRPassLib() {
InferenceOptPrepareLib::InferenceOptPrepareLib() {
grad_var_prepare_ = MakeSubstitution(GradVarPrepare(), "grad_var_prepare", IsCNode);
}

} // namespace irpass
} // namespace opt
} // namespace mindspore

+ 0
- 1
mindspore/ccsrc/optimizer/irpass.h View File

@@ -159,7 +159,6 @@ inline bool IsCNodeDup(const AnfNodePtr &node) {
}
return false;
}

} // namespace irpass
} // namespace opt
} // namespace mindspore


+ 0
- 1
mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc View File

@@ -31,7 +31,6 @@
namespace mindspore {
namespace opt {
namespace irpass {

static AnfNodePtr GenerateUnpackGraphNode(std::vector<AnfNodePtr> inputs_y, FuncGraphPtr func_graph,
AnfNodePtr func_node, bool is_unpack, bool sens_param) {
MS_EXCEPTION_IF_NULL(func_graph);


+ 0
- 1
mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h View File

@@ -33,7 +33,6 @@
namespace mindspore {
namespace opt {
namespace irpass {

// {{GradOperation, g, w}, Ys}
// {UnPackCall, {GradOperation, g, w}, Ys}
class GradVarPrepare : public AnfVisitor {


+ 0
- 2
mindspore/ccsrc/pipeline/base.h View File

@@ -28,13 +28,11 @@

namespace mindspore {
namespace pipeline {

struct ExecutorInfo {
FuncGraphPtr func_graph;
ResourcePtr resource;
std::size_t arg_list_size;
};

using ExecutorInfoPtr = std::shared_ptr<ExecutorInfo>;

inline std::string GetPhasePrefix(const std::string &phase) {


+ 1
- 1
mindspore/ccsrc/pipeline/init.cc View File

@@ -97,7 +97,7 @@ PYBIND11_MODULE(_c_expression, m) {
py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"),
py::arg("phase") = py::str("dataset"), "Init and exec dataset.");
(void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode.");
(void)m.def("init_ge", &mindspore::pipeline::InitGe, "Init GE");
(void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend.");

(void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph.");



+ 13
- 6
mindspore/ccsrc/pipeline/pipeline.cc View File

@@ -101,7 +101,7 @@ py::tuple GenerateKey(const std::string &name, const std::unordered_map<std::str
MS_LOG(INFO) << "Start new args and compile key:" << key;
g_args_cache[args_spec] = key++;
}
py::tuple argSpec = py::tuple(2);
auto argSpec = py::tuple(2);
argSpec[0] = name;
argSpec[1] = g_args_cache[args_spec];
return argSpec;
@@ -236,7 +236,7 @@ py::dict ExecutorPy::GetAllreduceFusion(const std::string &phase) {

void ExecutorPy::DelNetRes(const std::string &id) {
#ifdef ENABLE_GE
FinalizeGe();
FinalizeBackend();
#endif
if (executor_ != nullptr) {
bool flag = false;
@@ -668,6 +668,13 @@ bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t ba
const std::vector<TypePtr> &types, const std::vector<std::vector<int64_t>> &shapes,
const std::vector<int64_t> &input_indexes, const std::string &phase) {
std::string name = MsContext::GetInstance()->backend_policy();
#ifndef NO_DLIB
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (!ms_context->IsTsdOpened() || !ms_context->IsGeInited()) {
(void)InitBackend();
}
#endif
if (name == kMsConvert || name == kMsVm) {
return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes);
}
@@ -746,7 +753,7 @@ void ResetOpId() { mindspore::id_generator::reset_id(); }

void InitHccl() {
#ifdef ENABLE_GE
(void)InitGe();
(void)InitBackend();
#else
mindspore::parse::python_adapter::set_python_env_flag(true);
auto ms_context = MsContext::GetInstance();
@@ -768,7 +775,7 @@ void InitHccl() {

void FinalizeHccl() {
#ifdef ENABLE_GE
(void)FinalizeGe();
(void)FinalizeBackend();
#else
device::KernelRuntimeManager::Instance().ClearRuntimeResource();
#endif
@@ -789,7 +796,7 @@ void ReleaseGeTsd() {
}
}

void InitGe() {
void InitBackend() {
// set python env flag
mindspore::parse::python_adapter::set_python_env_flag(true);
// open tsd before ge initialize
@@ -801,7 +808,7 @@ void InitGe() {
(void)ms_context->InitGe();
}

void FinalizeGe() {
void FinalizeBackend() {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
(void)context_ptr->FinalizeGe();


+ 2
- 2
mindspore/ccsrc/pipeline/pipeline.h View File

@@ -115,8 +115,8 @@ bool InitDistribute(const std::map<std::string, std::string> &options);
void ResetOpId();
void InitHccl();
void FinalizeHccl();
void InitGe();
void FinalizeGe();
void InitBackend();
void FinalizeBackend();

void ClearResAtexit();
void ReleaseGeTsd();


+ 21
- 22
mindspore/ccsrc/pipeline/pipeline_ge.cc View File

@@ -52,11 +52,11 @@ void DoExecNonInputGraph(const std::string &phase) {
transform::RunOptions run_options;
run_options.name = phase;
auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();

if (graph_runner == nullptr) {
MS_LOG(ERROR) << "Can not found GraphRunner";
return;
}

{
// Release GIL before calling into (potentially long-running) C++ code
py::gil_scoped_release release;
@@ -181,7 +181,6 @@ bool AddDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::di
size_t pos = phase.find('.');
std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1));
std::string phase_prefix = phase.substr(0, pos);

if (phase_prefix == "export") {
MS_LOG(INFO) << "Set DfGraphConvertor training : false";
convertor.set_training(false);
@@ -319,19 +318,24 @@ void RunGEInitGraph(const py::dict &init_params, const std::string &phase) {

py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::tuple &data, size_t *count) {
MS_EXCEPTION_IF_NULL(cnode_data);
if (*count >= data.size()) {
MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
<< " less than the number of elements required. ";
}

if (cnode_data->isa<AbstractTensor>()) {
if (*count >= data.size()) {
MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
<< " less than the number of elements required. ";
}

BaseShapePtr shape = cnode_data->BuildShape();
auto shape_act = shape->cast<abstract::ShapePtr>()->shape();
Tensor tensor_exp = py::cast<Tensor>(data[*count]);
if (shape_act != tensor_exp.shape()) {
MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as "
"the shape of the tensor derived from ME.";
if (!shape->isa<abstract::Shape>()) {
MS_LOG(EXCEPTION) << "The shape of the tensor derived is not Shape, is " << shape->ToString();
}
auto shape_me = shape->cast<abstract::ShapePtr>()->shape();
auto shape_ge = py::cast<Tensor>(data[*count]).shape();
if (shape_ge != shape_me) {
MS_LOG(EXCEPTION) << "The shape of the " << *count << "th tensor returned: " << shape_ge
<< " is not the same as the shape of the tensor derived: " << shape_me;
}

return data[(*count)++];
}

@@ -343,7 +347,7 @@ py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::t
auto data_tp = cnode_data->cast<AbstractTuplePtr>();
auto elements = data_tp->elements();
size_t size = data_tp->size();
py::tuple tp = py::tuple(size);
auto tp = py::tuple(size);
for (size_t i = 0; i < size; i++) {
tp[i] = ExtractGeneralCnodeRet(elements[i], data, count);
}
@@ -357,11 +361,11 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data,
return ValuePtrToPyData(GetValueNode(output_node));
}

if (*count >= data.size()) {
MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
<< " less than the number of elements required. ";
}
if (output_node->isa<Parameter>()) {
if (*count >= data.size()) {
MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
<< " less than the number of elements required. ";
}
return data[(*count)++];
}

@@ -374,7 +378,7 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data,
if (output_c->IsApply(prim::kPrimMakeTuple)) {
auto input_list = output_c->inputs();
size_t size = input_list.size();
py::tuple tp = py::tuple(size - 1);
auto tp = py::tuple(size - 1);
for (size_t i = 1; i < size; i++) {
tp[i - 1] = StructureOutput(input_list[i], data, count);
}
@@ -396,11 +400,8 @@ std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr &graph, const std::ve

std::vector<GeTensorPtr> ge_outputs;
transform::RunOptions run_options;

run_options.name = phase;

auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();

if (graph_runner == nullptr) {
MS_LOG(EXCEPTION) << "Can not found GraphRunner.";
}
@@ -473,7 +474,6 @@ void ProcessGeArg(const std::map<std::string, ExecutorInfoPtr> &info, const py::
py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::tuple &args,
const std::string &phase) {
std::string phase_prefix = GetPhasePrefix(phase);

if (phase_prefix == "save") {
DoExecNonInputGraph(phase);
ConfigManager::GetInstance().ResetConfig();
@@ -483,7 +483,6 @@ py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const
if (info.count(phase) == 0) {
MS_LOG(EXCEPTION) << "There is no phase:" << phase;
}

FuncGraphPtr anf_graph = info.at(phase)->func_graph;

#ifdef ENABLE_INFER


+ 0
- 2
mindspore/ccsrc/pipeline/pipeline_ge.h View File

@@ -31,7 +31,6 @@

namespace mindspore {
namespace pipeline {

namespace py = pybind11;

void SetGeOption(const std::map<std::string, std::string> &options);
@@ -50,7 +49,6 @@ bool InitExecDatasetGe(const std::string &queue_name, int64_t size, int64_t batc
const std::vector<int64_t> &input_indexes, const std::string &phase);

void ExportDFGraph(const std::string &file_name, const std::string &phase);

} // namespace pipeline
} // namespace mindspore



+ 2
- 2
mindspore/ccsrc/pipeline/static_analysis/abstract_function.h View File

@@ -41,7 +41,7 @@ class AbstractFuncAtom : public AbstractFunction {

AbstractFunctionPtr Join(const AbstractFunctionPtr &other) final;
void Visit(std::function<void(const AbstractFuncAtomPtr &)>) const final;
bool operator==(const AbstractFunction &other) const;
bool operator==(const AbstractFunction &other) const override;

std::size_t hash() const override { return tid(); }
};
@@ -270,7 +270,7 @@ class TypedPrimitiveAbstractClosure : public AbstractFuncAtom {
class DummyAbstractClosure : public AbstractFuncAtom {
public:
DummyAbstractClosure() = default;
~DummyAbstractClosure() = default;
~DummyAbstractClosure() override = default;
MS_DECLARE_PARENT(DummyAbstractClosure, AbstractFuncAtom)

EvaluatorPtr GetEvaluator(AnalysisEnginePtr) override { MS_LOG(EXCEPTION) << "A dummy function cannot eval."; }


+ 0
- 1
mindspore/ccsrc/pipeline/static_analysis/prim.cc View File

@@ -295,7 +295,6 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) {
dic["shape"] = shape;
dic["dtype"] = arg_slice->BuildType();
dic["value"] = BuildValue(arg_slice->BuildValue());

} else if (abs_base->isa<AbstractTuple>()) {
auto arg_tuple = dyn_cast<AbstractTuple>(abs_base);
size_t len = arg_tuple->size();


+ 2
- 2
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc View File

@@ -38,6 +38,7 @@
#include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h"
#include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h"
#include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h"
#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h"
#include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h"
#include "pre_activate/ascend/ir_fusion/transdata_split.h"
#include "pre_activate/ascend/ir_fission/topk_split.h"
@@ -46,7 +47,6 @@
#include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h"
#include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h"
#include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h"
#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h"
#include "pre_activate/ascend/ir_fusion/derelu_fusion.h"
#include "pre_activate/ascend/format_type/insert_trans_op.h"
#include "pre_activate/pass/getitem_tuple.h"
@@ -97,7 +97,6 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) {
ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>());
ir_fusion_pm->AddPass(std::make_shared<AddnFission>());
ir_fusion_pm->AddPass(std::make_shared<DereluFusion>());
ir_fusion_pm->AddPass(std::make_shared<ConfusionMulGradFusion>());
ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>());
ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>());
}
@@ -267,6 +266,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
other_pm->AddPass(std::make_shared<AllReduceFusion>());
other_pm->AddPass(std::make_shared<AllGatherFusion>());
other_pm->AddPass(std::make_shared<ParameterTransOpFusion>());
other_pm->AddPass(std::make_shared<RefreshParameterFormat>());
other_pm->AddPass(std::make_shared<BufferFusion>());
other_pm->AddPass(std::make_shared<GetitemTuple>());
other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>());


+ 11
- 0
mindspore/ccsrc/pre_activate/ascend/ascend_helper.h View File

@@ -21,6 +21,7 @@
#include <vector>
#include "device/ascend/kernel_select_ascend.h"
#include "kernel/kernel_query.h"
#include "kernel/tbe/tbe_kernel_select.h"

namespace mindspore {
namespace opt {
@@ -36,6 +37,16 @@ class KernelSelect {
};
using KernelSelectPtr = std::shared_ptr<KernelSelect>;

class SupportedChecker {
public:
SupportedChecker() = default;
virtual ~SupportedChecker() = default;
virtual bool CheckSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) {
return kernel::CheckSupported(anf_node, select_kernel_build_info);
}
};
using SupportedCheckerPtr = std::shared_ptr<SupportedChecker>;

class KernelQuery {
public:
KernelQuery() = default;


+ 242
- 153
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc View File

@@ -17,12 +17,14 @@

#include <vector>
#include <tuple>
#include <utility>
#include <unordered_set>
#include <unordered_map>
#include <deque>
#include <memory>
#include <string>
#include <algorithm>
#include <iterator>

#include "kernel/kernel_fusion.h"
#include "debug/anf_ir_dump.h"
@@ -260,33 +262,40 @@ CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::v
return buffer_fusion_kernel;
}

kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list_in,
const std::vector<AnfNodePtr> &inputs_list,
kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list,
const std::vector<AnfNodePtr> &outputs_list) {
MS_LOG(DEBUG) << "Start Create Kernel Info";
kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
// inputs format and data type
std::vector<std::string> inputs_format;
std::vector<TypeId> inputs_data_type;
for (auto node : inputs_list_in) {
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto &inputs = cnode->inputs();
for (size_t input_index = 1; input_index < inputs.size(); ++input_index) {
if (std::find(inputs_list.begin(), inputs_list.end(), inputs[input_index]) != inputs_list.end()) {
inputs_format.push_back(AnfAlgo::GetInputFormat(node, input_index - 1));
inputs_data_type.push_back(AnfAlgo::GetInputDeviceDataType(node, input_index - 1));
}
for (const auto &input : inputs_list) {
if (input->isa<CNode>() && AnfAlgo::GetCNodeName(input) == prim::kPrimTupleGetItem->name()) {
auto tuple_getitem = input->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(tuple_getitem);
inputs_format.push_back(AnfAlgo::GetOutputFormat(
tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(
tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
} else {
inputs_format.push_back(AnfAlgo::GetOutputFormat(input, 0));
inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(input, 0));
}
}
// outputs format and data type
std::vector<std::string> outputs_format;
std::vector<TypeId> outputs_data_type;
for (size_t index = 0; index < outputs_list.size(); ++index) {
for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) {
auto kernel_with_index = AnfAlgo::VisitKernel(outputs_list[index], idx);
outputs_format.push_back(AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second));
outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second));
for (const auto &output : outputs_list) {
if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) {
auto tuple_getitem = output->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(tuple_getitem);
outputs_format.push_back(AnfAlgo::GetOutputFormat(
tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(
tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
} else {
outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0));
outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0));
}
}
builder.SetInputsFormat(inputs_format);
@@ -320,140 +329,235 @@ AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::K
return tuple_item;
}

void ReplaceOldNode(const std::vector<AnfNodePtr> &outputs_list, const AnfNodePtr &buffer_fusion_kernel,
session::KernelGraph *kernel_graph) {
void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos,
int32_t fusion_id, const AnfNodePtr &output_item,
const AnfNodePtr &replace_item) {
for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) {
auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(),
output_item);
if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) {
MS_LOG(DEBUG) << "replace input of other pattern, id = " << id;
*itr = replace_item;
}
}
}

void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id,
const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto manager = kernel_graph->manager();
MS_EXCEPTION_IF_NULL(manager);
if (outputs_list.size() == 1) { // single output
(void)manager->Replace(outputs_list[0], buffer_fusion_kernel);
auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id];
if (buffer_fusion_info.outputs_list.size() == 1) { // single output
(void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel);
ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0],
buffer_fusion_kernel);
} else { // multiple output
size_t real_idx = 0;
for (size_t index = 0; index < outputs_list.size(); ++index) {
if (AnfAlgo::GetOutputTensorNum(outputs_list[index]) == 1) {
auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++);
(void)manager->Replace(outputs_list[index], tuple_item);
} else {
std::vector<AnfNodePtr> make_tuple_inputs;
AbstractBasePtrList abstract_list;
make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple));
for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) {
auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++);
abstract_list.push_back(tuple_item->abstract());
make_tuple_inputs.push_back(tuple_item);
for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) {
auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index);
(void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item);
ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index],
tuple_item);
}
}
}

void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
auto nodes = TopoSort(kernel_graph->get_return());
for (auto &node : nodes) {
MS_EXCEPTION_IF_NULL(node);
if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) {
auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId);
(*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node);
}
}
}

void GetFusionScopeInputNodeList(session::KernelGraph *kernel_graph,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
auto manager = kernel_graph->manager();
MS_EXCEPTION_IF_NULL(manager);

for (auto &buffer_fusion_info : *buffer_fusion_infos) {
auto fusion_id = buffer_fusion_info.first;
auto fusion_info = buffer_fusion_info.second;
for (const auto &node : fusion_info.anf_nodes) {
auto cnode = node->cast<CNodePtr>();
for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) {
auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0);
if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) ==
fusion_info.anf_nodes.end()) {
if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(),
(*buffer_fusion_infos)[fusion_id].inputs_list.end(),
cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) {
(*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx));
}
}
AnfNodePtr make_tuple = kernel_graph->NewCNode(make_tuple_inputs);
make_tuple->set_abstract(std::make_shared<abstract::AbstractTuple>(abstract_list));
(void)manager->Replace(outputs_list[index], make_tuple);
}
}
}
}

void GetInputList(const CNodePtr &node, const int32_t cur_fusion_id, std::vector<AnfNodePtr> *inputs_list) {
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(inputs_list);
auto &inputs = node->inputs();
for (size_t input_index = 1; input_index < inputs.size(); ++input_index) {
auto input = inputs[input_index];
if (AnfAlgo::IsRealCNodeKernel(input)) {
if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) {
auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId);
if (fusion_id != cur_fusion_id) {
inputs_list->push_back(input);
bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) {
MS_EXCEPTION_IF_NULL(node1);
MS_EXCEPTION_IF_NULL(node2);
auto getitem1 = node1->cast<CNodePtr>();
auto getitem2 = node2->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(getitem1);
MS_EXCEPTION_IF_NULL(getitem2);
auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2)));
auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2)));
return output_idx1 < output_idx2;
}

void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
auto manager = kernel_graph->manager();
MS_EXCEPTION_IF_NULL(manager);

for (auto &buffer_fusion_info : *buffer_fusion_infos) {
auto fusion_id = buffer_fusion_info.first;
auto fusion_info = buffer_fusion_info.second;
for (const auto &node : fusion_info.anf_nodes) {
if (AnfAlgo::GetOutputTensorNum(node) == 1) {
for (auto use_node : manager->node_users()[node]) {
if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) ==
fusion_info.anf_nodes.end()) {
(*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node);
break;
}
}
} else {
inputs_list->push_back(input);
}
} else if (input->isa<CNode>()) {
for (auto &input_in : input->cast<CNodePtr>()->inputs()) {
if (AnfAlgo::IsRealCNodeKernel(input_in)) {
if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) {
auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId);
if (fusion_id != cur_fusion_id) {
inputs_list->push_back(input);
int prev_idx = 0;
std::vector<AnfNodePtr> tuple_getitem_nodes;
std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(),
std::back_inserter(tuple_getitem_nodes),
[](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; });
std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare);
for (auto getitem : tuple_getitem_nodes) {
auto getitem_ptr = getitem->cast<CNodePtr>();
auto input2 = getitem_ptr->input(2);
auto output_idx = GetValue<int>(GetValueNode(input2));
for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) {
auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx));
(*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node);
}
prev_idx = output_idx + 1;
for (auto item_use_node : manager->node_users()[getitem]) {
if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) ==
fusion_info.anf_nodes.end()) {
(*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem);
break;
}
} else {
inputs_list->push_back(input);
}
}
}
} else {
inputs_list->push_back(input);
}
}
}

void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
if ((*buffer_fusion_infos).find(cur_fusion_id) == (*buffer_fusion_infos).end()) {
BufferFusionInfo_t buffer_fusion_info;
(*buffer_fusion_infos)[cur_fusion_id] = buffer_fusion_info;
}
std::vector<AnfNodePtr> inputs_list;
GetInputList(node, cur_fusion_id, &inputs_list);
if (!inputs_list.empty()) {
if (!(*buffer_fusion_infos)[cur_fusion_id].inputs_list.empty()) {
(void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list.insert(
(*buffer_fusion_infos)[cur_fusion_id].inputs_list.end(), inputs_list.begin(), inputs_list.end());
(void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.insert(
(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.end(), node);
void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list,
const AnfNodePtr &fusion_kernel) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto manager = kernel_graph->manager();
MS_EXCEPTION_IF_NULL(manager);
for (size_t idx = 0; idx < outputs_list.size(); ++idx) {
auto output = outputs_list[idx];
if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) {
auto real_output = AnfAlgo::VisitKernel(output, 0);
auto output_cnode = output->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(output_cnode);
auto input2 = output_cnode->input(2);
auto output_idx = GetValue<int>(GetValueNode(input2));
session::AnfWithOutIndex out_pair(real_output.first, output_idx);
if (kernel_graph->IsInRefOutputMap(out_pair)) {
auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair);
session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx);
kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair);
}
} else {
(*buffer_fusion_infos)[cur_fusion_id].inputs_list = inputs_list;
(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.push_back(node);
session::AnfWithOutIndex out_pair(output, 0);
if (kernel_graph->IsInRefOutputMap(out_pair)) {
auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair);
session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx);
kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair);
}
}
}
}

void InsertNode(const AnfNodePtr &node, std::vector<AnfNodePtr> *list) {
MS_EXCEPTION_IF_NULL(list);
if (std::find(list->begin(), list->end(), node) == list->end()) {
(void)list->insert(list->end(), node);
void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(fused_set);
MS_EXCEPTION_IF_NULL(candidate_fusion);
auto manager = kernel_graph.manager();
MS_EXCEPTION_IF_NULL(manager);
auto conv = cnode->input(1);
if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) {
std::vector<int> output_used_num{SizeToInt(manager->node_users()[conv].size())};
AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv);
std::unordered_set<AnfNodePtr> record{cnode, conv};
candidate_fusion->push_back(record);
fused_set->insert(record.begin(), record.end());
}
}

void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
for (auto &input : node->inputs()) {
MS_EXCEPTION_IF_NULL(input);
if (AnfAlgo::IsRealCNodeKernel(input) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) {
auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId);
if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) {
BufferFusionInfo_t buffer_fusion_info;
(*buffer_fusion_infos)[fusion_id] = buffer_fusion_info;
}
if (fusion_id != cur_fusion_id) {
InsertNode(input, &((*buffer_fusion_infos)[fusion_id].outputs_list));
}
} else if (input->isa<CNode>()) {
for (auto &input_in : input->cast<CNodePtr>()->inputs()) {
if (AnfAlgo::IsRealCNodeKernel(input_in) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) {
auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId);
if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) {
BufferFusionInfo_t buffer_fusion_info;
(*buffer_fusion_infos)[fusion_id] = buffer_fusion_info;
}
if (fusion_id != cur_fusion_id) {
InsertNode(input_in, &((*buffer_fusion_infos)[fusion_id].outputs_list));
}
}
}
void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph,
std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(fused_set);
MS_EXCEPTION_IF_NULL(candidate_fusion);
auto manager = kernel_graph.manager();
MS_EXCEPTION_IF_NULL(manager);
auto getitem = relu_input->cast<CNodePtr>();
auto bnupdate = getitem->input(1);
if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) {
std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0);
for (auto out_getitem : manager->node_users()[bnupdate]) {
auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>();
auto input2 = out_getitem_ptr->input(2);
auto output_idx = GetValue<int>(GetValueNode(input2));
output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size());
}
AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate);
std::unordered_set<AnfNodePtr> record{cnode, bnupdate};
candidate_fusion->push_back(record);
fused_set->insert(record.begin(), record.end());
}
}

void GetFusionScopeNodeList(const session::KernelGraph &kernel_graph,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
auto nodes = TopoSort(kernel_graph.get_return());
for (auto &node : nodes) {
MS_EXCEPTION_IF_NULL(node);
if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) {
auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId);
(*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node);
void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph,
std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(fused_set);
MS_EXCEPTION_IF_NULL(candidate_fusion);
auto manager = kernel_graph.manager();
MS_EXCEPTION_IF_NULL(manager);
auto add = relu_input->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(add);
auto tuple_getitem = add->input(1);
if (tuple_getitem->isa<CNode>() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) {
auto getitem = tuple_getitem->cast<CNodePtr>();
auto bnupdate = getitem->input(1);
if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) {
std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0);
for (auto out_getitem : manager->node_users()[bnupdate]) {
auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>();
auto input2 = out_getitem_ptr->input(2);
auto output_idx = GetValue<int>(GetValueNode(input2));
output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size());
}
AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate);
std::unordered_set<AnfNodePtr> record{cnode, relu_input, bnupdate};
candidate_fusion->push_back(record);
fused_set->insert(record.begin(), record.end());
}
}
}
@@ -470,15 +574,14 @@ void MatchOpNamePattern(const session::KernelGraph &kernel_graph, std::unordered
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) {
auto conv = cnode->input(1);
if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) {
auto manager = kernel_graph.manager();
MS_EXCEPTION_IF_NULL(manager);
auto &users = manager->node_users();
AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(users[conv].size()), conv);
std::unordered_set<AnfNodePtr> record({cnode, conv});
candidate_fusion->push_back(record);
fused_set->insert(record.begin(), record.end());
MatchConvBnreduce(cnode, kernel_graph, fused_set, candidate_fusion);
} else if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName ||
AnfAlgo::GetCNodeName(cnode) == prim::kPrimRelu->name()) {
auto relu_input = cnode->input(1);
if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTensorAdd->name()) {
MatchBnupdateAddRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion);
} else if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTupleGetItem->name()) {
MatchBnupdateRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion);
}
}
}
@@ -536,31 +639,15 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord
}
} // namespace

void BufferFusion::GetBufferFusionInfo(const session::KernelGraph &kernel_graph,
void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const {
MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return());
for (auto &node : node_list) {
if (!AnfAlgo::IsRealCNodeKernel(node)) {
continue;
}

int32_t cur_fusion_id = -1;
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) {
cur_fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId);
CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos);
}
// Check if current node is output
CheckCurrentNodeIsOutput(cnode, cur_fusion_id, buffer_fusion_infos);
}

GetFusionScopeNodeList(kernel_graph, buffer_fusion_infos);
GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos);
GetFusionScopeInputNodeList(kernel_graph, buffer_fusion_infos);
GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos);
for (auto &buffer_fusion_info : *buffer_fusion_infos) {
buffer_fusion_info.second.kernel_build_info =
CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list,
buffer_fusion_info.second.outputs_list);
CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list);
}
}

@@ -569,7 +656,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c
bool change = false;
std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos;
buffer_fusion_infos.clear();
GetBufferFusionInfo(*kernel_graph, &buffer_fusion_infos);
GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos);

std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos;
for (auto &buffer_fusion_info : buffer_fusion_infos) {
@@ -600,7 +687,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c
MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed";
continue;
}
change = ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_mods[fusion_id], kernel_graph);
change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph);
}
MS_LOG(DEBUG) << "End Buffer Fusion";
return change;
@@ -630,8 +717,10 @@ bool BufferFusion::MatchBufferFusionPattern(const session::KernelGraph &kernel_g
return true;
}

bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr,
bool BufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos,
int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr,
session::KernelGraph *kernel_graph) const {
auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id];
auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list,
buffer_fusion_info.anf_nodes, kernel_graph);
AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get());
@@ -650,8 +739,8 @@ bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info,
}
AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get());
AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get());
// replace node
ReplaceOldNode(buffer_fusion_info.outputs_list, buffer_fusion, kernel_graph);
SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion);
ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph);
return true;
}



+ 3
- 4
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h View File

@@ -30,7 +30,6 @@ namespace opt {
struct BufferFusionInfo_t {
std::vector<AnfNodePtr> anf_nodes;
std::vector<AnfNodePtr> inputs_list;
std::vector<AnfNodePtr> inputs_list_in;
std::vector<AnfNodePtr> outputs_list;
kernel::KernelBuildInfoPtr kernel_build_info;
};
@@ -44,10 +43,10 @@ class BufferFusion : public Pass {
bool Run(const FuncGraphPtr &graph) override;
private:
void GetBufferFusionInfo(const session::KernelGraph &kernel_graph,
void GetBufferFusionInfo(session::KernelGraph *kernel_graph,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const;
bool ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr,
session::KernelGraph *kernel_graph) const;
bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id,
const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const;
bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph) const;
bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const;
};


+ 49
- 14
mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc View File

@@ -16,6 +16,9 @@
#include "pre_activate/ascend/ir_fission/topk_split.h"
#include <vector>
#include <memory>
#include <unordered_set>
#include "pre_activate/common/helper.h"
#include "kernel/kernel_build_info.h"
#include "utils/utils.h"
#include "session/kernel_graph.h"
#include "session/anf_runtime_algorithm.h"
@@ -25,6 +28,7 @@
namespace mindspore {
namespace opt {
constexpr size_t kFloat16Len = 2; // size of float16;
constexpr size_t kTopkIndexK = 1;
namespace {
tensor::TensorPtr CreateTensor(const AnfNodePtr &node) {
// 1 create tensor
@@ -70,37 +74,68 @@ ValueNodePtr CreateValueNode(const AnfNodePtr &node) {
AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), indices_const.get());
return indices_const;
}

kernel::KernelBuildInfoPtr CreateKernelBuildInfo() {
kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
builder.SetOutputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
builder.SetInputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16});
builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeInt32});
return builder.Build();
}
} // namespace

const BaseRef TopKSplit::DefinePattern() const {
VarPtr X = std::make_shared<Var>();
MS_EXCEPTION_IF_NULL(X);
VarPtr X1 = std::make_shared<Var>();
VarPtr X2 = std::make_shared<Var>();
auto prim = std::make_shared<Primitive>(kTopKOpName);
MS_EXCEPTION_IF_NULL(prim);
return VectorRef({prim, X});
return VectorRef({prim, X1, X2});
}

const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const {
MS_EXCEPTION_IF_NULL(func_graph);
MS_EXCEPTION_IF_NULL(node);
auto kernel_graph = func_graph->cast<KernelGraphPtr>();
auto indices_const = CreateValueNode(node);
// set value node as topk's input
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "already has input size: " << cnode->inputs().size();
cnode->add_input(indices_const);
// Copy a new node to check supported.
std::vector<AnfNodePtr> new_inputs{NewValueNode(std::make_shared<Primitive>(kTopKOpName))};
new_inputs.insert(new_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end());
CNodePtr new_cnode = func_graph->NewCNode(new_inputs);
MS_EXCEPTION_IF_NULL(new_cnode);
new_cnode->set_abstract(cnode->abstract());
new_cnode->set_scope(cnode->scope());
AnfAlgo::CopyNodeAttrs(cnode, new_cnode);
CheckCNodeInputSize(new_cnode, kTopkInputNum);
// Convert the tensor input to scalar and convert it to attr
auto input_k = new_cnode->input(kTopkIndexK + 1);
MS_EXCEPTION_IF_NULL(input_k);
if (!IsValueNode<tensor::Tensor>(input_k)) {
return nullptr;
}
ValuePtr value = GetValueNode(input_k);
MS_EXCEPTION_IF_NULL(value);
auto tensor = value->cast<tensor::TensorPtr>();
MS_EXCEPTION_IF_NULL(tensor);
int32_t *data = reinterpret_cast<int32_t *>(tensor->data_c());
MS_EXCEPTION_IF_NULL(data);
auto new_value_node = std::make_shared<ValueNode>(MakeValue(*data));
new_cnode->set_input(kTopkIndexK + 1, new_value_node);

std::unordered_set<size_t> attr_index{kTopkIndexK};
ConstInputToAttr(new_cnode, attr_index);
auto indices_const = CreateValueNode(new_cnode);
new_cnode->add_input(indices_const);
MS_EXCEPTION_IF_NULL(supported_checker_);
if (!supported_checker_->CheckSupported(new_cnode, CreateKernelBuildInfo())) {
return nullptr;
}

if (kernel_graph != nullptr) {
kernel_graph->AddValueNodeToGraph(indices_const);
}

CNodePtr new_cnode = nullptr;
if (kernel_graph == nullptr) {
new_cnode = std::make_shared<CNode>(*cnode);
} else {
new_cnode = kernel_graph->NewCNode(cnode);
}
MS_EXCEPTION_IF_NULL(new_cnode);
return new_cnode;
}
} // namespace opt


+ 8
- 1
mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h View File

@@ -16,15 +16,22 @@
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_

#include <memory>
#include "pre_activate/common/optimizer.h"
#include "pre_activate/ascend/ascend_helper.h"

namespace mindspore {
namespace opt {
class TopKSplit : public PatternProcessPass {
public:
explicit TopKSplit(bool multigraph = true) : PatternProcessPass("topk_split", multigraph) {}
explicit TopKSplit(bool multigraph = true)
: PatternProcessPass("topk_split", multigraph), supported_checker_(std::make_shared<SupportedChecker>()) {}
~TopKSplit() override = default;
const BaseRef DefinePattern() const override;
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;

private:
SupportedCheckerPtr supported_checker_;
};
} // namespace opt
} // namespace mindspore


+ 35
- 3
mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc View File

@@ -72,6 +72,38 @@ AnfNodePtr GetMul0(const FuncGraphPtr &graph, const AnfNodePtr &input2, const An
}
return mul0;
}

// Decide whether the ConfusionMulGrad fusion must be abandoned for this match.
// Returns true (quit) when mul0 is not a CNode, when the matched scope belongs
// to a _VirtualDatasetCell wrapper network, or when fusing would create a
// cycle because reduce_sum already feeds mul0 (directly or via mul0's first
// input).
bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const AnfNodePtr &reduce_sum) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(mul0_anf);
  MS_EXCEPTION_IF_NULL(reduce_sum);
  if (!mul0_anf->isa<CNode>()) {
    return true;
  }
  auto mul0 = mul0_anf->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(mul0);

  // when network is _VirtualDatasetCell, quit fusion
  if (mul0->fullname_with_scope().find("network-_VirtualDatasetCell") != std::string::npos) {
    return true;
  }

  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  // Every live node should have a user entry in the manager; a missing entry
  // indicates an inconsistent graph, so raise instead of guessing.
  if (manager->node_users().find(reduce_sum) == manager->node_users().end()) {
    MS_LOG(EXCEPTION) << "node has no output in manager";
  }
  const AnfNodeIndexSet &outputs_set = manager->node_users()[reduce_sum];
  // If any consumer of reduce_sum is mul0 itself or mul0's first input, the
  // fused node would consume its own output, forming a cycle.
  auto it = std::find_if(outputs_set.begin(), outputs_set.end(), [&mul0](const std::pair<AnfNodePtr, int> &node_index) {
    return node_index.first == mul0->input(1) || node_index.first == mul0;
  });
  if (it != outputs_set.end()) {
    MS_LOG(INFO) << "ReduceSum's output node is mul0's input or mul0! If do fusion, graph will exist a circle";
    return true;
  }

  return false;
}
} // namespace

const BaseRef ConfusionMulGradFusion::DefinePattern() const {
@@ -90,9 +122,6 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons
auto reduce_sum = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(reduce_sum);
auto mul1 = reduce_sum->input(1);
if (mul1->fullname_with_scope().find("bert/encoder") == std::string::npos) {
return nullptr;
}
if (IsUsedByOthers(graph, mul1)) {
MS_LOG(INFO) << "Mul1 is used by others, quit fusion!";
return nullptr;
@@ -102,6 +131,9 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons
MS_LOG(INFO) << "Mul0 do not exist, quit fusion";
return nullptr;
}
if (QuitFusion(graph, mul0, node)) {
return nullptr;
}

auto fusion_node = CreateFusionNode(graph, reduce_sum, mul0, input3);
std::vector<AnfNodePtr> fusion_node_outputs;


+ 71
- 0
mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc View File

@@ -0,0 +1,71 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h"
#include "session/anf_runtime_algorithm.h"
#include "utils/utils.h"
#include "operator/ops.h"
#include "device/kernel_info.h"
#include "pre_activate/common/helper.h"
#include "pre_activate/common/optimizer.h"
#include "pre_activate/ascend/ascend_helper.h"

namespace mindspore {
namespace opt {
void DoRefresh(const CNodePtr &cnode) {
if (cnode == nullptr) {
MS_LOG(EXCEPTION) << "node is nullptr";
}
for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); input_index++) {
auto input_kernel_node = AnfAlgo::GetInputNode(cnode, input_index);
if (input_kernel_node->isa<Parameter>()) {
std::shared_ptr<kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
auto cnode_input_format = AnfAlgo::GetInputFormat(cnode, input_index);
auto kernel_node_format = AnfAlgo::GetOutputFormat(input_kernel_node, 0);
auto dtype = AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0);
if (kernel_node_format != cnode_input_format) {
builder->SetOutputsFormat({cnode_input_format});
builder->SetOutputsDeviceType({dtype});
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get());
}
}
}
}

// Pass entry point: walk the graph in topological order and refresh the
// parameter input formats of every BNTrainingUpdate node. Returns false only
// when the graph pointer is null; otherwise always reports success.
bool RefreshParameterFormat::Run(const FuncGraphPtr &func_graph) {
  if (func_graph == nullptr) {
    MS_LOG(ERROR) << "func_graph is nullptr.";
    return false;
  }
  for (const auto &anf_node : TopoSort(func_graph->get_return())) {
    if (anf_node == nullptr || !anf_node->isa<CNode>()) {
      continue;
    }
    auto cnode = anf_node->cast<CNodePtr>();
    if (cnode == nullptr) {
      continue;
    }
    if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingUpdateOpName) {
      DoRefresh(cnode);
    }
  }
  return true;
}
} // namespace opt
} // namespace mindspore

+ 40
- 0
mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h View File

@@ -0,0 +1,40 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_

#include <vector>
#include <memory>
#include <utility>
#include "ir/anf.h"
#include "pre_activate/common/pass.h"

namespace mindspore {
namespace opt {
// Graph pass that re-selects the kernel build info of Parameter inputs so
// their output format matches what the consuming kernel expects.
class RefreshParameterFormat : public Pass {
 public:
  explicit RefreshParameterFormat(size_t groups = 1) : Pass("refresh_parameter_format"), groups_(groups) {}
  ~RefreshParameterFormat() override = default;
  // Runs the pass over the whole graph; returns false only for a null graph.
  bool Run(const FuncGraphPtr &graph) override;

 private:
  // NOTE(review): groups_ is stored but never read inside this pass — looks
  // reserved for grouped-convolution handling; confirm before removing.
  size_t groups_ = 1;
};
} // namespace opt
} // namespace mindspore

#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_

+ 46
- 0
mindspore/ccsrc/pre_activate/common/helper.cc View File

@@ -299,6 +299,10 @@ tensor::TensorPtr CreateTensorWithValueTuple(const ValueTuplePtr &value_tuple_pt
tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) {
MS_EXCEPTION_IF_NULL(value_tuple);
tensor::TensorPtr tensor = nullptr;
if (value_tuple->value().empty()) {
MS_LOG(WARNING) << "The value tuple is empty.";
return nullptr;
}
ValuePtr v = *(value_tuple->value().begin());
MS_EXCEPTION_IF_NULL(v);
// Currently we only deal with the scalar tuple
@@ -422,5 +426,47 @@ AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePt
AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get());
return tuple_getitem;
}

// Fold constant (ValueNode) inputs of `cnode` into primitive attributes.
// `input_attrs` lists the zero-based input positions to convert; each selected
// constant input is removed from the node's input list and its value is stored
// on the primitive under that input's registered name. The primitive's
// kAttrInputNames attr is then rewritten to describe the surviving inputs.
void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) {
  MS_EXCEPTION_IF_NULL(cnode);
  std::vector<AnfNodePtr> new_inputs;
  std::vector<std::string> new_input_names;
  auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
  MS_EXCEPTION_IF_NULL(primitive);
  auto input_names = primitive->GetAttr(kAttrInputNames);
  if (input_names == nullptr) {
    // Without registered input names there is no attr name to move a value
    // to, so leave the node untouched.
    MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]";
    return;
  }
  auto input_names_vec = GetValue<std::vector<std::string>>(input_names);
  auto inputs = cnode->inputs();
  // inputs[0] is the primitive itself and is always kept.
  new_inputs.push_back(inputs[0]);
  bool need_update = false;
  for (size_t i = 0; i < inputs.size() - 1; ++i) {
    auto input_node = inputs[i + 1];
    MS_EXCEPTION_IF_NULL(input_node);
    if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) {
      auto value_node = input_node->cast<ValueNodePtr>();
      MS_EXCEPTION_IF_NULL(value_node);
      MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]";
      if (i >= input_names_vec.size()) {
        MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]";
      }
      // Move the constant's value onto the primitive under the input's name.
      primitive->set_attr(input_names_vec[i], value_node->value());
      need_update = true;
    } else {
      // Keep this input (and its name, when one is registered) in place.
      new_inputs.push_back(input_node);
      if (i < input_names_vec.size()) {
        new_input_names.push_back(input_names_vec[i]);
      }
    }
  }
  if (need_update) {
    // Update cnode's inputs
    cnode->set_inputs(new_inputs);
    // Update cnode's input_names attr
    primitive->set_attr(kAttrInputNames, MakeValue(new_input_names));
  }
}
} // namespace opt
} // namespace mindspore

+ 4
- 0
mindspore/ccsrc/pre_activate/common/helper.h View File

@@ -19,6 +19,7 @@
#include <vector>
#include <memory>
#include <string>
#include <unordered_set>
#include "ir/func_graph.h"
#include "session/kernel_graph.h"
#include "common/utils.h"
@@ -86,6 +87,7 @@ constexpr size_t kAdamApplyOneOutputNum = 3;
constexpr size_t kBackendTransDataInputNum = 2;
constexpr size_t kApplyMomentumInputNum = 6;
constexpr size_t kBiasAddInputNum = 3;
constexpr size_t kTopkInputNum = 3;

enum FusedBatchNormInput {
kX = 1,
@@ -150,6 +152,8 @@ void RemoveNopNode(session::KernelGraph *const graph);
AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx);

bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node);

void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs);
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_HELPER_H_

+ 0
- 1
mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc View File

@@ -52,7 +52,6 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() {
Register(kFlattenGradOpName, {1});
Register(kExpandDimsOpName, {1});
Register(kSplitOpName, {0});
Register(kTopKOpName, {1});
Register(kErfOpName, {1});
Register(kSparseApplyAdagradOpName, {2});
Register(kResizeNearestNeighborGrad, {1});


+ 1
- 45
mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc View File

@@ -18,10 +18,10 @@
#include <vector>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <memory>

#include "pre_activate/pass/const_input_to_attr_registry.h"
#include "pre_activate/common/helper.h"
#include "utils/utils.h"
#include "utils/context/ms_context.h"
#include "operator/ops.h"
@@ -29,50 +29,6 @@

namespace mindspore {
namespace opt {
namespace {
void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) {
MS_EXCEPTION_IF_NULL(cnode);
std::vector<AnfNodePtr> new_inputs;
std::vector<std::string> new_input_names;
auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
MS_EXCEPTION_IF_NULL(primitive);
auto input_names = primitive->GetAttr(kAttrInputNames);
if (input_names == nullptr) {
MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]";
return;
}
auto input_names_vec = GetValue<std::vector<std::string>>(input_names);
auto inputs = cnode->inputs();
new_inputs.push_back(inputs[0]);
bool need_update = false;
for (size_t i = 0; i < inputs.size() - 1; ++i) {
auto input_node = inputs[i + 1];
MS_EXCEPTION_IF_NULL(input_node);
if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) {
auto value_node = input_node->cast<ValueNodePtr>();
MS_EXCEPTION_IF_NULL(value_node);
MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]";
if (i >= input_names_vec.size()) {
MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]";
}
primitive->set_attr(input_names_vec[i], value_node->value());
need_update = true;
} else {
new_inputs.push_back(input_node);
if (i < input_names_vec.size()) {
new_input_names.push_back(input_names_vec[i]);
}
}
}
if (need_update) {
// Update cnode's inputs
cnode->set_inputs(new_inputs);
// Update cnode's input_names attr
primitive->set_attr(kAttrInputNames, MakeValue(new_input_names));
}
}
} // namespace

const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node,
const EquivPtr &) const {
if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) {


+ 3
- 1
mindspore/ccsrc/session/anf_runtime_algorithm.cc View File

@@ -825,6 +825,8 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n
static std::map<std::string, std::map<size_t, size_t>> spec_node_list = {
{prim::kPrimConv2DBackpropInput->name(), {{0, 1}, {1, 0}}},
{prim::kPrimConv2DBackpropFilter->name(), {{0, 1}, {1, 0}}},
{kFusionOpConv2DBackpropInputReluGradV2Name, {{0, 1}, {1, 0}}},
{kFusionOpConv2DBackpropInputAddNReluGradV2Name, {{0, 1}, {1, 0}}},
{prim::kPrimLogSoftmaxGrad->name(), {{0, 1}, {1, 0}}},
{prim::kPrimLayerNormGrad->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}, {4, 4}}},
{prim::kPrimLayerNormBetaGammaBackprop->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}}},
@@ -835,7 +837,7 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n
auto node_name = AnfAlgo::GetCNodeName(anf_node);
if (AnfAlgo::GetKernelType(anf_node) == TBE_KERNEL) {
auto find = spec_node_list.find(node_name);
if (find != spec_node_list.end()) {
if (find != spec_node_list.end() && cur_index < find->second.size()) {
ret = find->second[cur_index];
MS_LOG(INFO) << "Real input index change to" << ret << ", node name:" << node_name;
}


+ 8
- 11
mindspore/ccsrc/transform/util.cc View File

@@ -171,20 +171,17 @@ GeTensorPtr TransformUtil::ConvertTensor(const MeTensorPtr &tensor, const std::s
MS_LOG(ERROR) << "The Me Tensor data type size is wrong, type size is: " << type_size;
return nullptr;
}
// get tensor buff size
size_t data_buff_size = 0;
size_t elements_num = IntToSize(tensor->ElementsNum());
if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size >= elements_num) {
data_buff_size = elements_num * type_size;
if (UINT_MAX / type_size < elements_num) {
MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size
<< " overflowed UINT_MAX: " << UINT_MAX << ".";
return nullptr;
}

// get tensor buff size
size_t data_buff_size = elements_num * type_size;
if (data_buff_size == 0) {
if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size < elements_num) {
MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size
<< " overflowed UINT_MAX: " << UINT_MAX << ".";
} else {
MS_LOG(ERROR) << "The Me Tensor data buff size is 0.";
}
return nullptr;
MS_LOG(INFO) << "The Me Tensor data buff size is 0.";
}
// create ge tensor
auto desc = GetGeTensorDesc(tensor->shape_c(), tensor->data_type(), format);


+ 20
- 1
mindspore/ccsrc/utils/context/ms_context.cc View File

@@ -359,7 +359,12 @@ void MsContext::GetGeOptions(std::map<std::string, std::string> *ge_options) con
}

// Enable auto mixed precision according to the context options
(*ge_options)["ge.exec.auto_mix_precision"] = std::to_string(auto_mixed_precision_flag_);
if (auto_mixed_precision_flag_) {
(*ge_options)["ge.exec.precision_mode"] = "allow_mix_precision";
} else {
(*ge_options)["ge.exec.precision_mode"] = "must_keep_origin_dtype";
}

// Disable the global variable acc, only enable it whlie adding training graph in pipeline
(*ge_options)["ge.exec.variable_acc"] = "0";
#endif
@@ -438,4 +443,18 @@ bool MsContext::PynativeInitGe() {
is_pynative_ge_init_ = true;
return true;
}

// Report whether TSD currently holds at least one active reference, i.e.
// OpenTsd has succeeded and has not been fully closed yet.
bool MsContext::IsTsdOpened() { return tsd_ref_ > 0; }

// Report whether GE currently holds at least one active reference, i.e.
// InitGe has succeeded and FinalizeGe has not released the last reference.
bool MsContext::IsGeInited() { return ge_ref_ > 0; }
} // namespace mindspore

+ 2
- 0
mindspore/ccsrc/utils/context/ms_context.h View File

@@ -82,8 +82,10 @@ class MsContext {

bool OpenTsd();
bool CloseTsd(bool force = false);
bool IsTsdOpened();
bool InitGe();
bool FinalizeGe(bool force = false);
bool IsGeInited();
void set_enable_hccl(bool enable_hccl) { enable_hccl_ = enable_hccl; }
bool enable_hccl() const { return enable_hccl_; }
bool PynativeInitGe();


+ 4
- 0
mindspore/ccsrc/utils/utils.h View File

@@ -122,6 +122,10 @@ constexpr auto kSendOpName = "Send";
constexpr auto kRecvOpName = "Recv";
constexpr auto kReluV2OpName = "ReLUV2";
constexpr auto kReluGradV2OpName = "ReluGradV2";
constexpr auto kAddNOpName = "AddN";
constexpr auto kConv2DBackpropInputOpName = "Conv2DBackpropInput";
constexpr auto kFusionOpConv2DBackpropInputReluGradV2Name = "FusionOp_Conv2DBackpropInput_ReluGradV2";
constexpr auto kFusionOpConv2DBackpropInputAddNReluGradV2Name = "FusionOp_Conv2DBackpropInput_AddN_ReluGradV2";

// attr key name
constexpr auto kAttrInputNames = "input_names";


+ 2
- 2
mindspore/common/api.py View File

@@ -22,7 +22,7 @@ from mindspore import context
from mindspore import log as logger
from mindspore.parallel._utils import _get_parallel_mode
from .._c_expression import generate_key, Executor_, Tensor, MetaTensor
from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_ge
from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend
from .tensor import Tensor as MsTensor

# store ms_function class compiled pipeline cache
@@ -184,7 +184,7 @@ class _MindSporeFunction:

@_wrap_func
def __call__(self, *args):
init_ge()
init_backend()
converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args)
if not converted:
raise RuntimeError('Process function parameter is failure')


+ 2
- 1
mindspore/model_zoo/alexnet.py View File

@@ -15,6 +15,7 @@
"""Alexnet."""
import mindspore.nn as nn
from mindspore.common.initializer import TruncatedNormal
from mindspore.ops import operations as P

def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"):
weight = weight_variable()
@@ -44,7 +45,7 @@ class AlexNet(nn.Cell):
self.conv4 = conv(384, 384, 3, pad_mode="same")
self.conv5 = conv(384, 256, 3, pad_mode="same")
self.relu = nn.ReLU()
self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2)
self.max_pool2d = P.MaxPool(ksize=3, strides=2)
self.flatten = nn.Flatten()
self.fc1 = fc_with_initialize(6*6*256, 4096)
self.fc2 = fc_with_initialize(4096, 4096)


+ 284
- 0
mindspore/model_zoo/mobilenet.py View File

@@ -0,0 +1,284 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MobileNetV2 model define"""
import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.ops.operations import TensorAdd
from mindspore import Parameter, Tensor
from mindspore.common.initializer import initializer

__all__ = ['MobileNetV2', 'mobilenet_v2']


def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v


class GlobalAvgPooling(nn.Cell):
    """Global average pooling over the spatial dimensions.

    Reduces axes 2 and 3 (H, W) of an NCHW tensor by mean, yielding an
    (N, C) tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        # Average across height and width only; batch and channel survive.
        return self.mean(x, (2, 3))


class DepthwiseConv(nn.Cell):
    """Depthwise convolution wrapper.

    Applies ``P.DepthwiseConv2dNative`` with an explicit weight Parameter and
    an optional bias term.

    Args:
        in_planes (int): Input channel.
        kernel_size (int): Square kernel size.
        stride (int): Stride size.
        pad_mode (str): Pad mode, one of (pad, same, valid).
        pad (int): Padding amount.
        channel_multiplier (int): Output channel multiplier. Default: 1.
        has_bias (bool): Whether a bias is added. Default: False.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1)
    """

    def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False):
        super(DepthwiseConv, self).__init__()
        self.has_bias = has_bias
        self.in_channels = in_planes
        self.channel_multiplier = channel_multiplier
        self.out_channels = in_planes * channel_multiplier
        self.kernel_size = (kernel_size, kernel_size)
        self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=channel_multiplier, kernel_size=kernel_size,
                                                      stride=stride, pad_mode=pad_mode, pad=pad)
        self.bias_add = P.BiasAdd()
        self.weight = Parameter(initializer('ones', [channel_multiplier, in_planes, *self.kernel_size]),
                                name='weight')
        # Bias is only materialized when requested; construct() checks has_bias.
        self.bias = Parameter(initializer('zeros', [channel_multiplier * in_planes]), name='bias') if has_bias else None

    def construct(self, x):
        out = self.depthwise_conv(x, self.weight)
        return self.bias_add(out, self.bias) if self.has_bias else out


class ConvBNReLU(nn.Cell):
    """Convolution (or depthwise convolution) fused with BatchNorm and ReLU6.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Kernel size. Default: 3.
        stride (int): Stride of the convolution. Default: 1.
        groups (int): Channel groups; 1 selects a regular convolution, any
            other value selects a depthwise convolution. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        # 'same'-style padding for odd kernels under pad_mode='pad'.
        padding = (kernel_size - 1) // 2
        if groups == 1:
            conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad',
                             padding=padding)
        else:
            conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding)
        self.features = nn.SequentialCell([conv, nn.BatchNorm2d(out_planes), nn.ReLU6()])

    def construct(self, x):
        return self.features(x)


class InvertedResidual(nn.Cell):
    """MobileNetV2 inverted-residual block.

    Expands channels by ``expand_ratio``, applies a depthwise convolution,
    projects back down with a linear pointwise convolution, and adds the
    input when the shapes allow (stride 1 and matching channel counts).

    Args:
        inp (int): Input channel.
        oup (int): Output channel.
        stride (int): Stride of the depthwise convolution; must be 1 or 2.
        expand_ratio (int): Expansion ratio of the hidden channels.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(3, 256, 1, 1)
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = stride == 1 and inp == oup

        blocks = []
        if expand_ratio != 1:
            # pw: pointwise expansion (skipped when the ratio is 1)
            blocks.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        # dw
        blocks.append(ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim))
        # pw-linear: no activation after the projection
        blocks.append(nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False))
        blocks.append(nn.BatchNorm2d(oup))
        self.conv = nn.SequentialCell(blocks)
        self.add = TensorAdd()
        # NOTE(review): cast looks unused in construct(); kept for parity.
        self.cast = P.Cast()

    def construct(self, x):
        out = self.conv(x)
        if self.use_res_connect:
            out = self.add(x, out)
        return out


class MobileNetV2(nn.Cell):
    """
    MobileNetV2 architecture.

    Args:
        num_classes (int): Number of output classes. Default is 1000.
        width_mult (float): Channels multiplier; channels round to a multiple
            of ``round_nearest``. Default is 1.
        has_dropout (bool): Is dropout used in the head. Default is False.
        inverted_residual_setting (list): Inverted residual settings as
            (t, c, n, s) rows. Default is None, which selects the paper's
            standard configuration.
        round_nearest (int): Channel round to. Default is 8.
    Returns:
        Tensor, output tensor.

    Examples:
        >>> MobileNetV2(num_classes=1000)
    """
    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # setting of inverted residual blocks
        self.cfgs = inverted_residual_setting
        if inverted_residual_setting is None:
            self.cfgs = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        # The head width never shrinks below the base 1280 channels.
        self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                # only the first block of each stage downsamples
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head
        head = ([GlobalAvgPooling(), nn.Dense(self.out_channels, num_classes, has_bias=True)] if not has_dropout else
                [GlobalAvgPooling(), nn.Dropout(0.2), nn.Dense(self.out_channels, num_classes, has_bias=True)])
        self.head = nn.SequentialCell(head)

        self._initialize_weights()

    def construct(self, x):
        x = self.features(x)
        x = self.head(x)
        return x

    def _initialize_weights(self):
        """
        Initialize weights.

        Conv/depthwise weights get fan-out He-style normal init, BatchNorm
        gamma/beta get ones/zeros, Dense layers get N(0, 0.01) weights and
        zero bias.

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """
        for _, m in self.cells_and_names():
            if isinstance(m, (nn.Conv2d, DepthwiseConv)):
                # fan-out based scaling: kernel area times output channels
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
                                                                    m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape(), dtype="float32")))
                m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape(), dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape()).astype("float32")))
                if m.bias is not None:
                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))


def mobilenet_v2(**kwargs):
    """Build a MobileNetV2 model; keyword arguments are forwarded as-is."""
    net = MobileNetV2(**kwargs)
    return net

+ 20
- 0
mindspore/model_zoo/resnet.py View File

@@ -260,3 +260,23 @@ def resnet50(class_num=10):
[256, 512, 1024, 2048],
[1, 2, 2, 2],
class_num)

def resnet101(class_num=1001):
    """
    Get ResNet101 neural network.

    Args:
        class_num (int): Class number.

    Returns:
        Cell, cell instance of ResNet101 neural network.

    Examples:
        >>> net = resnet101(1001)
    """
    layer_nums = [3, 4, 23, 3]
    in_channels = [64, 256, 512, 1024]
    out_channels = [256, 512, 1024, 2048]
    strides = [1, 2, 2, 2]
    return ResNet(ResidualBlock, layer_nums, in_channels, out_channels, strides, class_num)

+ 2
- 2
mindspore/nn/cell.py View File

@@ -22,7 +22,7 @@ from ..common import dtype as mstype
from ..common.api import _executor
from .._checkparam import _check_str_by_regular
from ..common.parameter import Parameter, ParameterTuple
from .._c_expression import init_ge
from .._c_expression import init_backend
from ..ops.primitive import Primitive
from ..parallel._tensor import _load_tensor_by_layout
from ..parallel._utils import _get_parallel_mode
@@ -66,7 +66,7 @@ class Cell:
self._phase = 'train'
self._parameter_layout_dict = {}
self._create_time = int(time.time() * 1e9)
init_ge()
init_backend()
# call gc to release GE session resources used by non-used cell objects
gc.collect()
self._construct_inputs_num = 0


+ 7
- 6
mindspore/nn/dynamic_lr.py View File

@@ -32,6 +32,7 @@ def piecewise_constant_lr(milestone, learning_rates):

Args:
milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list.
Every element is a milestone step, and must be greater than 0.
learning_rates (Union[list[float], tuple[float]]): A list of learning rates.

Returns:
@@ -40,7 +41,7 @@ def piecewise_constant_lr(milestone, learning_rates):
Examples:
>>> milestone = [2, 5, 10]
>>> learning_rates = [0.1, 0.05, 0.01]
>>> lr = piecewise_constant_lr(milestone, learning_rates)
>>> piecewise_constant_lr(milestone, learning_rates)
[0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01]
"""
validator.check_value_type('milestone', milestone, (tuple, list), None)
@@ -100,7 +101,7 @@ def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch,
>>> total_step = 6
>>> step_per_epoch = 2
>>> decay_epoch = 1
>>> lr = exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch)
>>> exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch)
[0.1, 0.1, 0.09000000000000001, 0.09000000000000001, 0.08100000000000002, 0.08100000000000002]
"""
_check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
@@ -142,7 +143,7 @@ def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch,
>>> total_step = 6
>>> step_per_epoch = 2
>>> decay_epoch = 2
>>> lr = natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
>>> natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
[0.1, 0.1, 0.1, 0.1, 0.016529888822158657, 0.016529888822158657]
"""
_check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
@@ -185,7 +186,7 @@ def inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, deca
>>> total_step = 6
>>> step_per_epoch = 1
>>> decay_epoch = 1
>>> lr = inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
>>> inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
[0.1, 0.06666666666666667, 0.05, 0.04, 0.03333333333333333, 0.028571428571428574]
"""
_check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
@@ -227,7 +228,7 @@ def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch):
>>> total_step = 6
>>> step_per_epoch = 2
>>> decay_epoch = 2
>>> lr = cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch)
>>> cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch)
[0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01]
"""
validator.check_float_positive('min_lr', min_lr, None)
@@ -282,7 +283,7 @@ def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_e
>>> step_per_epoch = 2
>>> decay_epoch = 2
>>> power = 0.5
>>> lr = polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power)
>>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power)
[0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01]
"""
validator.check_float_positive('learning_rate', learning_rate, None)


+ 3
- 2
mindspore/nn/optim/ftrl.py View File

@@ -104,7 +104,7 @@ class FTRL(Optimizer):
self.lr_power = lr_power
self.reciprocal_scale = 1.0 / loss_scale
self.weight_decay = weight_decay
self.decay_tf = tuple((lambda:True)() for x in self.parameters)
self.decay_tf = tuple((lambda: True)() for x in self.parameters)
self.hyper_map = C.HyperMap()
self.opt = P.ApplyFtrl(use_locking=use_locking)
self.one = Tensor(1, mstype.int32)
@@ -118,5 +118,6 @@ class FTRL(Optimizer):
if self.reciprocal_scale != 1.0:
grads = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), grads)
lr = self.learning_rate
success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads, params, moments)
success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads,
params, moments)
return success

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save