
Sync code for 0715

Tag: tags/v0.6.0-beta
Author: changzherui, 5 years ago
Commit: f4cb445ea8
100 changed files with 8352 additions and 42 deletions
  1. CMakeLists.txt (+4, -0)
  2. RELEASE.md (+16, -0)
  3. build.sh (+20, -3)
  4. cmake/external_libs/icu4c.cmake (+2, -2)
  5. cmake/mind_expression.cmake (+2, -2)
  6. cmake/options.cmake (+5, -0)
  7. cmake/package.cmake (+14, -1)
  8. config/data_dump.json (+15, -0)
  9. config/op_info.config (+383, -0)
  10. graphengine (+1, -1)
  11. include/ms_tensor.h (+1, -1)
  12. mindspore/_extends/parse/parser.py (+3, -2)
  13. mindspore/_extends/parse/standard_method.py (+15, -3)
  14. mindspore/ccsrc/CMakeLists.txt (+53, -27)
  15. mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt (+66, -0)
  16. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc (+312, -0)
  17. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h (+27, -0)
  18. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc (+73, -0)
  19. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h (+30, -0)
  20. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc (+156, -0)
  21. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h (+75, -0)
  22. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc (+56, -0)
  23. mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h (+64, -0)
  24. mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto (+0, -0)
  25. mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto (+0, -0)
  26. mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto (+0, -0)
  27. mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto (+0, -0)
  28. mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto (+0, -0)
  29. mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc (+180, -0)
  30. mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h (+58, -0)
  31. mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc (+623, -0)
  32. mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h (+76, -0)
  33. mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc (+50, -0)
  34. mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h (+31, -0)
  35. mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc (+422, -0)
  36. mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h (+56, -0)
  37. mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc (+132, -0)
  38. mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h (+54, -0)
  39. mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc (+43, -0)
  40. mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h (+28, -0)
  41. mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc (+116, -0)
  42. mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h (+82, -0)
  43. mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h (+52, -0)
  44. mindspore/ccsrc/backend/kernel_compiler/common_utils.cc (+1029, -0)
  45. mindspore/ccsrc/backend/kernel_compiler/common_utils.h (+145, -0)
  46. mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc (+65, -0)
  47. mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h (+48, -0)
  48. mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc (+53, -0)
  49. mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h (+44, -0)
  50. mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc (+47, -0)
  51. mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h (+58, -0)
  52. mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc (+67, -0)
  53. mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h (+45, -0)
  54. mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc (+82, -0)
  55. mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h (+46, -0)
  56. mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc (+68, -0)
  57. mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h (+43, -0)
  58. mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc (+106, -0)
  59. mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h (+50, -0)
  60. mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc (+80, -0)
  61. mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h (+87, -0)
  62. mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc (+104, -0)
  63. mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h (+79, -0)
  64. mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc (+50, -0)
  65. mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h (+41, -0)
  66. mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc (+78, -0)
  67. mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h (+46, -0)
  68. mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc (+212, -0)
  69. mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h (+74, -0)
  70. mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc (+46, -0)
  71. mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h (+43, -0)
  72. mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc (+115, -0)
  73. mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h (+52, -0)
  74. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc (+91, -0)
  75. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h (+43, -0)
  76. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc (+93, -0)
  77. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h (+43, -0)
  78. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc (+92, -0)
  79. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h (+43, -0)
  80. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc (+141, -0)
  81. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h (+70, -0)
  82. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc (+196, -0)
  83. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h (+71, -0)
  84. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc (+71, -0)
  85. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h (+50, -0)
  86. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc (+106, -0)
  87. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h (+52, -0)
  88. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc (+40, -0)
  89. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h (+0, -0)
  90. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc (+61, -0)
  91. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h (+42, -0)
  92. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc (+69, -0)
  93. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h (+41, -0)
  94. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc (+124, -0)
  95. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h (+56, -0)
  96. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc (+52, -0)
  97. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h (+40, -0)
  98. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc (+69, -0)
  99. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h (+43, -0)
  100. mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc (+54, -0)

CMakeLists.txt (+4, -0)

@@ -17,6 +17,10 @@ else()
 set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
 endif()
 
+if (ENABLE_PYTHON)
+    add_compile_definitions(ENABLE_PYTHON)
+endif()
+
 set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")

RELEASE.md (+16, -0)

@@ -70,6 +70,22 @@ Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wo


 Contributions of any kind are welcome!
 
+# Release 0.3.1-alpha
+
+## Major Features and Improvements
+
+### Ascend 910 Training and Inference Framework
+* Frontend and User Interface
+    * Independent model init interface.
+* Data processing, augmentation, and save format
+    * Support sample padding for minddataset.
+
+## Bugfixes
+* Python API
+    * Fix bugs in the lars optimizer ([!1894](https://gitee.com/mindspore/mindspore/pulls/1894))
+* Data processing
+    * Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340))
+
 # Release 0.3.0-alpha
 
 ## Major Features and Improvements


build.sh (+20, -3)

@@ -24,8 +24,8 @@ usage()
 {
   echo "Usage:"
   echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
-  echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
-  echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
+  echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
+  echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
   echo ""
   echo "Options:"
   echo " -d Debug mode"
@@ -48,6 +48,7 @@ usage()
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
echo " -Q Enable dump memory, default off" echo " -Q Enable dump memory, default off"
echo " -D Enable dumping of function graph ir, default on" echo " -D Enable dumping of function graph ir, default on"
echo " -S Enable async data dump, default off"
echo " -z Compile dataset & mindrecord, default on" echo " -z Compile dataset & mindrecord, default on"
echo " -M Enable MPI and NCCL for GPU training, gpu default on" echo " -M Enable MPI and NCCL for GPU training, gpu default on"
echo " -V Specify the minimum required cuda version, default CUDA 10.1" echo " -V Specify the minimum required cuda version, default CUDA 10.1"
@@ -56,6 +57,7 @@ usage()
echo " -s Enable serving module, default off" echo " -s Enable serving module, default off"
echo " -B Enable debugger, default off" echo " -B Enable debugger, default off"
echo " -E Enable IBVERBS for parameter server, default off" echo " -E Enable IBVERBS for parameter server, default off"
echo " -l Compile with python dependency, default on"
} }


# check value of input is 'on' or 'off' # check value of input is 'on' or 'off'
@@ -87,6 +89,7 @@ checkopts()
ENABLE_TIMELINE="off" ENABLE_TIMELINE="off"
ENABLE_DUMP2PROTO="on" ENABLE_DUMP2PROTO="on"
ENABLE_DUMPE2E="off" ENABLE_DUMPE2E="off"
ENABLE_DATA_DUMP="off"
ENABLE_DUMP_IR="on" ENABLE_DUMP_IR="on"
COMPILE_MINDDATA="on" COMPILE_MINDDATA="on"
ENABLE_MPI="off" ENABLE_MPI="off"
@@ -98,9 +101,10 @@ checkopts()
ENABLE_SERVING="off" ENABLE_SERVING="off"
ENABLE_DEBUGGER="off" ENABLE_DEBUGGER="off"
ENABLE_IBVERBS="off" ENABLE_IBVERBS="off"
ENABLE_PYTHON="on"


# Process the options # Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
do do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in case "${opt}" in
@@ -151,6 +155,10 @@ checkopts()
         check_on_off $OPTARG p
         ENABLE_PROFILE="$OPTARG"
         ;;
+      l)
+        check_on_off $OPTARG l
+        ENABLE_PYTHON="$OPTARG"
+        ;;
       i)
         INC_BUILD="on"
         ;;
@@ -212,6 +220,11 @@ checkopts()
ENABLE_DUMPE2E="$OPTARG" ENABLE_DUMPE2E="$OPTARG"
echo "enable dump end to end" echo "enable dump end to end"
;; ;;
S)
check_on_off $OPTARG S
ENABLE_DATA_DUMP="$OPTARG"
echo "enable data dump"
;;
D) D)
check_on_off $OPTARG D check_on_off $OPTARG D
ENABLE_DUMP_IR="$OPTARG" ENABLE_DUMP_IR="$OPTARG"
@@ -315,7 +328,11 @@ build_mindspore()
if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
fi fi
if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
if [[ "X$ENABLE_MPI" = "Xon" ]]; then if [[ "X$ENABLE_MPI" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
fi fi

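For reviewers trying out the two new switches, a hedged usage sketch (illustrative invocations only; flag behavior is exactly what the usage() text above documents):

```bash
# Ascend backend build with the new async data dump enabled (-S on);
# internally this appends -DENABLE_DATA_DUMP=ON to CMAKE_ARGS.
bash build.sh -e d -j8 -S on

# CPU build that skips the python dependency (-l off), forwarded as
# -DENABLE_PYTHON=off, which gates the ENABLE_PYTHON compile definition
# added in CMakeLists.txt above.
bash build.sh -e cpu -l off
```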

cmake/external_libs/icu4c.cmake (+2, -2)

@@ -9,11 +9,11 @@ else()
         LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
         URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
         MD5 0c2662a2b0bc80b0eb56495205247c8f
-        CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json
+        CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
         )
 include_directories(${icu4c_INC})
 add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON})
 add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA})
 add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N})
 add_definitions(-D ENABLE_ICU4C)
 endif()

cmake/mind_expression.cmake (+2, -2)

@@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
 include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
 include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
 
-if (ENABLE_DEBUGGER)
+if (ENABLE_DEBUGGER OR ENABLE_SERVING)
     # build dependencies of gRPC
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
@@ -30,7 +30,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
 if(USE_GLOG)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
 endif()
-if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT ENABLE_GE)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
 endif()


cmake/options.cmake (+5, -0)

@@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
 option(ENABLE_AKG "enable akg" OFF)
 option(ENABLE_DEBUGGER "enable debugger" OFF)
 option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
+option(ENABLE_PYTHON "Enable python" ON)
 
 if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     if (WIN32)
@@ -115,6 +116,10 @@ if(ENABLE_DUMP_E2E)
     add_compile_definitions(ENABLE_DUMP_E2E)
 endif()
 
+if(ENABLE_DATA_DUMP)
+    add_compile_definitions(ENABLE_DATA_DUMP)
+endif()
+
 if(ENABLE_DEBUGGER)
     add_compile_definitions(ENABLE_DEBUGGER)
 endif()

cmake/package.cmake (+14, -1)

@@ -213,7 +213,6 @@ install(
         ${CMAKE_SOURCE_DIR}/mindspore/parallel
         ${CMAKE_SOURCE_DIR}/mindspore/mindrecord
         ${CMAKE_SOURCE_DIR}/mindspore/train
-        ${CMAKE_SOURCE_DIR}/mindspore/model_zoo
         ${CMAKE_SOURCE_DIR}/mindspore/common
         ${CMAKE_SOURCE_DIR}/mindspore/ops
         ${CMAKE_SOURCE_DIR}/mindspore/communication
@@ -261,3 +260,17 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
         COMPONENT mindspore
     )
 endif ()
+
+if (ENABLE_SERVING)
+    install(
+        TARGETS ms_serving
+        DESTINATION ${INSTALL_BASE_DIR}
+        COMPONENT mindspore
+    )
+
+    install(
+        TARGETS inference
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
+endif ()

config/data_dump.json (+15, -0)

@@ -0,0 +1,15 @@
{
    "DumpSettings": {
        "net_name": "ResNet50",
        "mode": 1,
        "iteration": 0,
        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
    },

    "DumpSettingsSpec": {
        "net_name": "net name, e.g. ResNet50",
        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
        "iteration": "specified iteration",
        "kernels": "full scope names of the ops that need to be dumped"
    }
}

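Per the DumpSettingsSpec block above, mode 0 dumps every kernel, so the kernels list is only consulted in mode 1. A minimal hypothetical variant (net name and iteration are placeholders, not taken from the commit):

```json
{
    "DumpSettings": {
        "net_name": "LeNet",
        "mode": 0,
        "iteration": 5,
        "kernels": []
    }
}
```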
config/op_info.config (+383, -0)
(file diff suppressed because it is too large)


graphengine (+1, -1)

@@ -1 +1 @@
-Subproject commit 4084909d62c159da6ba316f61ad3d02a4857b34b
+Subproject commit 31aa96ef41067a0ecdc4113ef245f8ede48f3457

include/ms_tensor.h (+1, -1)

@@ -20,7 +20,7 @@
 #include <utility>
 #include <vector>
 #include <memory>
-#include "ir/dtype/type_id.h"
+#include "mindspore/core/ir/dtype/type_id.h"
 
 namespace mindspore {
 #define MS_API __attribute__((visibility("default")))


mindspore/_extends/parse/parser.py (+3, -2)

@@ -334,7 +334,7 @@ class Parser:
     def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None:
         self.fn = fn
         self.parse_method = parse_method
-        _, self.line_offset = inspect.getsourcelines(self.fn)
+        self.line_offset = 0
         self.filename: str = inspect.getfile(self.fn)
 
         # Used to resolve the function's globals Namespace.
@@ -350,7 +350,8 @@ class Parser:
logger.debug("fn = %r", self.fn) logger.debug("fn = %r", self.fn)
tree = None tree = None
if isinstance(self.fn, (types.FunctionType, types.MethodType)): if isinstance(self.fn, (types.FunctionType, types.MethodType)):
original_src = inspect.getsource(self.fn)
lines, self.line_offset = inspect.getsourcelines(self.fn)
original_src = ''.join(lines)
hexstr = hashlib.sha256(original_src.encode()).hexdigest() hexstr = hashlib.sha256(original_src.encode()).hexdigest()
tree = Parser.ast_cache.get(hexstr) tree = Parser.ast_cache.get(hexstr)
if not tree: if not tree:

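The parser change above swaps inspect.getsource for inspect.getsourcelines: joining the returned lines is byte-for-byte what getsource returns (so the SHA-256 cache key is unchanged), while the second return value supplies the line_offset that __init__ now merely initializes to 0. A standalone sketch, independent of MindSpore:

```python
import inspect

def demo():
    return 42

# getsourcelines returns (list_of_source_lines, first_line_number);
# joining the lines reproduces getsource() exactly, and `offset` is
# the 1-based line where `demo` is defined in this file.
lines, offset = inspect.getsourcelines(demo)
assert ''.join(lines) == inspect.getsource(demo)
print(offset)
```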

mindspore/_extends/parse/standard_method.py (+15, -3)

@@ -108,7 +108,8 @@ def enumerate_(x, start=0):
"""Enumerate list or tuple.""" """Enumerate list or tuple."""
x_type = F.typeof(x) x_type = F.typeof(x)
ret = () ret = ()
if check_is_tuple_or_list(x_type, "enumerate"):
op_name = "enumerate"
if check_is_tuple_or_list(x_type, op_name, "first input") and check_is_const_int(start, op_name, "start"):
ret = zip(range(start, start + len(x)), x) ret = zip(range(start, start + len(x)), x)
return ret return ret


@@ -123,11 +124,22 @@ def while_cond(x):




 @constexpr
-def check_is_tuple_or_list(x, op_name):
+def check_is_tuple_or_list(x, op_name, arg_name):
     """check whether x is list or tuple."""
     if isinstance(x, (mstype.list_type, mstype.tuple_type)):
         return True
-    raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.")
+    raise TypeError(f"For '{op_name}', the '{arg_name}' should be tuple or list, but got {x}.")
+
+
+@constexpr
+def check_is_const_int(x, op_name, arg_name):
+    """check whether x is const int."""
+    if x is None:
+        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got not const.")
+    if not isinstance(x, int):
+        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.")
+    return True
 
 
 @constexpr
 def check_is_tensor_bool_cond(shp):


mindspore/ccsrc/CMakeLists.txt (+53, -27)

@@ -1,4 +1,5 @@
 ## common setting
+include_directories(${CMAKE_SOURCE_DIR}/mindspore/core)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 include_directories(${CMAKE_BINARY_DIR})
 link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
@@ -35,20 +36,20 @@ if(ENABLE_GPU)
     include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS})
 
     file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "device/gpu/*.cc"
-        "device/gpu/*.cu"
-        "kernel/gpu/*.cu"
-        "kernel/akg/gpu/*.cc"
-        "kernel/akg/akg_kernel_build.cc"
-        "kernel/akg/akg_kernel_attrs_process.cc"
+        "runtime/device/gpu/*.cc"
+        "runtime/device/gpu/*.cu"
+        "backend/kernel_compiler/gpu/*.cu"
+        "backend/kernel_compiler/akg/gpu/*.cc"
+        "backend/kernel_compiler/akg/akg_kernel_build.cc"
+        "backend/kernel_compiler/akg/akg_kernel_attrs_process.cc"
         )
 
     list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
-    list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
-    list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
-        "device/gpu/distribution/collective_wrapper.cc"
-        "device/gpu/distribution/mpi_wrapper.cc"
-        "device/gpu/distribution/nccl_wrapper.cc"
+    list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc")
+    list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc"
+        "runtime/device/gpu/distribution/collective_wrapper.cc"
+        "runtime/device/gpu/distribution/mpi_wrapper.cc"
+        "runtime/device/gpu/distribution/nccl_wrapper.cc"
         )
 
     set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
@@ -56,6 +57,7 @@ if(ENABLE_GPU)
     set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
     cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
     set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
+    add_compile_definitions(ENABLE_GPU)
 endif ()
 
 ## make flatuffer files
@@ -101,16 +103,20 @@ if (ENABLE_DUMP_PROTO)
 endif ()
 
 if (ENABLE_D)
-    include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
+    include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
     include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
-    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto")
+    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
     ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
     file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
     ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
 
+    file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
+    ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
+
     list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
     list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
+    list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
 
     add_compile_definitions(ENABLE_D)
 endif ()
@@ -121,18 +127,36 @@ if (MINDSPORE_PROTO_LIST)
 endif()
 
 ## make sub objects
-set(SUB_COMP
-    transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict
-    pybind_api pynative session utils vm
+set(SUB_COMP
+    transform/graph_ir
+    transform/onnx
+    backend/optimizer
+    backend/kernel_compiler
+    backend/session
+    runtime/device
+    frontend/optimizer
+    frontend/parallel
+    frontend/operator
+    pipeline/jit
+    pipeline/pynative
+    common debug gvar predict pybind_api utils vm
     )
 
 foreach (_comp ${SUB_COMP})
     add_subdirectory(${_comp})
-    if (TARGET _mindspore_${_comp}_obj)
-        list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>)
-        add_dependencies(_mindspore_${_comp}_obj proto_input flat_input)
+    string(REPLACE "/" "_" sub ${_comp})
+    if (TARGET _mindspore_${sub}_obj)
+        list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
+        add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
     endif ()
 endforeach ()
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base)
+list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_base_obj>)
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/abstract abstract)
+list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_abstract_obj>)
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
+list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
+add_dependencies(_mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input)
 
 set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
 add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
@@ -204,8 +228,8 @@ endif()


 # set c_expression building
 set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
-pybind11_add_module(_c_expression "pipeline/init.cc")
+set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
+pybind11_add_module(_c_expression "pipeline/jit/init.cc")
 
 MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
 if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -231,9 +255,11 @@ else ()
     target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
     target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
     target_link_libraries(_c_expression PRIVATE mindspore_gvar)
-    target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
-    if (${ENABLE_IBVERBS} STREQUAL "ON")
-        target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
+    if (NOT ENABLE_GE)
+        target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
+        if (${ENABLE_IBVERBS} STREQUAL "ON")
+            target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
+        endif()
     endif()
 endif ()
 
@@ -260,8 +286,8 @@ if (ENABLE_CPU)
 endif ()
 
 if (ENABLE_MINDDATA)
-    add_subdirectory(mindrecord)
-    add_subdirectory(dataset)
+    add_subdirectory(minddata/mindrecord)
+    add_subdirectory(minddata/dataset)
 endif ()
 
 # build inference
@@ -270,7 +296,7 @@ set(LOAD_ONNX_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc
     )
 add_library(inference SHARED
-    ${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc
     ${LOAD_ONNX_SRC}
     )
 target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}

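Because SUB_COMP entries now contain slashes, the foreach above derives each object-library target name with string(REPLACE); a tiny illustrative snippet (standalone, not part of the commit) showing the mapping:

```cmake
# "backend/kernel_compiler" -> target _mindspore_backend_kernel_compiler_obj
set(_comp "backend/kernel_compiler")
string(REPLACE "/" "_" sub ${_comp})
message(STATUS "expects target: _mindspore_${sub}_obj")
```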

mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt (+66, -0)

@@ -0,0 +1,66 @@
file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"kernel_build_info.cc"
"kash/*.cc"
"common_utils.cc"
"oplib/*.cc"
)

if (ENABLE_D)
file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"kernel_query.cc"
"kernel_fusion.cc"
"akg/ascend/*.cc"
"akg/akg_kernel_build.cc"
"akg/akg_kernel_attrs_process.cc"
"akg/akg_kernel_metadata.cc"
"tbe/*.cc"
"aicpu/*.cc"
"rts/*.cc"
"hccl/*.cc"
)
add_compile_definitions(ENABLE_D)
endif ()

if (ENABLE_CPU)
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"cpu/*.cc"
)

list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc"
"cpu/ps/pull_kernel.cc"
"cpu/ps/embedding_look_up_ps_kernel.cc"
"cpu/ps/embedding_look_up_proxy_kernel.cc"
"cpu/ps/apply_momentum_ps_kernel.cc"
"cpu/ps/sparse_apply_adam_ps_kernel.cc"
"cpu/ps/sparse_apply_ftrl_ps_kernel.cc")

if (NOT ENABLE_MPI)
list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
list(REMOVE_ITEM CPU_SRC_LIST "cpu/embedding_look_up_comm_grad_cpu_kernel.cc")
endif ()
endif ()

if (ENABLE_GPU)
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"gpu/*.cu"
"akg/gpu/*.cc"
"akg/akg_kernel_build.cc"
"akg/akg_kernel_attrs_process.cc"
)

file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc")
list(REMOVE_ITEM GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc")

if (ENABLE_MPI)
include(ExternalProject)
file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/nccl/*.cc")
list(APPEND GPU_SRC_LIST ${GPU_NCCL_LIST})
endif ()

# add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST})
endif()

set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})

mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc (+312, -0)

@@ -0,0 +1,312 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include <google/protobuf/text_format.h>
#include <fstream>
#include <utility>
#include <string>
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "common/utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"

namespace mindspore {
namespace kernel {
using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;

bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
std::vector<size_t> *input_size_list) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(input_size_list);
for (size_t i = 0; i < input_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {
if (!anf_node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anf_node is not CNode.";
}
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (cnode->inputs().size() < (i + 1)) {
MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1;
return false;
}
auto input_node = cnode->inputs()[i + 1];
MS_EXCEPTION_IF_NULL(input_node);
if (input_node->isa<ValueNode>()) {
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_size_list->push_back(value.size());
}
} else {
auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
input_size_list->push_back(LongToSize(size_i));
}
}
return true;
}

bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);

if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
return false;
}
kernel_mod_ptr->SetInputSizeList(input_size_list);

for (size_t i = 0; i < output_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
output_size_list.push_back(LongToSize(size_i));
}
kernel_mod_ptr->SetOutputSizeList(output_size_list);
return true;
}

void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
MS_EXCEPTION_IF_NULL(node_attr);
MS_EXCEPTION_IF_NULL(value);
if (type == "int") {
auto attr_value = GetValue<int>(value);
(*node_attr)[attr_name].set_i(attr_value);
} else if (type == "str") {
auto attr_value = GetValue<std::string>(value);
(*node_attr)[attr_name].set_s(attr_value);
} else if (type == "bool") {
auto attr_value = GetValue<bool>(value);
(*node_attr)[attr_name].set_b(attr_value);
} else if (type == "float") {
auto attr_value = GetValue<float>(value);
(*node_attr)[attr_name].set_f(attr_value);
} else if (type == "listInt") {
std::vector<int> attr_value;
auto value_type = value->type();
MS_EXCEPTION_IF_NULL(value_type);
auto value_type_str = value_type->ToString();
if (value_type_str == "Int32") {
int data = GetValue<int>(value);
attr_value.push_back(data);
} else {
attr_value = GetValue<std::vector<int>>(value);
}
mindspore::AttrValue input_shape_attr;
mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
MS_EXCEPTION_IF_NULL(input_shape_attr_list);
for (const auto shape : attr_value) {
input_shape_attr_list->add_i(shape);
}
(*node_attr)[attr_name] = input_shape_attr;
} else {
MS_LOG(EXCEPTION) << "type: " << type << "not support";
}
}

void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(proto);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
if (op_name == kPrint) {
return;
}

auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
MS_EXCEPTION_IF_NULL(op_info_ptr);
auto attrs_ptr = op_info_ptr->attrs_ptr();
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
for (const auto &attr_ptr : attrs_ptr) {
MS_EXCEPTION_IF_NULL(attr_ptr);
std::string attr_name = attr_ptr->name();
auto value = primitive->GetAttr(attr_name);
if (value != nullptr) {
if (attr_name == kQueueName || attr_name == kSharedName) {
attr_name = kChannelName;
} else if (attr_name == kSeed0) {
attr_name = kSeed;
} else if (attr_name == kSeed1) {
attr_name = kSeed2;
}
std::string type = attr_ptr->type();
ParseAttrValue(type, attr_name, value, node_attr);
}
}
MS_LOG(INFO) << "Set node attr end!";
}

void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(proto);
MS_EXCEPTION_IF_NULL(anf_node);
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
if (input_num == 0) {
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
return;
}

for (size_t input_index = 0; input_index < input_num; input_index++) {
::mindspore::Tensor *node_inputs = proto->add_inputs();
MS_EXCEPTION_IF_NULL(node_inputs);
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
std::vector<size_t> input_shape;
int32_t input_data_type;
if (input_type == kObjectTypeString) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto input_node = cnode->inputs()[input_index + 1];
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_shape.push_back(1);
input_shape.push_back(value.size());
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
} else {
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
}

mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
for (auto item : input_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
dim->set_size((::google::protobuf::int64)item);
}
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
node_inputs->set_mem_device("HBM");
}
}

void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(proto);
MS_EXCEPTION_IF_NULL(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
if (output_num == 0) {
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. ";
return;
}

for (size_t output_index = 0; output_index < output_num; output_index++) {
::mindspore::Tensor *node_outputs = proto->add_outputs();
MS_EXCEPTION_IF_NULL(node_outputs);
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
MS_EXCEPTION_IF_NULL(tensorShape);
for (auto item : output_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
MS_EXCEPTION_IF_NULL(dim);
dim->set_size((::google::protobuf::int64)item);
}
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
node_outputs->set_mem_device("HBM");
}
}

void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(proto);
MS_LOG(INFO) << "SetNodedefProto entry";
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
// set op name
proto->set_op(op_name);
// set inputs tensor
SetNodeInputs(anf_node, proto);
// set outputs tensor
SetNodeOutputs(anf_node, proto);
// set node attr
SetNodeAttr(anf_node, proto);
MS_LOG(INFO) << "SetNodedefProto end!";
}

bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
MS_EXCEPTION_IF_NULL(anf_node);
MS_LOG(INFO) << "CreateNodeDefBytes entry";

mindspore::NodeDef proto;
SetNodedefProto(anf_node, &proto);
std::string nodeDefStr;
if (!proto.SerializeToString(&nodeDefStr)) {
MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
return false;
}
kernel_mod_ptr->SetNodeDef(nodeDefStr);
MS_LOG(INFO) << "CreateNodeDefBytes end!";
return true;
}

KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
kernel_mod_ptr->SetAnfNode(anf_node);
kernel_mod_ptr->SetNodeName(op_name);
if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Set input output size list failed.";
}
return kernel_mod_ptr;
}
} // namespace kernel
} // namespace mindspore

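Both SetIOIputSize and SetIOSize above reduce to the same arithmetic: bytes = (product of the device-shape dims) x sizeof(dtype), with every multiplication overflow-checked. A self-contained sketch of that core (the helper names below are assumptions that mirror LongMulWithOverflowCheck/GetTypeByte, not the actual MindSpore APIs):

```cpp
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

// Overflow-checked int64 multiply, analogous to LongMulWithOverflowCheck.
int64_t MulWithOverflowCheck(int64_t a, int64_t b) {
  if (a != 0 && b > std::numeric_limits<int64_t>::max() / a) {
    throw std::overflow_error("int64 multiplication overflow");
  }
  return a * b;
}

// bytes = dim0 * dim1 * ... * dimN * type_byte, as computed per tensor above.
size_t TensorByteSize(const std::vector<size_t> &shape, size_t type_byte) {
  int64_t size = 1;
  for (size_t dim : shape) {
    size = MulWithOverflowCheck(size, static_cast<int64_t>(dim));
  }
  size = MulWithOverflowCheck(size, static_cast<int64_t>(type_byte));
  return static_cast<size_t>(size);
}
```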
mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.h (+27, -0)

@@ -0,0 +1,27 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#include <memory>
#include "backend/kernel_compiler/kernel.h"

namespace mindspore {
namespace kernel {
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_

mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.cc (+73, -0)

@@ -0,0 +1,73 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
#include <memory>
#include <string>
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/anf_runtime_algorithm.h"

namespace mindspore {
namespace kernel {
void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
MS_LOG(INFO) << "AicpuMetadataInfo.";
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
if (op_info_ptr == nullptr) {
MS_LOG(DEBUG) << "Aicpu does not have op [" << op_name << "]";
return;
}
// For compatibility with the current framework
if (op_name == kPrint || op_name == kGetNext || op_name == kPack) {
std::vector<std::string> inputs_format{};
std::vector<TypeId> inputs_type{};
if (op_name == kPrint || op_name == kPack) {
for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
inputs_format.emplace_back(kOpFormat_DEFAULT);
inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
}
}
std::vector<std::string> outputs_format;
std::vector<TypeId> outputs_type;
for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
outputs_format.emplace_back(kOpFormat_DEFAULT);
outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
}
auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
builder.SetInputsFormat(inputs_format);
builder.SetInputsDeviceType(inputs_type);
builder.SetOutputsFormat(outputs_format);
builder.SetOutputsDeviceType(outputs_type);
builder.SetProcessor(AICPU);
builder.SetKernelType(AICPU_KERNEL);
builder.SetFusionType(OPAQUE);
kernel_info_list->push_back(builder.Build());
return;
}
if (!ParseMetadata(kernel_node, op_info_ptr, AICPU, kernel_info_list)) {
MS_LOG(WARNING) << "Aicpu parsed metadata op [" << op_name << "] failed";
return;
}
}
} // namespace kernel
} // namespace mindspore

mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h (+30, -0)

@@ -0,0 +1,30 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_

#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"

namespace mindspore {
namespace kernel {
void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_

mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc (+156, -0)

@@ -0,0 +1,156 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"

#include <memory>
#include <vector>
#include <string>
#include <algorithm>

#include "runtime/mem.h"
#include "runtime/rt.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include "utils/convert_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "utils/context/ms_context.h"

using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;

namespace mindspore {
namespace kernel {
constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";

AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}

AicpuOpKernelMod::~AicpuOpKernelMod() {
args_.clear();
inputList_.clear();
outputList_.clear();
anf_node_ = nullptr;
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
}

void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetInputSizeList() const { return input_size_list_; }
void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
anf_node_ = anf_node;
}

void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";

node_so_ = AICPU_OPS_SO_NAME;

// InputOutputAddr
vector<void *> io_addrs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(io_addrs),
[](const AddressPtr &output) -> void * { return output->addr; });

auto io_addrs_num = io_addrs.size();
// calculate paramLen: AicpuParamHead.len + ioAddrsSize + notifyId.len + customizedAttr.len
auto param_len = sizeof(AicpuParamHead);

// get input and output addrs size, no need to check overflow
auto io_addrs_size = io_addrs_num * sizeof(uint64_t);
// refresh paramLen, no need to check overflow
param_len += io_addrs_size;

auto node_def_len = node_def_str_.length();
param_len += node_def_len;

// Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
args_.clear();
(void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
// TaskArgs append ioAddrs
if (io_addrs_size != 0) {
(void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
}

// When it's aicpu customized ops, taskArgs should append customized attr
if (node_def_len != 0) {
(void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
}

MS_LOG(INFO) << "CreateCpuKernelInfoOffline end";
}

bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}

CreateCpuKernelInfo(inputs, outputs);
if (node_name_ == kTopK) {
node_name_ = kTopKV2;
}
MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
<< ", args_size:" << args_.length();
if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
reinterpret_cast<const void *>(node_name_.c_str()), 1,
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
stream_ptr) != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Aicpu op launch failed!";

return false;
}
return true;
}

std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";

stream_id_ = stream_id;
node_so_ = AICPU_OPS_SO_NAME;
std::vector<void *> input_data_addrs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
[](const AddressPtr &input) -> void * { return input->addr; });

std::vector<void *> output_data_addrs;
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
[](const AddressPtr &output) -> void * { return output->addr; });

if (node_name_ == kTopK) {
node_name_ = kTopKV2;
}

AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());

MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};
}
} // namespace kernel
} // namespace mindspore

mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h (+75, -0)

@@ -0,0 +1,75 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
#include <vector>
#include <memory>
#include <string>
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
namespace mindspore {
namespace kernel {
class AicpuOpKernelMod : public AscendKernelMod {
public:
AicpuOpKernelMod();
~AicpuOpKernelMod() override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;

void SetInputList(const std::vector<int64_t> &inputList);
void SetOutputList(const std::vector<int64_t> &outputList);
void SetAnfNode(const AnfNodePtr &anf_node);
void SetNodeDef(const std::string &nodeDef);
void SetNodeName(const std::string &node_name);

/**
 * @brief Build the AICPU engine kernel structure and allocate device memory for offline task generation.
 */
void CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

void SetInputSizeList(const std::vector<size_t> &size_list);
void SetOutputSizeList(const std::vector<size_t> &size_list);
void SetWorkspaceSizeList(const std::vector<size_t> &size_list);
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;

private:
std::string args_;
std::string node_def_str_;
std::string node_name_;
std::string node_so_;
std::vector<int64_t> inputList_;
std::vector<int64_t> outputList_;
AnfNodePtr anf_node_;

std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};

using AicpuOpKernelModPtr = std::shared_ptr<AicpuOpKernelMod>;
using AicputOpKernelModPtrList = std::vector<AicpuOpKernelModPtr>;
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_

+ 56
- 0
mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc

@@ -0,0 +1,56 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include <vector>
#include <string>
#include "proto/types.pb.h"
#include "runtime/mem.h"
#include "runtime/rt.h"
#include "utils/convert_utils.h"
#include "backend/session/anf_runtime_algorithm.h"

namespace mindspore {
namespace kernel {
static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = {
{mindspore::TypeId::kTypeUnknown, mindspore::DataType::MS_UNKNOWN},
{mindspore::TypeId::kNumberTypeBool, mindspore::DataType::MS_BOOL},
{mindspore::TypeId::kNumberTypeInt, mindspore::DataType::MS_INT32},
{mindspore::TypeId::kNumberTypeInt8, mindspore::DataType::MS_INT8},
{mindspore::TypeId::kNumberTypeInt16, mindspore::DataType::MS_INT16},
{mindspore::TypeId::kNumberTypeInt32, mindspore::DataType::MS_INT32},
{mindspore::TypeId::kNumberTypeInt64, mindspore::DataType::MS_INT64},
{mindspore::TypeId::kNumberTypeUInt, mindspore::DataType::MS_UINT32},
{mindspore::TypeId::kNumberTypeUInt8, mindspore::DataType::MS_UINT8},
{mindspore::TypeId::kNumberTypeUInt16, mindspore::DataType::MS_UINT16},
{mindspore::TypeId::kNumberTypeUInt32, mindspore::DataType::MS_UINT32},
{mindspore::TypeId::kNumberTypeUInt64, mindspore::DataType::MS_UINT64},
{mindspore::TypeId::kNumberTypeFloat16, mindspore::DataType::MS_FLOAT16},
{mindspore::TypeId::kNumberTypeFloat, mindspore::DataType::MS_FLOAT32},
{mindspore::TypeId::kNumberTypeFloat32, mindspore::DataType::MS_FLOAT32},
{mindspore::TypeId::kNumberTypeFloat64, mindspore::DataType::MS_FLOAT64},
};

int AicpuOpUtil::MsTypeToProtoType(TypeId ms_type) {
auto iter = MS_PROTO_DATA_TYPE_MAP.find(ms_type);
if (iter != MS_PROTO_DATA_TYPE_MAP.end()) {
return iter->second;
}
MS_LOG(ERROR) << "Unsupported ms_type value: " << static_cast<int>(ms_type);
return -1;
}
} // namespace kernel
} // namespace mindspore
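
Side note on the lookup above: find-then-dereference does a single map traversal and, unlike operator[], cannot insert a default-constructed entry for an unknown key. A minimal illustration of the idiom (LookupOr is a hypothetical helper, not part of this patch):

#include <cstdint>
#include <map>

// Single lookup with an explicit fallback; const-correct, no accidental insertion.
int32_t LookupOr(const std::map<int32_t, int32_t> &table, int32_t key, int32_t fallback) {
  auto iter = table.find(key);
  return iter != table.end() ? iter->second : fallback;
}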

+ 64
- 0
mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h

@@ -0,0 +1,64 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_

#include <cstdint>
#include <vector>
#include <map>
#include <string>
#include "backend/kernel_compiler/kernel.h"

namespace mindspore {
namespace kernel {
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
constexpr auto kInitData = "InitData";
constexpr auto kGetNext = "GetNext";
constexpr auto kPrint = "Print";
constexpr auto kPack = "Pack";
constexpr auto kOutputTypes = "output_types";
constexpr auto kOutputShapes = "output_shapes";
constexpr auto kChannelName = "channel_name";
constexpr auto kSharedName = "shared_name";
constexpr auto kShapes = "shapes";
constexpr auto kTypes = "types";
constexpr auto kQueueName = "queue_name";
constexpr auto kSeed = "seed";
constexpr auto kSeed0 = "Seed0";
constexpr auto kSeed1 = "Seed1";
constexpr auto kSeed2 = "seed2";
constexpr auto kTopK = "TopK";
constexpr auto kTopKV2 = "TopKV2";

struct AicpuParamHead {
uint32_t length; // Total length: includes custom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address
} __attribute__((packed));

class AicpuOpUtil {
public:
static int MsTypeToProtoType(TypeId ms_type);

private:
// kernel id
static uint64_t KernelId_;
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
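
Note: the packed attribute on AicpuParamHead is load-bearing: the head is byte-copied into the task args buffer, so there must be no compiler padding. A quick sketch of the difference, assuming a typical LP64 GCC/Clang target (struct names here are illustrative):

#include <cstdint>

struct PackedHead {
  uint32_t length;
  uint32_t ioAddrNum;
  uint32_t extInfoLength;
  uint64_t extInfoAddr;
} __attribute__((packed));

struct UnpackedHead {
  uint32_t length;
  uint32_t ioAddrNum;
  uint32_t extInfoLength;
  uint64_t extInfoAddr;  // default alignment pads this to an 8-byte boundary
};

static_assert(sizeof(PackedHead) == 20, "wire format: 3 * 4 + 8 bytes, no padding");
static_assert(sizeof(UnpackedHead) == 24, "4 bytes of padding inserted before extInfoAddr");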

mindspore/ccsrc/kernel/aicpu/proto/attr.proto → mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto


mindspore/ccsrc/kernel/aicpu/proto/node_def.proto → mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto


mindspore/ccsrc/kernel/aicpu/proto/tensor.proto → mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto


mindspore/ccsrc/kernel/aicpu/proto/tensor_shape.proto → mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto


mindspore/ccsrc/kernel/aicpu/proto/types.proto → mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto


+ 180
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc

@@ -0,0 +1,180 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"

#include <algorithm>
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/optimizer/common/helper.h"

namespace mindspore {
namespace kernel {
void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// The x and output are akg op input and output param.
std::vector<std::string> input_names = {"x"};
std::vector<std::string> output_names = {"output"};
AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node);

TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
std::string dst_type;
if (dst_type_id == kFloat32->type_id()) {
dst_type = "float32";
} else if (dst_type_id == kFloat16->type_id()) {
dst_type = "float16";
}
AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node);
}

void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names = {"x"};
std::vector<std::string> output_names = {"output"};
AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node);
std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(anf_node, 0);
if (origin_shape.size() != kShape4dDims) {
MS_LOG(EXCEPTION) << "The dim of origin_shape is not equal to 4, but it's dim is " << origin_shape.size() << ".";
}
std::vector<int> shape_transform;
(void)std::transform(origin_shape.begin(), origin_shape.end(), std::back_inserter(shape_transform),
[](const size_t &dim) { return static_cast<int>(dim); });
AnfAlgo::SetNodeAttr("shape4d", MakeValue(shape_transform), anf_node);
AnfAlgo::SetNodeAttr("output_format", MakeValue(kOpFormat_NCHW), anf_node);

TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
std::string dst_type;
if (dst_type_id == kFloat32->type_id()) {
dst_type = "float32";
} else if (dst_type_id == kFloat16->type_id()) {
dst_type = "float16";
}
AnfAlgo::SetNodeAttr("dstType", MakeValue(dst_type), anf_node);
}

void SetAkgAttrsForCast(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// The x and output are akg op input and output param.
std::vector<std::string> input_names = {"x", "dst_type"};
std::vector<std::string> output_names = {"output"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);

std::string dst_type;
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
if (output_type == kFloat32->type_id()) {
dst_type = "float32";
} else if (output_type == kFloat16->type_id()) {
dst_type = "float16";
} else if (output_type == kInt32->type_id()) {
dst_type = "int32";
} else {
MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString();
}
AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node);
}

void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names{"dy", "data", "mean"};
std::vector<std::string> output_names{"dgamma_red_hw", "dbeta_red_hw", "data_minus_mean"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);
}

void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node) {
const size_t kBNGrad2InputSize = 5;
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names{"dgamma_red_hw", "dbeta_red_hw", "variance", "gamma"};
std::vector<std::string> output_names{"bn_scale", "bn_bias", "rs", "dgamma_dx", "dbeta_dx"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (cnode->inputs().size() < kBNGrad2InputSize) {
MS_LOG(EXCEPTION) << "The inputs size of BNGrad2 is less then " << kBNGrad2InputSize;
}
auto input1 = cnode->input(1);
MS_EXCEPTION_IF_NULL(input1);
auto tuple_getitem = input1->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(tuple_getitem);
if (tuple_getitem->inputs().size() < kTupleGetItemInputSize) {
MS_LOG(EXCEPTION) << "The inputs size of tuple_getitem is less then " << kTupleGetItemInputSize;
}
auto bn_grad1 = tuple_getitem->input(kRealInputNodeIndexInTupleGetItem);
std::vector<size_t> data_shape = AnfAlgo::GetInputDeviceShape(bn_grad1, 0);
AnfAlgo::SetNodeAttr(kAttrDataShape, MakeValue(opt::Convert2Int(data_shape)), anf_node);
}

void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> input_names{"dy", "rs", "dgamma_dx", "dbeta_dx", "data_minus_mean"};
std::vector<std::string> output_names{"dx"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node);
}

void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// Set attr for fused_bn1
std::vector<std::string> fused_bn1_input_names{"data"};
std::vector<std::string> fused_bn1_output_names{"mean", "var_part"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn1_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn1_output_names), anf_node);
}

void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// Set attr for fused_bn2
std::vector<std::string> fused_bn2_input_names{"mean", "var_part", "running_mean", "running_var"};
std::vector<std::string> fused_bn2_output_names{"variance", "running_mean", "running_variance"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn2_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn2_output_names), anf_node);
}

void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
// Set attr for fused_bn3
std::vector<std::string> fused_bn3_input_names{"data", "mean", "variance", "gamma", "beta"};
std::vector<std::string> fused_bn3_output_names{"y"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn3_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn3_output_names), anf_node);
}

void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> conv_bn1_output_names{"data", "var_part", "mean"};
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(conv_bn1_output_names), anf_node);
}

void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> bn2_add_relu_input_names{"data", "var_part", "mean", "other_branch_data",
"gamma", "beta", "running_mean", "running_var"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_add_relu_input_names), anf_node);
std::vector<std::string> bn2_add_relu_output_names{"output", "running_mean", "running_variance", "save_inv_variance"};
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_add_relu_output_names), anf_node);
}

void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::vector<std::string> bn2_input_names{"data", "var_part", "mean", "gamma", "beta", "running_mean", "running_var"};
std::vector<std::string> bn2_output_names{"y", "running_mean", "running_variance", "save_inv_variance"};
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_input_names), anf_node);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_output_names), anf_node);
}
} // namespace kernel
} // namespace mindspore

+ 58
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.h

@@ -0,0 +1,58 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H
#define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H

#include <vector>
#include <memory>
#include <string>
#include <unordered_map>
#include "ir/anf.h"
#include "utils/utils.h"
#include "frontend/operator/ops.h"

namespace mindspore {
namespace kernel {
void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node);
void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node);
void SetAkgAttrsForCast(const AnfNodePtr &anf_node);
void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node);
void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node);
void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node);
void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node);
void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node);
void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node);
void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node);
void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node);
void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node);

const std::unordered_map<std::string, std::function<void(const AnfNodePtr &anf_node)>> kAkgKernelAttrsProcessMap = {
{kFour2FiveOpName, SetAkgAttrsForFour2Five},
{kFive2FourOpName, SetAkgAttrsForFive2Four},
{"Cast", SetAkgAttrsForCast},
{kBNGrad1OpName, SetAkgAttrsForBNGrad1},
{kBNGrad2OpName, SetAkgAttrsForBNGrad2},
{kBNGrad3OpName, SetAkgAttrsForBNGrad3},
{kFusedBN1OpName, SetAkgAttrsForFusedBN1},
{kFusedBN2OpName, SetAkgAttrsForFusedBN2},
{kFusedBN3OpName, SetAkgAttrsForFusedBN3},
{kConvBN1OpName, SetAkgAttrsForConvBN1},
{kBN2AddReluOpName, SetAkgAttrsForBN2AddRelu},
{kBN2ReLUOpName, SetAkgAttrsForBN2Relu},
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H
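
For context, kAkgKernelAttrsProcessMap is used as a dispatch table: build code looks the op name up and, if a handler is registered, invokes it on the node before JSON generation (see AkgKernelBuild::BuildByJson below). A minimal standalone sketch of the pattern, with placeholder handlers rather than the real ones:

#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

using AttrsHandler = std::function<void(const std::string &)>;

// One handler per op name; ops without an entry simply skip attr preprocessing.
const std::unordered_map<std::string, AttrsHandler> kHandlers = {
  {"Cast", [](const std::string &node) { std::cout << "set Cast attrs on " << node << "\n"; }},
};

void PreprocessAttrs(const std::string &op_name, const std::string &node) {
  auto it = kHandlers.find(op_name);
  if (it != kHandlers.end()) {
    it->second(node);
  }
}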

+ 623
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc

@@ -0,0 +1,623 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include <Python.h>
#include <sys/types.h>
#include <signal.h>
#include <unistd.h>
#include <dirent.h>
#include <cctype>
#include <cstdint>
#include <memory>
#include <map>
#include <utility>
#include <algorithm>
#include <functional>
#include <sstream>
#include <iterator>
#include <numeric>
#include <unordered_set>
#include "common/utils.h"
#include "utils/convert_utils.h"
#include "utils/any.h"
#include "utils/utils.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"

namespace mindspore {
namespace kernel {
constexpr int ME_MAX_KERNEL_NAME_LENGTH = 200;
constexpr int32_t ARGS_SIZE = 1;
constexpr auto kCompileWithJsonFunc = "compilewithjson";

// json key
constexpr auto kOpDesc = "op_desc";
constexpr auto kInputDesc = "input_desc";
constexpr auto kShape = "shape";
constexpr auto kDataType = "data_type";
constexpr auto kOutputDesc = "output_desc";
constexpr auto kName = "name";
constexpr auto kTensorName = "tensor_name";
constexpr auto kValue = "value";
constexpr auto KDynInputSizes = "dyn_input_sizes";
constexpr auto KInputNames = "input_names";
constexpr auto KInput = "input";
constexpr auto KDtype = "dtype";
namespace {
template <typename T>
std::string Vector2Str(const std::vector<T> &inputs) {
if (!inputs.empty()) {
std::ostringstream oss;
(void)std::copy(inputs.begin(), inputs.end() - 1, std::ostream_iterator<T>(oss, ", "));
oss << inputs.back();
return oss.str();
}
return "";
}
} // namespace

std::string AkgKernelBuild::PyObjectToStr(PyObject *const PyObj) {
char *pChar = nullptr;
std::string str_res;
if (PyObj == nullptr) {
MS_LOG(ERROR) << "Input parameter is nullptr.";
return str_res;
}
PyObject *strArgs = PyObject_Str(PyObj);
if (strArgs != nullptr) {
(void)PyArg_Parse(strArgs, "s", &pChar);
}
if (pChar == nullptr) {
MS_LOG(ERROR) << "pChar is nullptr.";
return str_res;
}
str_res = pChar;
return str_res;
}

std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag,
const std::pair<size_t, size_t> &position) {
if (node_json.count(tag) == 0) {
MS_LOG(ERROR) << "Node [" << node_json.dump() << "] has no key [" << tag << "].";
return "";
}

auto const &tag_desc = node_json[tag];
nlohmann::json first_index;
if (tag == kOutputDesc) {
first_index = tag_desc;
} else if (!tag_desc.is_array() || tag_desc.size() <= position.first) {
MS_LOG(ERROR) << "Node [" << tag_desc.dump() << "] has no enough value [" << position.first << "].";
return "";
} else {
first_index = tag_desc[position.first];
}

if (!first_index.is_array() || first_index.size() <= position.second) {
MS_LOG(ERROR) << "Node [" << first_index.dump() << "] has no enough value [" << position.second << "].";
return "";
}
auto const &second_index = first_index[position.second];
if (second_index.count(kTensorName) == 0) {
MS_LOG(ERROR) << "Node [" << second_index.dump() << "] has no key [" << kTensorName << "].";
return "";
}

return second_index[kTensorName];
}

void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair<size_t, size_t> &position,
nlohmann::json *const node_json) {
MS_EXCEPTION_IF_NULL(node_json);
if (node_json->count(tag) == 0) {
MS_LOG(ERROR) << "Node [" << node_json->dump() << "] has no key [" << tag << "].";
return;
}

nlohmann::json *tag_desc = &((*node_json)[tag]);
nlohmann::json *first_index;
if (tag == kOutputDesc) {
first_index = tag_desc;
} else if (!tag_desc->is_array() || tag_desc->size() <= position.first) {
MS_LOG(ERROR) << "Node [" << tag_desc->dump() << "] has no enough value [" << position.first << "].";
return;
} else {
first_index = &((*tag_desc)[position.first]);
}

if (!first_index->is_array() || first_index->size() <= position.second) {
MS_LOG(ERROR) << "Node [" << first_index->dump() << "] has no enough value [" << position.second << "].";
return;
}
nlohmann::json *second_index = &((*first_index)[position.second]);
if (second_index->count(kTensorName) == 0) {
MS_LOG(ERROR) << "Node [" << second_index->dump() << "] has no key [" << kTensorName << "].";
return;
}
(*second_index)[kTensorName] = new_name;
return;
}

int AkgKernelBuild::op_cnt_ = 0;
std::mutex AkgKernelBuild::op_cnt_mtx_;

std::string AkgKernelBuild::GetProcessor(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string device;
switch (AnfAlgo::GetProcessor(anf_node)) {
case Processor::AICORE:
device = kProcessorAiCore;
break;

case Processor::AICPU:
device = kProcessorAiCpu;
break;

case Processor::CUDA:
device = kProcessorCuda;
break;

default:
MS_LOG(ERROR) << "Unknown processor type.";
break;
}

return device;
}

bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *const input_size,
std::vector<size_t> *const output_size) {
if (input_size == nullptr || output_size == nullptr) {
MS_LOG(ERROR) << "input size or output size is nullptr";
return false;
}
input_size->clear();
output_size->clear();

for (size_t i = 0; i < node_json[kInputDesc].size(); i++) {
for (size_t m = 0; m < node_json[kInputDesc][i].size(); m++) {
std::string dtype = node_json[kInputDesc][i][m][kDataType];
size_t nbyte = GetDtypeNbyte(dtype);
size_t size_i = std::accumulate(node_json[kInputDesc][i][m][kShape].begin(),
node_json[kInputDesc][i][m][kShape].end(), nbyte, std::multiplies<size_t>());
input_size->push_back(size_i);
}
}

for (size_t i = 0; i < node_json[kOutputDesc].size(); i++) {
std::string dtype = node_json[kOutputDesc][i][kDataType];
size_t nbyte = GetDtypeNbyte(dtype);
size_t size_i = std::accumulate(node_json[kOutputDesc][i][kShape].begin(), node_json[kOutputDesc][i][kShape].end(),
nbyte, std::multiplies<size_t>());
output_size->push_back(size_i);
}

return true;
}

int AkgKernelBuild::GetOpCntInc() {
op_cnt_mtx_.lock();
int cnt = op_cnt_++;
op_cnt_mtx_.unlock();
return cnt;
}

bool AkgKernelBuild::CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(inputs_json);

std::string op_name = AnfAlgo::GetCNodeName(anf_node);
auto op_info = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG);
if (op_info == nullptr) {
MS_LOG(ERROR) << "Apply kernel [" << op_name << "] op_info is nullptr";
return false;
}

std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr = op_info->inputs_ptr();
if (inputs_ptr.empty()) {
MS_LOG(INFO) << "Apply kernel [" << op_name << "] regist info has no input info";
return true;
}
auto op_info_input_num = inputs_ptr.size();

// for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input.
std::vector<int> dyn_input_sizes;
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);

if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
}

size_t real_input_index = 0;
std::vector<nlohmann::json> input_list;
for (size_t i = 0; i < op_info_input_num; i++) {
size_t input_tensor_num;
std::shared_ptr<OpIOInfo> input_ptr = inputs_ptr[i];
std::string op_input_name;
if (input_ptr == nullptr) {
MS_LOG(ERROR) << "Apply kernel [" << op_name << "] regist input[" << i << "] is nullptr";
return false;
}

op_input_name = input_ptr->name();
if (dyn_input_sizes.empty()) {
input_tensor_num = 1;
} else {
input_tensor_num = IntToSize(dyn_input_sizes[i]);
}

input_list.clear();
for (size_t input_i = 0; input_i < input_tensor_num; input_i++) {
// dtype : float16
auto type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index);
std::string dtype = TypeId2String(type_id);
if (dtype.empty()) {
MS_LOG(ERROR) << "Op [" << op_name << "] input [" << input_i << "] data type is null. ";
return false;
}
nlohmann::json input_desc_json;
input_desc_json[kDataType] = dtype;
input_desc_json[kName] = op_input_name;
input_desc_json[kTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index));
auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index);
if (anf_node->func_graph() != nullptr && anf_node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) &&
GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
MS_LOG(WARNING) << "we take input[" << real_input_index << "] of [" << anf_node->DebugString(2)
<< "] as const tensor, shape: [" << Vector2Str(input_shape)
<< "], value: " << input_desc_json[kValue];

input_shape.clear();
}
if (input_shape.empty()) {
input_shape.push_back(1);
}
input_desc_json[kShape] = input_shape;
input_list.emplace_back(input_desc_json);
real_input_index++;
}
inputs_json->emplace_back(input_list);
}
return true;
}

bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(outputs_json);
size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);

auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG);
MS_EXCEPTION_IF_NULL(op_info_ptr);
auto outputs = op_info_ptr->outputs_ptr();
for (size_t i = 0; i < output_tensor_num; i++) {
nlohmann::json output_json;
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, i);
std::string dtype = TypeId2String(type_id);
if (dtype.empty()) {
MS_LOG(ERROR) << "Op [" << op_name << "] output [" << i << "] data type is null. ";
return false;
}

std::string output_name = outputs[i]->name();
output_json[kDataType] = dtype;
output_json[kName] = output_name;
output_json[kTensorName] = "output_" + std::to_string(i) + "_" + std::to_string(GetOutputTensorIdxInc());
output_json[kShape] = AnfAlgo::GetOutputDeviceShape(anf_node, i);
outputs_json->push_back(output_json);
}
return true;
}

void GetJson(const AnfNodePtr &anf_node, const std::vector<int> &dyn_input_sizes,
const std::shared_ptr<OpAttr> &op_attr, nlohmann::json *const attr_json, const ValuePtr &attr_value) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(op_attr);
MS_EXCEPTION_IF_NULL(attr_json);
std::string type = op_attr->type();
if (type == "int") {
(*attr_json)[kValue] = GetValue<int>(attr_value);
} else if (type == "str") {
(*attr_json)[kValue] = GetValue<std::string>(attr_value);
} else if (type == "bool") {
(*attr_json)[kValue] = GetValue<bool>(attr_value);
} else if (type == "float") {
(*attr_json)[kValue] = GetValue<float>(attr_value);
} else if (type == "listInt") {
(*attr_json)[kValue] = GetValue<std::vector<int>>(attr_value);
} else if (type == "listStr") {
std::vector<std::string> data_format;
if (op_attr->name() == kArgDataformat) {
size_t tensor_args_num = !dyn_input_sizes.empty() ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node);
for (size_t format_i = 0; format_i < tensor_args_num; format_i++) {
auto input_format = AnfAlgo::GetInputFormat(anf_node, format_i);
data_format.push_back(input_format);
}
} else {
data_format = GetValue<std::vector<std::string>>(attr_value);
}
(*attr_json)[kValue] = data_format;
} else {
MS_LOG(WARNING) << "attr type:" << type;
}
}

bool AkgKernelBuild::CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name,
const std::shared_ptr<OpInfo> &op_info, nlohmann::json *const attrs_json) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(attrs_json);
MS_EXCEPTION_IF_NULL(op_info);
std::vector<std::shared_ptr<OpAttr>> attrs = op_info->attrs_ptr();
if (attrs.empty()) {
MS_LOG(INFO) << "Apply kernel [" << op_name << "] op info attrs is empty";
return true;
}
std::vector<std::shared_ptr<OpIOInfo>> inputs = op_info->inputs_ptr();

std::vector<int> dyn_input_sizes;
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
}

if (inputs.empty()) {
MS_LOG(ERROR) << "Apply kernel [" << op_name << "] op info inputs is empty";
return false;
}

// Create an input name list to match "x_shape" in attrs with "x" in the primitive.
std::map<size_t, std::string> op_info_shape_name;
for (size_t op_info_input_i = 0; op_info_input_i < inputs.size(); op_info_input_i++) {
std::string input_name = inputs[op_info_input_i]->name();
std::string x_shape_name = input_name + "_shape";
(void)op_info_shape_name.insert(make_pair(op_info_input_i, x_shape_name));
}

for (const auto &op_attr : attrs) {
nlohmann::json attr_json;
ValuePtr attr_value = primitive->GetAttr(op_attr->name());
if (attr_value == nullptr && op_attr->name() != kArgDataformat) {
if (op_attr->param_type() == "required") {
// match "x_shape" in att with "x" in primitive.
std::string attr_name = op_attr->name();
auto find_item = std::find_if(
op_info_shape_name.begin(), op_info_shape_name.end(),
[attr_name](const std::map<size_t, std::string>::value_type item) { return item.second == attr_name; });
if (find_item != op_info_shape_name.end()) {
if (!dyn_input_sizes.empty()) {
if (find_item->first >= dyn_input_sizes.size() - 1) {
MS_LOG(EXCEPTION) << "dyn_input_sizes list index:" << find_item->first
<< " is out of range:" << dyn_input_sizes.size() - 1 << ".";
return false;
}
size_t tensor_idx = IntToSize(std::accumulate(&dyn_input_sizes[0], &dyn_input_sizes[find_item->first], 0));
for (int input_i = 0; input_i < dyn_input_sizes[find_item->first]; input_i++) {
attr_json[kValue] = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, tensor_idx);
attr_json[kName] = op_attr->name();
attrs_json->push_back(attr_json);
tensor_idx++;
}
} else {
attr_json[kValue] = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, find_item->first);
attr_json[kName] = op_attr->name();
attrs_json->push_back(attr_json);
}
} else {
MS_LOG(ERROR) << "op [" << op_name << "] should have attr :" << op_attr->name();
return false;
}
}
continue;
}

GetJson(anf_node, dyn_input_sizes, op_attr, &attr_json, attr_value);

attr_json[kName] = op_attr->name();
attrs_json->push_back(attr_json);
}
return true;
}

bool AkgKernelBuild::GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name,
nlohmann::json *const node_json) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(node_json);
int op_cnt = GetOpCntInc();
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG);
MS_EXCEPTION_IF_NULL(op_info_ptr);

// get basic params from currentNodeOpDesc
(*node_json)[kName] = op_name;
(*node_json)["impl_path"] = op_info_ptr->impl_path();
(*node_json)["process"] = AkgKernelBuild::GetProcessor(anf_node);
(*node_json)["composite"] = false;

auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
ValuePtr input_names_v = primitive->GetAttr(KInputNames);
if (input_names_v == nullptr) {
MS_LOG(ERROR) << "ApplyKernel has no input_names, op[" << op_name << "].";
return false;
}
std::vector<std::string> prim_input_names = GetValue<const std::vector<std::string>>(input_names_v);
std::string inputs_name;
for (const auto &prim_input_name : prim_input_names) {
(void)inputs_name.append("_input_").append(prim_input_name).append("_");
}

// input desc
nlohmann::json inputs_json;
if (!CreateInputDescJson(anf_node, &inputs_json)) {
MS_LOG(ERROR) << "Create input desc json failed, op[" << op_name << "].";
return false;
}
(*node_json)[kInputDesc] = inputs_json;
MS_LOG(INFO) << "Akg create input desc json success.";
std::string inputs_shape = "inputs_shape_";
for (auto &i : inputs_json) {
for (auto &m : i) {
std::string data_type = m[kDataType];
(void)inputs_shape.append("_").append(data_type).append("_");
for (auto &j : m[kShape]) {
size_t n = j;
(void)inputs_shape.append(std::to_string(n)).append("_");
}
}
}

// output desc
nlohmann::json outputs_json;
if (!CreateOutputDescJson(anf_node, &outputs_json)) {
MS_LOG(ERROR) << "Create output desc json failed, op[" << op_name << "].";
return false;
}

(*node_json)[kOutputDesc] = outputs_json;
MS_LOG(INFO) << "Akg create output desc json success.";
std::string outputs_shape = "outputs_shape_";
for (auto &i : outputs_json) {
std::string data_type = i[kDataType];
(void)outputs_shape.append("_").append(data_type).append("_");
for (auto &j : i[kShape]) {
size_t m = j;
(void)outputs_shape.append(std::to_string(m)).append("_");
}
}

// attribute desc
nlohmann::json attrs_json;
if (!CreateAttrDescJson(anf_node, op_name, op_info_ptr, &attrs_json)) {
MS_LOG(ERROR) << "Create attr desc json failed, op[" << op_name << "].";
return false;
}
(*node_json)["attr"] = attrs_json;
std::string json_str = node_json->dump();
size_t hash_id = std::hash<std::string>()(json_str);
json_name_ = op_name + "_";
(void)json_name_.append(std::to_string(hash_id));
MS_LOG(INFO) << "full scope name is : " << anf_node->fullname_with_scope() << ", json info name is : " << json_name_;
json_info_ = json_str;
(*node_json)["id"] = op_cnt;
(*node_json)["op"] = json_name_;
MS_LOG(INFO) << "Akg create node desc json success.";
return true;
}

KernelPackPtr AkgKernelBuild::OpBuild(const std::string &node_json, const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
auto processor = AkgKernelBuild::GetProcessor(anf_node);
auto cached_kernel_pack = SearchCache(json_name_, processor);
if (cached_kernel_pack != nullptr) {
MS_LOG(INFO) << "Use cached kernel, json_name_[" << json_name_ << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
return cached_kernel_pack;
}

PyObject *pModule = nullptr;
PyObject *pFunc = nullptr;
PyObject *pArg = nullptr;
PyObject *pRes = nullptr;

pModule = PyImport_ImportModule(kAkgModule);
if (pModule == nullptr) {
MS_LOG(ERROR) << "Failed to import [" << kAkgModule << "].";
return nullptr;
}

pFunc = PyObject_GetAttrString(pModule, kCompileWithJsonFunc);
if (pFunc == nullptr) {
MS_LOG(ERROR) << "Failed to get function [" << kCompileWithJsonFunc << "] from [" << kAkgModule << "].";
return nullptr;
}
pArg = PyTuple_New(ARGS_SIZE);
(void)PyTuple_SetItem(pArg, 0, Py_BuildValue("s", node_json.c_str()));

(void)alarm(AUTODIFF_COMPILE_OVERTIME);
pRes = PyEval_CallObject(pFunc, pArg);
(void)alarm(0);
if (pRes == nullptr) {
MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileWithJsonFunc << "], args:\n("
<< AkgKernelBuild::PyObjectToStr(pArg) << ").";
return nullptr;
}
if (PyObject_IsTrue(pRes) != 1) {
MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileWithJsonFunc << "], args:\n("
<< AkgKernelBuild::PyObjectToStr(pArg) << ").";
return nullptr;
}

auto new_kernel_pack = InsertCache(json_name_, processor);
kernel::SaveJsonInfo(json_name_, json_info_);
if (new_kernel_pack == nullptr) {
MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name_ << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
return nullptr;
}
return new_kernel_pack;
}

KernelPackPtr AkgKernelBuild::BuildByJson(const AnfNodePtr &anf_node, std::vector<size_t> *const input_size,
std::vector<size_t> *const output_size) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
auto it = kAkgKernelAttrsProcessMap.find(op_name);
if (it != kAkgKernelAttrsProcessMap.end()) {
it->second(anf_node);
}
MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]";
nlohmann::json node_json;
if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) {
MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed.";
return nullptr;
}

std::string json_str = node_json.dump();
auto kernel_pack = OpBuild(json_str, anf_node);
if (kernel_pack == nullptr) {
MS_LOG(ERROR) << "Akg build failed op[" << op_name << "], json:" << json_str;
return nullptr;
}

if (!GetIOSize(node_json, input_size, output_size)) {
MS_LOG(ERROR) << "Cal mem size failed.";
return nullptr;
}
MS_LOG(INFO) << "Akg compile success, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node)
<< "]";
return kernel_pack;
}

size_t AkgKernelBuild::GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx) {
MS_EXCEPTION_IF_NULL(anf_node);
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (input_idx + 1 >= cnode->inputs().size()) {
MS_EXCEPTION(ArgumentError) << "input_idx [" << input_idx << "] is out of range of the inputs ["
<< cnode->inputs().size() - 1 << "] of [" << cnode->DebugString() << "]";
}

auto input_node = cnode->input(input_idx + 1);
if (input_tensor_idx_.find(input_node) == input_tensor_idx_.end()) {
size_t index = input_tensor_idx_.size();
input_tensor_idx_[input_node] = index;
}

return input_tensor_idx_[input_node];
}

size_t AkgKernelBuild::GetOutputTensorIdxInc() {
size_t idx = output_tensor_idx_++;
return idx;
}
} // namespace kernel
} // namespace mindspore
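
Note on the naming scheme in GenerateSingleKernelJson and OpBuild above: the kernel cache key is the op name plus a std::hash of the serialized JSON descriptor, so structurally identical kernels resolve to the same compiled artifact via SearchCache/InsertCache. A minimal sketch of the key derivation (MakeJsonName is an illustrative helper, using nlohmann::json as the sources do):

#include <functional>
#include <string>
#include <nlohmann/json.hpp>

std::string MakeJsonName(const std::string &op_name, const nlohmann::json &node_json) {
  // Identical descriptors dump to identical strings, hence identical hashes,
  // so the second build of the same kernel is served from the cache.
  size_t hash_id = std::hash<std::string>()(node_json.dump());
  return op_name + "_" + std::to_string(hash_id);
}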

+ 76
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h

@@ -0,0 +1,76 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_
#include <unordered_map>
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <utility>
#include "backend/kernel_compiler/kernel.h"
#include "ir/dtype.h"
#include <nlohmann/json.hpp>
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/oplib/oplib.h"

namespace mindspore {
namespace kernel {
class AkgKernelBuild {
public:
AkgKernelBuild() {
input_tensor_idx_ = {};
output_tensor_idx_ = 0;
}
~AkgKernelBuild() = default;

KernelPackPtr BuildByJson(const AnfNodePtr &anf_node, std::vector<size_t> *const input_size,
std::vector<size_t> *const output_size);
static std::string GetProcessor(const AnfNodePtr &anf_node);
static std::string PyObjectToStr(PyObject *const PyObj);

protected:
bool CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json);
bool CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json);
bool CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name,
const std::shared_ptr<OpInfo> &op_info, nlohmann::json *const attrs_json);
KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node);
int GetOpCntInc();
size_t GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx);
size_t GetOutputTensorIdxInc();
bool GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name,
nlohmann::json *const node_json);

static int op_cnt_;
// lock for the shared op counter op_cnt_
static std::mutex op_cnt_mtx_;
std::string json_name_;
std::string json_info_;
std::unordered_map<AnfNodePtr, size_t> input_tensor_idx_;
size_t output_tensor_idx_;
};

bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *const input_size,
std::vector<size_t> *const output_size);
void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair<size_t, size_t> &position,
nlohmann::json *const node_json);
std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag,
const std::pair<size_t, size_t> &position);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_

+ 50
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc

@@ -0,0 +1,50 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/akg/akg_kernel_metadata.h"
#include <memory>
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"

namespace mindspore {
namespace kernel {
void AkgMetadataInfo(const CNodePtr &kernel_node,
std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);

std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
for (size_t i = 0; i < support_devices.size(); i++) {
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG);
if (op_info_ptr == nullptr) {
continue;
}

if (!ParseMetadata(kernel_node, op_info_ptr, Processor(i), kernel_info_list)) {
MS_LOG(WARNING) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "] failed.";
} else {
MS_LOG(DEBUG) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "].";
break;
}
}

if (kernel_info_list->empty()) {
MS_LOG(WARNING) << "Akg dose not has metadata of op[" << op_name << "].";
}
}
} // namespace kernel
} // namespace mindspore

+ 31
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.h

@@ -0,0 +1,31 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_

#include <string>
#include <vector>
#include <unordered_map>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"

namespace mindspore {
namespace kernel {
void AkgMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_

+ 422
- 0
mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc

@@ -0,0 +1,422 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h"

#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include <Python.h>
#include "ir/dtype.h"
#include "ir/func_graph.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
#include "backend/session/anf_runtime_algorithm.h"

namespace mindspore {
namespace kernel {
constexpr int32_t PARALLEL_ARGS_SIZE = 3;
constexpr int32_t PROCESS_NUM = 16;
constexpr int32_t TIME_OUT = 300;

constexpr auto kOpDesc = "op_desc";
constexpr auto kShape = "shape";
constexpr auto kDataType = "data_type";
constexpr auto kInputDesc = "input_desc";
constexpr auto kOutputDesc = "output_desc";
constexpr auto kTensorName = "tensor_name";
constexpr auto kCompileAkgKernelParallelFunc = "compile_akg_kernel_parallel";
constexpr auto kMultiProcModule = "mindspore._extends.parallel_compile.akg_compiler.multi_process_compiler";
namespace {
void UpdateTensorNameInJson(const std::vector<AnfNodePtr> &anf_nodes,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
for (auto const &anf_node : anf_nodes) {
std::vector<int> dyn_input_sizes;
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);

if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes));
}

bool is_dynamic_input = !dyn_input_sizes.empty();
size_t input_num = is_dynamic_input ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node);
size_t real_input_index = 0;
for (size_t i = 0; i < input_num; ++i) {
size_t input_tensor_num = is_dynamic_input ? IntToSize(dyn_input_sizes[i]) : 1;
for (size_t j = 0; j < input_tensor_num; ++j) {
auto tmp_input = GetKernelInput(anf_node, real_input_index);
std::string tensor_name = GetTensorName((*node_json_map)[anf_node], kInputDesc, std::make_pair(i, j));
if (node_json_map->find(tmp_input.first) != node_json_map->end()) {
std::string new_tensor_name =
GetTensorName((*node_json_map)[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second));
SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &((*node_json_map)[anf_node]));
MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of ["
<< anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output ["
<< new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "].";
} else {
MS_LOG(DEBUG) << "[" << real_input_index << "] input " << tensor_name << "] of ["
<< anf_node->fullname_with_scope() << "] is out input.";
}
real_input_index++;
}
}
}
}

nlohmann::json GetInputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
nlohmann::json inputs_json;
auto input_index = GetInputIndex(anf_nodes, input_list);
for (size_t i = 0; i < input_index.size(); ++i) {
auto tmp_input = input_index[i];
auto type_id = AnfAlgo::GetInputDeviceDataType(tmp_input.first, tmp_input.second.first);
std::string dtype = TypeId2String(type_id);
nlohmann::json input_desc_json;
input_desc_json[kTensorName] = GetTensorName((*node_json_map)[tmp_input.first], kInputDesc, tmp_input.second);
input_desc_json[kDataType] = dtype;
input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(tmp_input.first, tmp_input.second.first);
inputs_json.emplace_back(std::vector<nlohmann::json>{input_desc_json});
}

return inputs_json;
}

nlohmann::json GetOutputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list, const nlohmann::json &inputs_json,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
nlohmann::json outputs_json;
auto output_index = GetOutputIndex(anf_nodes, input_list, output_list);
for (size_t i = 0; i < output_index.size(); ++i) {
auto tmp_output = output_index[i];
bool found = false;
nlohmann::json output_desc_json;
for (size_t input_i = 0; input_i < input_list.size(); ++input_i) {
if (tmp_output.first == input_list[input_i]) {
output_desc_json = inputs_json[input_i][0];
found = true;
break;
}
}
if (!found) {
auto type_id = AnfAlgo::GetOutputDeviceDataType(tmp_output.first, tmp_output.second);
std::string dtype = TypeId2String(type_id);
output_desc_json[kTensorName] =
GetTensorName((*node_json_map)[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second));
output_desc_json[kDataType] = dtype;
auto output_shape = AnfAlgo::GetOutputDeviceShape(tmp_output.first, tmp_output.second);
if (output_shape.empty()) {
output_shape.push_back(1);
}
output_desc_json[kShape] = output_shape;
}
outputs_json.emplace_back(output_desc_json);
}

return outputs_json;
}

std::pair<std::vector<std::string>, std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>>> PreProcessJsonForBuild(
const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) {
// Remove cached nodes, gather unique nodes, and collect repeated nodes that need postprocessing.
std::vector<std::string> jsons;
std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> repeat_nodes;
std::unordered_set<std::string> json_name_set;
for (const auto &[builder, anf_node] : build_args) {
MS_EXCEPTION_IF_NULL(anf_node);
auto json_name = builder.json_name();
MS_LOG(DEBUG) << "Akg start compile op: " << json_name;
auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node));
if (cached_kernel_pack != nullptr) {
MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack);
kernel_mod_ptr->SetInputSizeList(builder.input_size_list());
kernel_mod_ptr->SetOutputSizeList(builder.output_size_list());
AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
continue;
}

if (json_name_set.count(json_name) != 0) {
repeat_nodes.push_back({builder, anf_node});
continue;
}
json_name_set.insert(json_name);
auto node_json = builder.kernel_json();
kernel::SaveJsonInfo(json_name, node_json);
jsons.push_back(node_json);
}

return std::make_pair(jsons, repeat_nodes);
}

bool PostProcessAfterCompile(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args,
const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &repeat_nodes) {
for (const auto &[builder, anf_node] : build_args) {
auto json_name = builder.json_name();
auto new_kernel_pack = tbe::TbeUtils::InsertCache(json_name, AkgKernelBuild::GetProcessor(anf_node));
if (new_kernel_pack == nullptr) {
MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
return false;
}
auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(new_kernel_pack);
kernel_mod_ptr->SetInputSizeList(builder.input_size_list());
kernel_mod_ptr->SetOutputSizeList(builder.output_size_list());
AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!";
}

for (const auto &[builder, anf_node] : repeat_nodes) {
auto node_json = builder.kernel_json();
auto json_name = builder.json_name();
auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node));
if (cached_kernel_pack == nullptr) {
MS_LOG(ERROR) << "Kernel pack of [" << json_name << "] is missing from the cache after parallel compile.";
return false;
}
MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack);
kernel_mod_ptr->SetInputSizeList(builder.input_size_list());
kernel_mod_ptr->SetOutputSizeList(builder.output_size_list());
AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
}

return true;
}
} // namespace

bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
auto it = kAkgKernelAttrsProcessMap.find(op_name);
if (it != kAkgKernelAttrsProcessMap.end()) {
it->second(anf_node);
}
MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]";
nlohmann::json node_json;
if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) {
MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed.";
return false;
}

kernel_json_ = node_json.dump();

if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) {
MS_LOG(ERROR) << "Cal mem size failed.";
return false;
}

return true;
}

bool AkgAscendKernelBuilder::GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes,
std::map<AnfNodePtr, nlohmann::json> *node_json_map) {
for (auto const &anf_node : anf_nodes) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (!AnfAlgo::IsRealKernel(anf_node)) {
MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "].";
return false;
}
auto it = kAkgKernelAttrsProcessMap.find(op_name);
if (it != kAkgKernelAttrsProcessMap.end()) {
it->second(anf_node);
}

nlohmann::json node_json;
if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) {
MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed.";
return false;
}
// No need for composite op.
node_json.erase("id");
node_json.erase("op");
node_json.erase("composite");

auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);

if (primitive->GetAttr("fusion") != nullptr) {
node_json["fusion"] = primitive->GetAttr("fusion")->ToString();
}

(*node_json_map)[anf_node] = node_json;
}
return true;
}

bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes,
const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list) {
if (anf_nodes.empty() || input_list.empty()) {
MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size()
<< "].";
return false;
}
MS_LOG(INFO) << "anf_nodes [" << output_list.size() << "], input_list [" << anf_nodes.size() << "], output_list ["
<< input_list.size() << "].";

std::map<AnfNodePtr, nlohmann::json> node_json_map;
if (!GenJsonAndPreprocess4Fused(anf_nodes, &node_json_map)) {
return false;
}

UpdateTensorNameInJson(anf_nodes, &node_json_map);

nlohmann::json fused_node_json;
std::vector<nlohmann::json> node_json_desc;
std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc),
[&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; });
fused_node_json[kOpDesc] = node_json_desc;
fused_node_json[kInputDesc] = GetInputsJson(anf_nodes, input_list, &node_json_map);
fused_node_json[kOutputDesc] =
GetOutputsJson(anf_nodes, input_list, output_list, fused_node_json[kInputDesc], &node_json_map);

size_t hash_id = std::hash<std::string>()(fused_node_json.dump());
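// The fused kernel is named "Fused_<graph_kernel_attr>_<hash of the fused json>".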
json_name_ = "Fused_";
auto fg = anf_nodes[0]->func_graph();
MS_EXCEPTION_IF_NULL(fg);
auto attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL);
if (attr_val != nullptr) {
auto fg_attr = GetValue<std::string>(attr_val);
(void)json_name_.append(fg_attr).append("_");
}
(void)json_name_.append(std::to_string(hash_id));
fused_node_json["composite_graph"] = fg->ToString();
fused_node_json["op"] = json_name_;
fused_node_json["platform"] = "AKG";
fused_node_json["process"] = "aicore";
fused_node_json["composite"] = true;

kernel_json_ = fused_node_json.dump();

if (!GetIOSize(fused_node_json, &input_size_list_, &output_size_list_)) {
MS_LOG(ERROR) << "Cal mem size failed.";
return false;
}

return true;
}

void GenParallelCompileFuncArgs(const std::vector<std::string> &kernel_jsons, PyObject **p_args) {
MS_EXCEPTION_IF_NULL(p_args);
*p_args = PyTuple_New(PARALLEL_ARGS_SIZE);

// Tuple layout: (list of kernel json strings, process number, timeout).
PyObject *arg1 = PyList_New(static_cast<Py_ssize_t>(kernel_jsons.size()));
for (size_t i = 0; i < kernel_jsons.size(); ++i) {
(void)PyList_SetItem(arg1, static_cast<Py_ssize_t>(i), Py_BuildValue("s", kernel_jsons[i].c_str()));
}
PyObject *arg2 = Py_BuildValue("i", PROCESS_NUM);
PyObject *arg3 = Py_BuildValue("i", TIME_OUT);

(void)PyTuple_SetItem(*p_args, 0, arg1);
(void)PyTuple_SetItem(*p_args, 1, arg2);
(void)PyTuple_SetItem(*p_args, 2, arg3);
}

bool AkgOpParallelBuild(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) {
auto [jsons, repeat_nodes] = PreProcessJsonForBuild(build_args);
if (jsons.empty()) {
return true;
}

// Try to call the Python method to compile the nodes in parallel.
PyObject *p_module = nullptr;
PyObject *p_func = nullptr;
PyObject *p_arg = nullptr;
PyObject *p_res = nullptr;

p_module = PyImport_ImportModule(kMultiProcModule);
if (p_module == nullptr) {
MS_LOG(ERROR) << "Failed to import [" << kMultiProcModule << "].";
return false;
}

p_func = PyObject_GetAttrString(p_module, kCompileAkgKernelParallelFunc);
GenParallelCompileFuncArgs(jsons, &p_arg);
MS_LOG(DEBUG) << "Call function [" << kCompileAkgKernelParallelFunc << "], try to compile " << jsons.size()
<< " Akg kernels parallelly.";
p_res = PyEval_CallObject(p_func, p_arg);
if (p_res == nullptr) {
PyErr_Print();
MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n("
<< AkgKernelBuild::PyObjectToStr(p_arg) << ").";
return false;
}
if (PyObject_IsTrue(p_res) != 1) {
PyErr_Print();
MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n("
<< AkgKernelBuild::PyObjectToStr(p_arg) << ").";
return false;
}

if (!PostProcessAfterCompile(build_args, repeat_nodes)) {
return false;
}

return true;
}

bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> json_and_node;
for (const auto &anf_node : anf_nodes) {
MS_EXCEPTION_IF_NULL(anf_node);
AkgAscendKernelBuilder akg_cce_kernel_builder;
KernelPackPtr kernel_pack = nullptr;
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
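// A graph kernel is a fused subgraph: collect a fused json for it; otherwise collect a single-op json.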
if (AnfAlgo::IsGraphKernel(cnode)) {
auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode);
MS_EXCEPTION_IF_NULL(func_graph);
auto mng = func_graph->manager();
if (mng == nullptr) {
mng = Manage(func_graph, true);
func_graph->set_manager(mng);
}
std::vector<AnfNodePtr> node_list;
std::vector<AnfNodePtr> input_list;
std::vector<AnfNodePtr> output_list;
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
MS_LOG(INFO) << "Akg start compile composite op[" << op_name << "]";
GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list);
if (!akg_cce_kernel_builder.CollectFusedJson(node_list, input_list, output_list)) {
MS_EXCEPTION(UnknownError) << "Akg build failed composite op[" << op_name << "].";
}
} else {
if (!akg_cce_kernel_builder.CollectJson(anf_node)) {
MS_EXCEPTION(UnknownError) << "Akg build failed op[" << AnfAlgo::GetCNodeName(anf_node) << "].";
}
}
json_and_node.push_back({akg_cce_kernel_builder, anf_node});
}

if (json_and_node.empty()) {
MS_LOG(DEBUG) << "There is no kernel needed to be compiled.";
return true;
}

return AkgOpParallelBuild(json_and_node);
}
} // namespace kernel
} // namespace mindspore

+56 -0  mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h

@@ -0,0 +1,56 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_

#include <string>
#include <memory>
#include <vector>
#include <map>
#include "ir/anf.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"

namespace mindspore {
namespace kernel {
class AkgAscendKernelBuilder : public AkgKernelBuild {
public:
AkgAscendKernelBuilder() = default;
~AkgAscendKernelBuilder() = default;

bool CollectJson(const AnfNodePtr &anf_node);
bool CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list);
std::string json_name() const { return json_name_; }
std::string kernel_json() const { return kernel_json_; }
const std::vector<size_t> &input_size_list() const { return input_size_list_; }
const std::vector<size_t> &output_size_list() const { return output_size_list_; }

private:
bool GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes,
std::map<AnfNodePtr, nlohmann::json> *node_json_map);

std::string kernel_json_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
};

bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_

+132 -0  mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc

@@ -0,0 +1,132 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
#include <algorithm>
#include <fstream>
#include <map>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>
#include "nlohmann/json.hpp"
#include "runtime/rt.h"
#include "utils/log_adapter.h"
#include "utils/convert_utils.h"
#include "utils/context/ms_context.h"

namespace mindspore {
namespace kernel {
using std::fstream;
using std::map;
using std::mutex;
using std::string;
using TbeTaskInfoPtr = std::shared_ptr<ge::model_runner::TbeTaskInfo>;
using tbe::KernelManager;
constexpr uint32_t DEFAULT_BLOCK_DIM = 1;
/**
* @brief The info table contains the func stub, the block dim and the kernel file buffer.
*/
AkgKernelMod::AkgKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {}

void AkgKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }

void AkgKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }

void AkgKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }

const std::vector<size_t> &AkgKernelMod::GetInputSizeList() const { return input_size_list_; }

const std::vector<size_t> &AkgKernelMod::GetOutputSizeList() const { return output_size_list_; }

const std::vector<size_t> &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }

bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}

if (kernel_pack_ == nullptr) {
MS_LOG(ERROR) << "kernel pack should not be nullptr.";
return false;
}

uint32_t block_dim = DEFAULT_BLOCK_DIM;  // the default block dim is 1.
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim);
if (func_stub == 0) {
MS_LOG(ERROR) << "GenFuncStub failed.";
return false;
}

// pack all addresses into a vector.
std::vector<void *> runtime_args;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args),
[](const AddressPtr &output) -> void * { return output->addr; });

rtL2Ctrl_t *l2ctrl = nullptr;
auto stream = reinterpret_cast<rtStream_t *>(stream_ptr);
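// rtKernelLaunch takes the stub function, the block dim and the packed argument buffer.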
if (RT_ERROR_NONE != rtKernelLaunch(reinterpret_cast<void *>(func_stub), block_dim, runtime_args.data(),
SizeToUint(sizeof(void *) * runtime_args.size()), l2ctrl, stream)) {
MS_LOG(ERROR) << "Call runtime rtKernelLaunch error.";
return false;
}

return true;
}

std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
if (kernel_pack_ == nullptr) {
MS_LOG(EXCEPTION) << "kernel pack should not be nullptr.";
}

std::vector<uint8_t> args;
const uint32_t args_size = 0;
std::vector<uint8_t> sm_desc;
void *binary = nullptr;
const uint32_t binary_size = 0;
std::vector<uint8_t> meta_data;
std::vector<void *> input_data_addrs;
std::vector<void *> output_data_addrs;
std::vector<void *> workspace_addrs;

// pack all addresses into a vector.
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
[](const AddressPtr &output) -> void * { return output->addr; });

uint32_t block_dim = DEFAULT_BLOCK_DIM;  // the default block dim is 1.
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim);
if (func_stub == 0) {
MS_LOG(EXCEPTION) << "GenFuncStub failed.";
}

std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_);

MS_LOG(DEBUG) << "The block_dim is:" << block_dim;

TbeTaskInfoPtr task_info_ptr = std::make_shared<ge::model_runner::TbeTaskInfo>(
kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
return {task_info_ptr};
}
} // namespace kernel
} // namespace mindspore

+54 -0  mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h

@@ -0,0 +1,54 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_
#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"

namespace mindspore {
namespace kernel {
class AkgKernelMod : public AscendKernelMod {
public:
explicit AkgKernelMod(const KernelPackPtr &kernel_pack);
~AkgKernelMod() final {}

void SetInputSizeList(const std::vector<size_t> &size_list);
void SetOutputSizeList(const std::vector<size_t> &size_list);
void SetWorkspaceSizeList(const std::vector<size_t> &size_list);
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;

private:
KernelPackPtr kernel_pack_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};

using AkgKernelModPtr = std::shared_ptr<AkgKernelMod>;
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_

+43 -0  mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc

@@ -0,0 +1,43 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h"
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
AkgKernelBuild akg_kernel_build;

std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
KernelPackPtr kernel_pack = akg_kernel_build.BuildByJson(anf_node, &input_size_list, &output_size_list);
MS_EXCEPTION_IF_NULL(kernel_pack);

auto kernel_mod_ptr = std::make_shared<GpuKernelMod>(kernel_pack);
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
kernel_mod_ptr->SetInputSizeList(input_size_list);
kernel_mod_ptr->SetOutputSizeList(output_size_list);
return kernel_mod_ptr;
}
} // namespace kernel
} // namespace mindspore

+28 -0  mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h

@@ -0,0 +1,28 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
#include "backend/kernel_compiler/kernel.h"
#include "base/base.h"

namespace mindspore {
namespace kernel {
KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_

+116 -0  mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc

@@ -0,0 +1,116 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
#include <fstream>
#include <algorithm>
#include "nlohmann/json.hpp"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
using std::fstream;
using std::string;
using std::vector;

GpuKernelManagerPtr GpuKernelMod::kernelmanager_ = std::make_shared<GpuKernelManager>();
GpuKernelManager::GpuKernelManager() {}

CUresult GpuKernelManager::GetFunction(const KernelPackPtr &kernel_pack, bool force_reload,
vector<uint32_t> *thread_info, CUfunction *func) {
if (kernel_pack->GetJson() == nullptr || kernel_pack->GetJson()->contents == nullptr ||
kernel_pack->GetKernel() == nullptr || kernel_pack->GetKernel()->contents == nullptr) {
MS_LOG(ERROR) << "GPU:Invalid kernel pack, json or kernel is nullptr.";
return CUDA_ERROR_INVALID_IMAGE;
}
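// The kernel meta json records the kernel name and the grid/block launch dimensions.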
auto js = nlohmann::json::parse(kernel_pack->GetJson()->contents,
kernel_pack->GetJson()->contents + kernel_pack->GetJson()->len);
string fn = js["kernelName"];
if (!force_reload) {
auto iter = infotable_.find(fn);
if (iter != infotable_.end()) {
auto kernelmeta = iter->second;
*thread_info = kernelmeta->thread_info_;
*func = kernelmeta->func_addr_;
return CUDA_SUCCESS;
}
}
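// Cache miss: read the launch dimensions from the json, then load the CUBIN module and resolve the function.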
thread_info->emplace_back(js["blockIdx.x"]);
thread_info->emplace_back(js["blockIdx.y"]);
thread_info->emplace_back(js["blockIdx.z"]);
thread_info->emplace_back(js["threadIdx.x"]);
thread_info->emplace_back(js["threadIdx.y"]);
thread_info->emplace_back(js["threadIdx.z"]);
CUmodule module;
CUresult result = cuModuleLoadData(&module, kernel_pack->GetKernel()->contents);
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "cuModuleLoadData failed.";
return result;
}
result = cuModuleGetFunction(func, module, fn.c_str());
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "cuModuleGetFunction failed.";
return result;
}
infotable_[fn] = std::make_shared<GpuKernelMeta>(*func, module, *thread_info);
return result;
}

GpuKernelMod::GpuKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {}

void GpuKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }

void GpuKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }

const std::vector<size_t> &GpuKernelMod::GetInputSizeList() const { return input_size_list_; }

const std::vector<size_t> &GpuKernelMod::GetOutputSizeList() const { return output_size_list_; }

const std::vector<size_t> &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }

bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}
if (kernel_pack_ == nullptr) {
MS_LOG(ERROR) << "kernel pack should not be nullptr.";
return false;
}
vector<uint32_t> thread_info;
CUfunction kernel_addr;
CUresult result = kernelmanager_->GetFunction(kernel_pack_, false, &thread_info, &kernel_addr);
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "GetFunction failed.";
return false;
}
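// cuLaunchKernel expects an array of pointers to the kernel arguments, so store the address of each device pointer.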
std::vector<void *> runtimeargs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
[](const AddressPtr &input) -> void * { return reinterpret_cast<void *>(&(input->addr)); });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
[](const AddressPtr &output) -> void * { return reinterpret_cast<void *>(&(output->addr)); });
result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4],
thread_info[5], 0, reinterpret_cast<CUstream>(stream_ptr),
reinterpret_cast<void **>(&runtimeargs[0]), 0);
if (result != CUDA_SUCCESS) {
MS_LOG(ERROR) << "Launch Kernel failed.";
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore

+82 -0  mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h

@@ -0,0 +1,82 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
#include <cuda.h>
#include <string>
#include <vector>
#include <unordered_map>
#include <memory>
#include "backend/kernel_compiler/kernel.h"

namespace mindspore {
namespace kernel {
struct GpuKernelMeta {
CUfunction func_addr_;
CUmodule module_;
std::vector<uint32_t> thread_info_;
GpuKernelMeta(CUfunction funcAddr, CUmodule module, const std::vector<uint32_t> &thread_info)
: func_addr_(funcAddr), module_(module), thread_info_(thread_info) {}
};
using GpuKernelMetaPtr = std::shared_ptr<GpuKernelMeta>;

class GpuKernelManager {
public:
GpuKernelManager();
virtual ~GpuKernelManager() {
for (auto iter = infotable_.begin(); iter != infotable_.end(); ++iter) {
CUresult ret = cuModuleUnload(iter->second->module_);
if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_DEINITIALIZED) {
MS_LOG(ERROR) << "Unload GPU Module failed.";
}
}
}
CUresult GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, std::vector<uint32_t> *thread_info,
CUfunction *func);

private:
std::unordered_map<std::string, GpuKernelMetaPtr> infotable_;
};
using GpuKernelManagerPtr = std::shared_ptr<GpuKernelManager>;

class GpuKernelMod : public KernelMod {
public:
explicit GpuKernelMod(const KernelPackPtr &kernel_pack);
virtual ~GpuKernelMod() {}

void SetInputSizeList(const std::vector<size_t> &size_list);
void SetOutputSizeList(const std::vector<size_t> &size_list);
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

static GpuKernelManagerPtr kernelmanager_;

private:
KernelPackPtr kernel_pack_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};

using GpuKernelModPtr = std::shared_ptr<GpuKernelMod>;
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_

+52 -0  mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.h

@@ -0,0 +1,52 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_

#include <vector>
#include <memory>
#include "framework/ge_runtime/task_info.h"
#include "backend/kernel_compiler/kernel.h"
#ifdef ENABLE_DATA_DUMP
#include "debug/data_dump_parser.h"
#endif

using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
namespace mindspore {
namespace kernel {
class AscendKernelMod : public KernelMod {
public:
virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) = 0;
uint32_t block_dim() { return block_dim_; }
uint32_t stream_id() { return stream_id_; }
virtual bool NeedDump() {
#ifdef ENABLE_DATA_DUMP
return DataDumpParser::GetInstance().NeedDump(kernel_name_);
#else
return false;
#endif
}

protected:
uint32_t block_dim_{1};
uint32_t stream_id_{0};
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_

+1029 -0  mindspore/ccsrc/backend/kernel_compiler/common_utils.cc
File diff suppressed because it is too large


+145 -0  mindspore/ccsrc/backend/kernel_compiler/common_utils.h

@@ -0,0 +1,145 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_
#define MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_

#include <dirent.h>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include <nlohmann/json.hpp>
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/oplib/opinfo.h"
#include "backend/kernel_compiler/kernel_build_info.h"

namespace mindspore {
namespace kernel {
constexpr auto kCceKernelMeta = "./kernel_meta/";
constexpr auto kGpuKernelMeta = "./cuda_meta";
constexpr auto kProcessorAiCore = "aicore";
constexpr auto kProcessorAiCpu = "aicpu";
constexpr auto kProcessorCuda = "cuda";
constexpr auto kJsonSuffix = ".json";
constexpr auto kInfoSuffix = ".info";
constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600;
constexpr auto kAkgModule = "_akg";
constexpr auto kArgDataformat = "data_format";

const std::vector<std::string> support_devices = {"aicore", "aicpu", "cuda"};

struct KernelMetaInfo {
uintptr_t func_stub_;
uint32_t block_dim_;
};
using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;

class KernelMeta {
public:
KernelMeta() = default;
void Initialize();
void RemoveKernelCache();
std::string Search(const std::string &kernel_name) const;
bool Insert(const std::string &kernel_name, const std::string &kernel_json);
std::string GetKernelMetaPath() { return kernel_meta_path_; }

static KernelMeta *GetInstance() {
static KernelMeta kernel_meta;
return &kernel_meta;
}
~KernelMeta() = default;

private:
bool initialized_ = false;
std::string kernel_meta_path_;
std::unordered_map<std::string, std::string> kernel_meta_map_;
};

struct SparseGradient {
float *value_;
int *indices_;
size_t indices_size_;
};

struct MultiThreadComputeParams {
float *var_;
float *accum_;
float *linear_;
float *m_;
float *m_t_;
float *v_;
float lr_;
float l1_;
float l2_;
float lr_power_;
float beta1_;
float beta2_;
float epsilon_;
SparseGradient sparse_grad_;
size_t var_first_dim_size_;
size_t var_outer_dim_size_;
bool use_nesterov_;
};
using MultiThreadComputeFunc = std::function<void(MultiThreadComputeParams *param, size_t start, size_t end)>;

bool CheckCache(const std::string &kernel_name);
KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
TypeId DtypeToTypeId(const std::string &dtypes);
std::string Dtype2ShortType(const std::string &dtypes);
std::string TypeId2String(TypeId type_id);
size_t GetDtypeNbyte(const std::string &dtypes);
bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor,
std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list);
void SaveJsonInfo(const std::string &json_name, const std::string &info);
std::string GetProcessor(const AnfNodePtr &anf_node);
bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
int Sign(float x);
void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim);
void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim, bool use_multi_threads = true);
std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index);
std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list,
const std::vector<AnfNodePtr> &input_list);
std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list,
const std::vector<AnfNodePtr> &input_list,
const std::vector<AnfNodePtr> &output_list);
void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list,
std::vector<AnfNodePtr> *input_list, std::vector<AnfNodePtr> *output_list);
void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list);
bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json);
void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
bool IsWeightBoundary(const AnfNodePtr &node);
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
size_t total_compute_size);
void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad,
size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices,
std::vector<size_t> *slice_positions);
void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads,
SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim);
void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad,
SparseGradient *unique_grad, size_t first_dim, size_t outer_dim);
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_

+65 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.cc

@@ -0,0 +1,65 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/addn_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
input_num_ = AnfAlgo::GetInputTensorNum(kernel_node);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}

bool AddNCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);

size_t offset = 0;
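// Walk the 4-D output in row-major order; each element is the sum of the matching element of every input.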
for (size_t i = 0; i < output_shape_[0]; ++i) {
for (size_t j = 0; j < output_shape_[1]; ++j) {
for (size_t k = 0; k < output_shape_[2]; ++k) {
for (size_t m = 0; m < output_shape_[3]; ++m) {
float sum = 0;
for (size_t index = 0; index < input_num_; ++index) {
auto input_addr = reinterpret_cast<float *>(inputs[index]->addr);
sum += input_addr[offset];
}
output_addr[offset++] = sum;
}
}
}
}

return true;
}

void AddNCPUKernel::CheckParam(const CNodePtr &kernel_node) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but AddNCPUKernel olny support 4d or lower.";
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but AddNCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore

+48 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/addn_cpu_kernel.h

@@ -0,0 +1,48 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class AddNCPUKernel : public CPUKernel {
public:
AddNCPUKernel() : input_num_(0) {}
~AddNCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
size_t input_num_;
std::vector<size_t> output_shape_;
};

MS_REG_CPU_KERNEL(AddN,
KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
AddNCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_

+53 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.cc

@@ -0,0 +1,53 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_adapter.h"
#include "utils/log_adapter.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr auto kRanksGroup = "group";
constexpr auto kAllGatherInputNum = 1;
} // namespace

void AllGatherCPUKernel::InitKernel(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != kAllGatherInputNum) {
MS_LOG(EXCEPTION) << "allgather input num:" << input_num;
}

auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup);
if (ranks_group != nullptr) {
ranks_group_ = GetValue<std::vector<int>>(ranks_group);
} else {
MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup;
}
}

bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
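// Gather the local float buffer from every rank in ranks_group_ via MPI.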
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto input_data_num = inputs[0]->size / sizeof(float);
auto mpi_instance = device::cpu::MPIAdapter::Instance();
MS_EXCEPTION_IF_NULL(mpi_instance);
return mpi_instance->AllGather(input_addr, output_addr, ranks_group_, input_data_num);
}
} // namespace kernel
} // namespace mindspore

+44 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/allgather_cpu_kernel.h

@@ -0,0 +1,44 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class AllGatherCPUKernel : public CPUKernel {
public:
AllGatherCPUKernel() = default;
~AllGatherCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
std::vector<int> ranks_group_;
};

MS_REG_CPU_KERNEL(_HostAllGather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
AllGatherCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif  // MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_

+47 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.cc

@@ -0,0 +1,47 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
void ApplyMomentumCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {}

bool ApplyMomentumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> & /*outputs*/) {
if (inputs.size() < 5) {
MS_LOG(EXCEPTION) << "ApplyMomentum needs 5 inputs, but got " << inputs.size();
}
if (inputs[0]->size != inputs[1]->size || inputs[0]->size != inputs[3]->size) {
MS_LOG(EXCEPTION) << "The weight, accumulation and gradient inputs should have the same size.";
}
auto weight = reinterpret_cast<float *>(inputs[0]->addr);
auto accumulate = reinterpret_cast<float *>(inputs[1]->addr);
float learning_rate = reinterpret_cast<float *>(inputs[2]->addr)[0];
auto gradient = reinterpret_cast<float *>(inputs[3]->addr);
float moment = reinterpret_cast<float *>(inputs[4]->addr)[0];
size_t elem_num = inputs[0]->size / sizeof(float);
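// Momentum SGD update: accum = accum * momentum + grad; weight -= lr * accum.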
for (size_t i = 0; i < elem_num; ++i) {
accumulate[i] = accumulate[i] * moment + gradient[i];
weight[i] -= accumulate[i] * learning_rate;
}
return true;
}
} // namespace kernel
} // namespace mindspore

+58 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h

@@ -0,0 +1,58 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class ApplyMomentumCPUKernel : public MKLCPUKernel {
public:
ApplyMomentumCPUKernel() = default;
~ApplyMomentumCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(ApplyMomentum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ApplyMomentumCPUKernel);
MS_REG_CPU_KERNEL(ApplyMomentum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ApplyMomentumCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_

+67 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc

@@ -0,0 +1,67 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (shape.size() != 2) {
MS_LOG(EXCEPTION) << "argmax kernel dims invalid " << shape.size();
}
batch_size_ = shape[0];
class_num_ = shape[1];

int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
if (axis != -1 && axis != 1) {
MS_LOG(EXCEPTION) << "argmax kernel not support axis " << axis;
}
}

bool ArgmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspaces*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "input or output empty!";
}

size_t batch_float_size = batch_size_ * sizeof(float);
size_t batch_class_float_size = class_num_ * batch_float_size;
if (inputs[0]->size != batch_class_float_size || outputs[0]->size != batch_float_size) {
MS_LOG(EXCEPTION) << "invalid input or output data size!";
}
auto input = reinterpret_cast<float *>(inputs[0]->addr);
auto output = reinterpret_cast<int *>(outputs[0]->addr);
size_t row_start = 0;
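// For each row of the (batch, class) matrix, record the column index of the maximum value.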
for (size_t i = 0; i < batch_size_; ++i) {
size_t max_index = 0;
float max_value = input[row_start];
for (size_t j = 1; j < class_num_; ++j) {
size_t index = row_start + j;
if (input[index] > max_value) {
max_value = input[index];
max_index = j;
}
}
output[i] = SizeToInt(max_index);
row_start += class_num_;
}
return true;
}
} // namespace kernel
} // namespace mindspore

+45 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.h

@@ -0,0 +1,45 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class ArgmaxCPUKernel : public CPUKernel {
public:
ArgmaxCPUKernel() = default;
~ArgmaxCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
size_t class_num_{0};
size_t batch_size_{0};
};

MS_REG_CPU_KERNEL(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32),
ArgmaxCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_

+82 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc

@@ -0,0 +1,82 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h"

namespace mindspore {
namespace kernel {
void BiasAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (input_shape_.size() == 4) {
data_shape_ = 4;
} else if (input_shape_.size() == 2) {
data_shape_ = 2;
} else {
MS_LOG(EXCEPTION) << "BiasAdd input should be NCHW or NC, but got " << input_shape_.size() << " dims.";
}
if (bias_shape_.size() != 1) {
MS_LOG(EXCEPTION) << "Bias should be 1-D, but got " << bias_shape_.size() << " dims.";
}
if (input_shape_[1] != bias_shape_[0]) {
MS_LOG(EXCEPTION) << "Bias size " << bias_shape_[0] << " does not match the input channel size " << input_shape_[1] << ".";
}
}

bool BiasAddCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs) {
if (inputs.size() != 2 || outputs.size() != 1) {
MS_LOG(EXCEPTION) << "inputs outputs size not supoort";
}

auto src_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto bias_addr = reinterpret_cast<float *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);

if (data_shape_ == 4) {
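// NCHW: broadcast bias[c] over the H*W elements of every (n, c) plane.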
size_t h_size = input_shape_[3];
size_t c_size = input_shape_[2] * h_size;
size_t n_size = input_shape_[1] * c_size;
size_t hw_size = input_shape_[2] * input_shape_[3];
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
size_t c_offset = 0;
for (size_t c = 0; c < input_shape_[1]; ++c) {
for (size_t hw = 0; hw < hw_size; ++hw) {
size_t offset = n_offset + c_offset + hw;
output_addr[offset] = src_addr[offset] + bias_addr[c];
}
c_offset += c_size;
}
n_offset += n_size;
}
} else {
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
for (size_t c = 0; c < input_shape_[1]; ++c) {
output_addr[n_offset + c] = src_addr[n_offset + c] + bias_addr[c];
}
n_offset += input_shape_[1];
}
}
return true;
}
} // namespace kernel
} // namespace mindspore

+46 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.h

@@ -0,0 +1,46 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class BiasAddCPUKernel : public CPUKernel {
public:
BiasAddCPUKernel() = default;
~BiasAddCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
uint8_t data_shape_{0};
std::vector<size_t> input_shape_;
std::vector<size_t> bias_shape_;
};
MS_REG_CPU_KERNEL(
BiasAdd,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BiasAddCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_

+68 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc

@@ -0,0 +1,68 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h"

namespace mindspore {
namespace kernel {
void BiasAddGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (input_shape_.size() != 4 && input_shape_.size() != 2) {
MS_LOG(EXCEPTION) << "input data format not support";
}
}

bool BiasAddGradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs) {
if (inputs.size() != 1 || outputs.size() != 1) {
MS_LOG(EXCEPTION) << "input output size not support";
}
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);

if (input_shape_.size() == 4) {
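// NCHW: reduce the gradient over N, H and W so output[c] is the sum of all elements in channel c.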
size_t h_size = input_shape_[3];
size_t c_size = h_size * input_shape_[2];
size_t n_size = c_size * input_shape_[1];
size_t hw_size = input_shape_[2] * input_shape_[3];
size_t c_offset = 0;
for (size_t c = 0; c < input_shape_[1]; ++c) {
output_addr[c] = 0;
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
for (size_t hw = 0; hw < hw_size; ++hw) {
size_t offset = c_offset + n_offset + hw;
output_addr[c] += input_addr[offset];
}
n_offset += n_size;
}
c_offset += c_size;
}
} else if (input_shape_.size() == 2) {
for (size_t c = 0; c < input_shape_[1]; ++c) {
output_addr[c] = 0;
size_t n_offset = 0;
for (size_t n = 0; n < input_shape_[0]; ++n) {
output_addr[c] += input_addr[c + n_offset];
n_offset += input_shape_[1];
}
}
}
return true;
}
} // namespace kernel
} // namespace mindspore

+43 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h

@@ -0,0 +1,43 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class BiasAddGradCPUKernel : public CPUKernel {
public:
BiasAddGradCPUKernel() = default;
~BiasAddGradCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
std::vector<size_t> input_shape_;
};
MS_REG_CPU_KERNEL(BiasAddGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BiasAddGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_

+106 -0  mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc

@@ -0,0 +1,106 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/concat_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void ConcatCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);

axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
auto input_1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_1_shape.size());
}
axis_ += 4 - input_1_shape.size();

auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
for (size_t i = 0; i < input_num; i++) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
CPUKernelUtils::ExpandDimsTo4(&input_shape);
input_shape_list_.push_back(input_shape);
}

output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}

bool ConcatCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto buff_size = outputs[0]->size;
size_t dim0 = output_shape_[0];
size_t dim1 = output_shape_[1];
size_t dim2 = output_shape_[2];

if (axis_ == 3) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
for (size_t k = 0; k < dim2; ++k) {
CopyDataToOutput(inputs, i, j, k, &output_addr, &buff_size);
}
}
}
} else if (axis_ == 2) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
CopyDataToOutput(inputs, i, j, 0, &output_addr, &buff_size);
}
}
} else if (axis_ == 1) {
for (size_t i = 0; i < dim0; ++i) {
CopyDataToOutput(inputs, i, 0, 0, &output_addr, &buff_size);
}
} else if (axis_ == 0) {
CopyDataToOutput(inputs, 0, 0, 0, &output_addr, &buff_size);
}
return true;
}

void ConcatCPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1,
size_t dim2, float **output_addr, size_t *buff_size) {
for (size_t i = 0; i < input_shape_list_.size(); ++i) {
auto input_i_shape = input_shape_list_[i];
auto input_i_addr = reinterpret_cast<float *>(inputs[i]->addr);

size_t num = CPUKernelUtils::GetElementNumOnAxis(input_i_shape, axis_);
num *= input_i_shape[axis_];
auto pos = CPUKernelUtils::CalcOffset(input_i_shape, dim0, dim1, dim2, 0);
auto ret = memcpy_s(*output_addr, *buff_size, input_i_addr + pos, num * sizeof(float));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy failed.";
}
*output_addr += num;
*buff_size -= num * sizeof(float);
}
}

void ConcatCPUKernel::CheckParam(const CNodePtr &kernel_node) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but ConcatCPUKernel olny support 4d or lower.";
}

size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ConcatCPUKernel needs 1 output.";
}
}
} // namespace kernel
} // namespace mindspore
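
The axis arithmetic in InitKernel above is easy to misread: a negative axis is first wrapped into [0, rank), then shifted by 4 - rank so it still indexes the same dimension once ExpandDimsTo4 left-pads the shape with ones. A small standalone sketch of that normalization (values illustrative):

#include <cassert>
#include <cstddef>

int NormalizeAxisTo4D(int axis, size_t rank) {
  if (axis < 0) {
    axis += static_cast<int>(rank);    // e.g. axis = -1, rank = 2 -> axis = 1
  }
  axis += 4 - static_cast<int>(rank);  // left-pad shift: rank = 2 -> axis = 3
  return axis;
}

int main() {
  assert(NormalizeAxisTo4D(-1, 2) == 3);  // last axis of a 2-D tensor
  assert(NormalizeAxisTo4D(0, 4) == 0);   // already 4-D, unchanged
  return 0;
}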

+ 50
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.h View File

@@ -0,0 +1,50 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class ConcatCPUKernel : public CPUKernel {
public:
ConcatCPUKernel() : axis_(0) {}
~ConcatCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
float **output_addr, size_t *buff_size);
int axis_;
std::vector<std::vector<size_t>> input_shape_list_;
std::vector<size_t> output_shape_;
};

MS_REG_CPU_KERNEL(Concat,
KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ConcatCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_

+ 80
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc View File

@@ -0,0 +1,80 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
namespace mindspore {
namespace kernel {
void CPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
size_t type_size = sizeof(float);
for (size_t input_index = 0; input_index < input_num; ++input_index) {
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, input_index);
size_t tensor_size =
shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
input_size_list_.emplace_back(tensor_size);
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
for (size_t output_index = 0; output_index < output_num; ++output_index) {
std::vector<size_t> shape = AnfAlgo::GetOutputDeviceShape(kernel_node, output_index);
size_t tensor_size =
shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
output_size_list_.emplace_back(tensor_size);
}
}
void CPUKernel::Init(const CNodePtr &kernel_node) {
InitKernel(kernel_node);
InitInputOutputSize(kernel_node);
}
void CPUKernelUtils::ExpandDimsTo4(std::vector<size_t> *shape) {
auto len = shape->size();
if (len < 4) {
for (size_t i = 0; i < 4 - len; ++i) {
shape->insert(shape->begin(), 1);
}
}
}
size_t CPUKernelUtils::CalcOffset(const std::vector<size_t> &shape, size_t dim0, size_t dim1, size_t dim2,
size_t dim3) {
size_t offset = dim0 * shape[1] * shape[2] * shape[3] + dim1 * shape[2] * shape[3] + dim2 * shape[3] + dim3;
return offset;
}
size_t CPUKernelUtils::GetElementNumOnAxis(const std::vector<size_t> &shape, int axis) {
if (axis < 0) {
axis = axis + SizeToInt(shape.size());
}
size_t result = 1;
for (int j = 3; j > axis; --j) {
result *= shape[j];
}
return result;
}
void CPUKernelUtils::GetElementNumEveryDim(const std::vector<size_t> &shape, std::vector<size_t> *element_num) {
size_t accumulation = 1;
element_num->emplace_back(1);
for (size_t i = shape.size() - 1; i > 0; --i) {
accumulation *= shape[i];
element_num->emplace_back(accumulation);
}
std::reverse(element_num->begin(), element_num->end());
}
} // namespace kernel
} // namespace mindspore
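
A worked example of the helpers above, using the hypothetical 4-D shape {2, 3, 4, 5}: CalcOffset computes a row-major flat index, and GetElementNumOnAxis returns the number of elements in one slice below the given axis. Standalone sketch, not part of the kernel sources:

#include <cassert>
#include <cstddef>
#include <vector>

size_t CalcOffsetRef(const std::vector<size_t> &s, size_t d0, size_t d1, size_t d2, size_t d3) {
  return d0 * s[1] * s[2] * s[3] + d1 * s[2] * s[3] + d2 * s[3] + d3;
}

int main() {
  std::vector<size_t> shape{2, 3, 4, 5};
  // 1*60 + 2*20 + 3*5 + 4 = 119
  assert(CalcOffsetRef(shape, 1, 2, 3, 4) == 119);
  // GetElementNumOnAxis(shape, 1) would be shape[2] * shape[3] == 20
  return 0;
}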

+ 87
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h View File

@@ -0,0 +1,87 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_
#include <string>
#include <vector>
#include <memory>
#include <numeric>
#include <functional>
#include "backend/kernel_compiler/kernel.h"
#include "ir/anf.h"
#include "backend/session/anf_runtime_algorithm.h"
using mindspore::kernel::Address;
using mindspore::kernel::AddressPtr;
namespace mindspore {
namespace kernel {
const char KSIZE[] = "ksize";
const char STRIDE[] = "stride";
const char STRIDES[] = "strides";
const char DILATION[] = "dilation";
const char PAD[] = "pad";
const char PAD_MODE[] = "pad_mode";
const char PADDING[] = "padding";
const char PAD_MODE_LOWER_SAME[] = "same";
const char PAD_MODE_LOWER_VALID[] = "valid";
const char PAD_MODE_UPPER_SAME[] = "SAME";
const char PAD_MODE_UPPER_VALID[] = "VALID";
const char TRANSPOSE_A[] = "transpose_a";
const char TRANSPOSE_B[] = "transpose_b";
const char IS_GRAD[] = "is_grad";
const char TRANSPOSE_NO = 'N';
const char TRANSPOSE_YES = 'T';
const char AXIS[] = "axis";
const char BEGIN[] = "begin";
const char END[] = "end";
const char SIZE[] = "size";
const char USE_NESTEROV[] = "use_nesterov";
class CPUKernel : public kernel::KernelMod {
public:
CPUKernel() = default;
~CPUKernel() override = default;
virtual void Init(const CNodePtr &kernel_node);
virtual void InitKernel(const CNodePtr &kernel_node) = 0;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void * /*stream_ptr*/) override {
return Launch(inputs, workspace, outputs);
}
virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) = 0;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
protected:
virtual void InitInputOutputSize(const CNodePtr &kernel_node);
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
class CPUKernelUtils {
public:
static void ExpandDimsTo4(std::vector<size_t> *shape);
static size_t CalcOffset(const std::vector<size_t> &shape, size_t dim0, size_t dim1, size_t dim2, size_t dim3);
static size_t GetElementNumOnAxis(const std::vector<size_t> &shape, int axis);
static void GetElementNumEveryDim(const std::vector<size_t> &shape, std::vector<size_t> *element_num);
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_
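
Putting the interface above together: a minimal kernel only needs InitKernel plus the three-argument Launch, and registration is one macro call. A hedged sketch against the API shown in this commit — IdentityCPUKernel is hypothetical, not part of the change, and compiles only inside this tree:

class IdentityCPUKernel : public CPUKernel {
 public:
  void InitKernel(const CNodePtr &kernel_node) override { MS_EXCEPTION_IF_NULL(kernel_node); }
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
              const std::vector<AddressPtr> &outputs) override {
    // copy input 0 straight to output 0
    auto ret = memcpy_s(outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size);
    return ret == EOK;
  }
};
MS_REG_CPU_KERNEL(Identity, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                  IdentityCPUKernel);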

+ 104
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc View File

@@ -0,0 +1,104 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

#include <memory>
#include <iostream>
#include <string>

#include "runtime/device/kernel_info.h"

namespace mindspore {
namespace kernel {
CPUKernelFactory &CPUKernelFactory::GetInstance() {
static CPUKernelFactory instance;
return instance;
}

void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr &kernel_attr,
CPUKernelCreator &&kernel_creator) {
(void)name_to_attr_creator_[kernel_name].emplace_back(kernel_attr, kernel_creator);
#if !defined(_WIN32) && !defined(_WIN64)
MS_LOG(DEBUG) << "CPUKernelFactory register operator: " << kernel_name;
#endif
}

std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) {
auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info());
MS_EXCEPTION_IF_NULL(kernel_info);
const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info();
MS_EXCEPTION_IF_NULL(kernel_build_Info);
std::pair<bool, size_t> ret_pair = CPUKernelAttrCheck(kernel_name, *kernel_build_Info);
if (ret_pair.first) {
return (name_to_attr_creator_.find(kernel_name)->second)[ret_pair.second].second();
}
return nullptr;
}

std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &kernel_name,
const KernelBuildInfo &kernel_info) {
auto iter = name_to_attr_creator_.find(kernel_name);
if (iter == name_to_attr_creator_.end()) {
MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!";
return std::make_pair(false, 0);
}
auto creators = iter->second;
for (size_t index = 0; index < creators.size(); ++index) {
auto attr_creator = creators[index];
if (CPUKernelSingleAttrCheck(attr_creator.first, kernel_info)) {
return std::make_pair(true, index);
}
}
return std::make_pair(false, 0);
}

bool CPUKernelFactory::CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) {
for (size_t i = 0; i < kernel_info.GetInputNum(); ++i) {
auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetInputAttr(0).first : kernel_attr.GetInputAttr(i).first;
if (kernel_info.GetInputDeviceType(i) != dtype) {
MS_LOG(DEBUG) << "input index:" << i << ", kernel info type:" << kernel_info.GetInputDeviceType(i)
<< ", register type:" << dtype;
return false;
}
}
for (size_t i = 0; i < kernel_info.GetOutputNum(); ++i) {
auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetOutputAttr(0).first : kernel_attr.GetOutputAttr(i).first;
if (kernel_info.GetOutputDeviceType(i) != dtype) {
MS_LOG(DEBUG) << "output index:" << i << ", kernel info type:" << kernel_info.GetOutputDeviceType(i)
<< ", register type:" << dtype;
return false;
}
}
return true;
}

std::vector<KernelAttr> CPUKernelFactory::GetSupportedKernelAttrList(const std::string &kernel_name) {
std::vector<KernelAttr> result;
auto iter = name_to_attr_creator_.find(kernel_name);
if (iter == name_to_attr_creator_.end()) {
MS_LOG(WARNING) << "Not registered CPU kernel: op[" << kernel_name << "]!";
return result;
}
auto creators = iter->second;
for (size_t index = 0; index < creators.size(); ++index) {
auto attr_creator = creators[index];
result.push_back(attr_creator.first);
}
return result;
}
} // namespace kernel
} // namespace mindspore

+ 79
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.h View File

@@ -0,0 +1,79 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "common/utils.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "runtime/device/cpu/kernel_select_cpu.h"

namespace mindspore {
namespace kernel {
using mindspore::device::cpu::KernelAttr;
using CPUKernelCreator = std::function<std::shared_ptr<CPUKernel>()>;
class CPUKernelFactory {
public:
static CPUKernelFactory &GetInstance();
void Register(const std::string &kernel_name, const KernelAttr &kernel_attr, CPUKernelCreator &&kernel_creator);
std::shared_ptr<CPUKernel> Create(const std::string &kernel_name, const CNodePtr &apply_kernel);
std::vector<KernelAttr> GetSupportedKernelAttrList(const std::string &kernel_name);

private:
CPUKernelFactory() = default;
~CPUKernelFactory() = default;
DISABLE_COPY_AND_ASSIGN(CPUKernelFactory)
std::pair<bool, size_t> CPUKernelAttrCheck(const std::string &kernel_name, const KernelBuildInfo &kernel_info);
bool CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info);
std::map<std::string, std::vector<std::pair<KernelAttr, CPUKernelCreator>>> name_to_attr_creator_;
};

class CPUKernelRegistrar {
public:
CPUKernelRegistrar(const std::string &kernel_name, const KernelAttr &kernel_attr, CPUKernelCreator &&kernel_creator) {
CPUKernelFactory::GetInstance().Register(kernel_name, kernel_attr, std::move(kernel_creator));
}
~CPUKernelRegistrar() = default;
};

#define MS_REG_CPU_KERNEL(OPNAME, ATTR, OPCLASS) MS_REG_CPU_KERNEL_(__COUNTER__, OPNAME, ATTR, OPCLASS)
#define MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) _MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS)
#define _MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) \
static_assert(std::is_base_of<CPUKernel, OPCLASS>::value, " must be base of CPUKernel"); \
static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_reg(#OPNAME, ATTR, \
[]() { return std::make_shared<OPCLASS>(); });

#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) MS_REG_CPU_KERNEL_T_(__COUNTER__, OPNAME, ATTR, OPCLASS, T)
#define MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T)
#define _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) \
static_assert(std::is_base_of<CPUKernel, OPCLASS<T>>::value, " must be base of CPUKernel"); \
static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_##OPNAME##_##T##_reg( \
#OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T>>(); });

#define MS_REG_CPU_KERNEL_T_S(OPNAME, ATTR, OPCLASS, T, S) \
static_assert(std::is_base_of<CPUKernel, OPCLASS<T, S>>::value, " must be base of CPUKernel"); \
static const CPUKernelRegistrar g_cpu_kernel_##OPNAME##_##T##_##S##_reg( \
#OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T, S>>(); });
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_
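
The MS_REG_CPU_KERNEL macros above rely on static initialization: each expansion defines a file-scope CPUKernelRegistrar whose constructor stores a creator lambda in the singleton factory before main runs. A self-contained miniature of the same pattern (names here are illustrative, not MindSpore APIs):

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Kernel { virtual ~Kernel() = default; virtual void Run() = 0; };
using Creator = std::function<std::shared_ptr<Kernel>()>;

std::map<std::string, Creator> &Registry() {
  static std::map<std::string, Creator> r;  // function-local singleton avoids init-order issues
  return r;
}

struct Registrar {
  Registrar(const std::string &name, Creator c) { Registry()[name] = std::move(c); }
};

struct FooKernel : Kernel { void Run() override { std::cout << "Foo\n"; } };
static Registrar g_foo_reg("Foo", [] { return std::make_shared<FooKernel>(); });

int main() {
  Registry()["Foo"]()->Run();  // factory lookup and launch, as the session does
  return 0;
}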

+ 50
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.cc View File

@@ -0,0 +1,50 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/debug_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif

namespace mindspore {
namespace kernel {
void DebugCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); }

bool DebugCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "input or output empty!";
}
auto val = reinterpret_cast<float *>(inputs[0]->addr);
MS_LOG(DEBUG) << " launch DebugCountCPUKernel val " << *val;

auto output = reinterpret_cast<int *>(outputs[0]->addr);
size_t elem_num = inputs[0]->size / sizeof(float);
for (size_t i = 0; i < elem_num; i++) {
output[i] = val[i];
}

#ifdef ENABLE_DEBUGGER
// the debugger will suspend execution if necessary
Debugger::GetInstance()->PostDebugOp();
#endif

return true;
}
} // namespace kernel
} // namespace mindspore

+ 41
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/debug_cpu_kernel.h View File

@@ -0,0 +1,41 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class DebugCPUKernel : public CPUKernel {
public:
DebugCPUKernel() = default;
~DebugCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(Debug, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), DebugCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_

+ 78
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.cc View File

@@ -0,0 +1,78 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thread>
#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_adapter.h"

namespace mindspore {
namespace kernel {
void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num");
MS_LOG(INFO) << "split_num: " << split_num_;
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape[0] % split_num_ != 0) {
MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num.";
}
}

bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
#if defined(_WIN32) || defined(_WIN64)
auto start_time = std::chrono::steady_clock::now();
#else
struct timeval start_time, end_time;
(void)gettimeofday(&start_time, nullptr);
#endif
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
size_t input_size = inputs[0]->size;
size_t output_size = outputs[0]->size;
MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size;
MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size;
memset_s(output_addr, output_size, 0, output_size);
const std::vector<int> &rank_group = {0, 1, 2, 3, 4, 5, 6, 7};
size_t input_split_lens = input_size / split_num_ / sizeof(float_t);
size_t output_split_lens = output_size / split_num_ / sizeof(float_t);
auto mpi_instance = device::cpu::MPIAdapter::Instance();
MS_EXCEPTION_IF_NULL(mpi_instance);
for (int i = 0; i < split_num_; i++) {
mpi_instance->AllGather(input_addr + i * input_split_lens, output_addr + i * output_split_lens, rank_group,
input_split_lens);
}
#if defined(_WIN32) || defined(_WIN64)
auto end_time = std::chrono::steady_clock::now();
std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time;
MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << cost.count() << " us";
#else
(void)gettimeofday(&end_time, nullptr);
uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << time << " us";
#endif
return true;
}

void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1.";
}
}
} // namespace kernel
} // namespace mindspore
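
The split lengths above are byte sizes converted to float counts: each of the split_num AllGather calls moves input_size / split_num bytes worth of floats from every rank in the hard-coded 8-rank group into the matching slice of the output. A small arithmetic sketch, assuming (as an 8-rank AllGather implies) that the output is 8x the input; the concrete sizes are hypothetical:

#include <cassert>
#include <cstddef>

int main() {
  const size_t ranks = 8, split_num = 4;
  const size_t input_size = 4096, output_size = input_size * ranks;   // bytes
  size_t input_split_lens = input_size / split_num / sizeof(float);   // 256 floats per call
  size_t output_split_lens = output_size / split_num / sizeof(float); // 2048 floats per slice
  assert(output_split_lens == input_split_lens * ranks);
  return 0;
}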

+ 46
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h View File

@@ -0,0 +1,46 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class EmbeddingLookUpCommGradCPUKernel : public CPUKernel {
public:
EmbeddingLookUpCommGradCPUKernel() : split_num_(1) {}
~EmbeddingLookUpCommGradCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
void CheckParam(const CNodePtr &kernel_node);
int split_num_;
};

MS_REG_CPU_KERNEL(EmbeddingLookupCommGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EmbeddingLookUpCommGradCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_

+ 212
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc View File

@@ -0,0 +1,212 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <thread>
#include <string>
#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"

namespace mindspore {
namespace kernel {
void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
input_lens_ = 1;
for (auto shape : input_shape_) {
input_lens_ = input_lens_ * shape;
}
indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
indices_lens_ = 1;
for (auto shape : indices_shape_) {
indices_lens_ = indices_lens_ * shape;
}
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
axis_ = 4 - input_shape_.size();
if (AnfAlgo::HasNodeAttr(kAttrReduceScatterFlag, kernel_node)) {
reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrReduceScatterFlag);
}
#ifdef ENABLE_MPI
if (reduce_scatter_flag_) {
size_t gatherv2_out_lens = 1;
for (int i = 0; i < SizeToInt(input_shape_.size()); i++) {
if (i == 0) {
for (int j = 0; j < SizeToInt(indices_shape_.size()); j++) {
gatherv2_out_lens = gatherv2_out_lens * indices_shape_[j];
}
} else {
gatherv2_out_lens = gatherv2_out_lens * input_shape_[i];
}
}
gatherv2_out_lens_ = gatherv2_out_lens * sizeof(float);
gather_v2_out_ = malloc(gatherv2_out_lens_);
if (gather_v2_out_ == nullptr) {
MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel malloc failed, malloc lens: " << gatherv2_out_lens_;
}
auto ret = memset_s(gather_v2_out_, gatherv2_out_lens_, 0, gatherv2_out_lens_);
if (ret != 0) {
MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel memset gatherv2 out buff failed";
}
split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num");
}
#else
if (reduce_scatter_flag_) {
MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true";
}
#endif
if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) {
offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, kAttrOffset);
}
CPUKernelUtils::ExpandDimsTo4(&input_shape_);
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}

bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
float *gather_out_addr = reduce_scatter_flag_ ? reinterpret_cast<float *>(gather_v2_out_) : output_addr;
size_t dim0 = input_shape_[0];
size_t dim1 = input_shape_[1];
size_t dim2 = input_shape_[2];
if (axis_ == 3) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
for (size_t k = 0; k < dim2; ++k) {
LookUpTable(inputs, i, j, k, &gather_out_addr);
}
}
}
} else if (axis_ == 2) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
LookUpTable(inputs, i, j, 0, &gather_out_addr);
}
}
} else if (axis_ == 1) {
for (size_t i = 0; i < dim0; ++i) {
LookUpTable(inputs, i, 0, 0, &gather_out_addr);
}
} else if (axis_ == 0) {
LookUpTable(inputs, 0, 0, 0, &gather_out_addr);
}
#ifdef ENABLE_MPI
if (reduce_scatter_flag_) {
size_t one_split_lens = gatherv2_out_lens_ / split_num_ / sizeof(float);
size_t reduce_scatter_out_lens = one_split_lens / 8;
const std::vector<int> &group = {0, 1, 2, 3, 4, 5, 6, 7};
auto mpi_instance = device::cpu::MPIAdapter::Instance();
MS_EXCEPTION_IF_NULL(mpi_instance);
for (int i = 0; i < split_num_; i++) {
mpi_instance->ReduceScatter(reinterpret_cast<float *>(gather_v2_out_) + i * one_split_lens,
output_addr + i * reduce_scatter_out_lens, group, reduce_scatter_out_lens, "sum");
}
}
#endif
return true;
}

void LookUpTable_task(const float *input_addr, float *output_addr, const int *indices_addr, size_t indices_lens,
size_t num, size_t dim0, size_t dim1, size_t dim2, int offset, size_t axis,
std::vector<size_t> input_shape, size_t input_lens) {
size_t lens = num * sizeof(float);
for (size_t i = 0; i < indices_lens; ++i) {
int indices = indices_addr[i] - offset;
if (indices >= 0) {
size_t index = IntToSize(indices);
if (index < input_shape[axis]) {
size_t pos = 0;
if (axis == 3) {
pos = CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, dim2, index);
} else if (axis == 2) {
pos = CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, index, 0);
} else if (axis == 1) {
pos = CPUKernelUtils::CalcOffset(input_shape, dim0, index, 0, 0);
} else if (axis == 0) {
pos = CPUKernelUtils::CalcOffset(input_shape, index, 0, 0, 0);
}
if (pos + num <= input_lens) {
auto ret = memcpy_s(output_addr, lens, input_addr + pos, lens);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed.";
}
} else {
auto ret = memset_s(output_addr, lens, 0, lens);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "LookUpTable task memset failed.";
}
}
} else {
auto ret = memset_s(output_addr, lens, 0, lens);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "LookUpTable task memset failed.";
}
}
} else {
auto ret = memset_s(output_addr, lens, 0, lens);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "LookUpTable task memset failed.";
}
}
output_addr += num;
}
}

void EmbeddingLookUpCPUKernel::LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1,
size_t dim2, float **output_addr) {
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_);
float *task_out_addr = *output_addr;
const size_t thread_num = 8;
std::thread threads[8];
size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num;
size_t i;
size_t task_offset = 0;
MS_LOG(DEBUG) << "indices_lens_: " << indices_lens_ << " one task proc lens:" << task_proc_lens;
for (i = 0; i < thread_num; i++) {
if (task_offset >= indices_lens_) {
break;
}
MS_LOG(DEBUG) << "task_offset: " << task_offset << " task_proc_lenss:" << task_proc_lens;
threads[i] =
std::thread(LookUpTable_task, input_addr, task_out_addr + task_offset * num, indices_addr + task_offset,
task_proc_lens, num, dim0, dim1, dim2, offset_, axis_, input_shape_, input_lens_);
task_offset += task_proc_lens;
if (task_offset + task_proc_lens > indices_lens_) {
task_proc_lens = indices_lens_ - task_offset;
}
}
for (size_t j = 0; j < i; j++) {
threads[j].join();
}
*output_addr += num * indices_lens_;
}

void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size()
<< ", but EmbeddingLookUpCPUKernel olny support 4d or lower.";
}

size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2.";
}
}
} // namespace kernel
} // namespace mindspore
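
LookUpTable above fans the indices out over at most 8 threads: the chunk length starts at ceil(indices_lens / 8) and the final chunk is clipped so no thread reads past the end. A standalone sketch of that partitioning (it clips before pushing rather than after, which yields the same spans):

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

std::vector<std::pair<size_t, size_t>> Chunk(size_t total, size_t threads) {
  std::vector<std::pair<size_t, size_t>> spans;  // (offset, length) per thread
  size_t len = (total + threads - 1) / threads;
  for (size_t off = 0; off < total && spans.size() < threads; off += len) {
    if (off + len > total) {
      len = total - off;  // clip the last chunk, as the kernel does
    }
    spans.emplace_back(off, len);
  }
  return spans;
}

int main() {
  auto spans = Chunk(10, 8);  // chunk length ceil(10/8) = 2 -> only 5 threads get work
  assert(spans.size() == 5 && spans.back().first == 8 && spans.back().second == 2);
  return 0;
}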

+ 74
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h View File

@@ -0,0 +1,74 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class EmbeddingLookUpCPUKernel : public CPUKernel {
public:
EmbeddingLookUpCPUKernel() {
axis_ = 0;
offset_ = 0;
split_num_ = 0;
input_lens_ = 0;
indices_lens_ = 0;
gatherv2_out_lens_ = 0;
reduce_scatter_flag_ = false;
gather_v2_out_ = nullptr;
}
~EmbeddingLookUpCPUKernel() override {
if (gather_v2_out_ != nullptr) {
free(gather_v2_out_);
gather_v2_out_ = nullptr;
}
}

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
void LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
float **output_addr);
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> input_shape_;
std::vector<size_t> indices_shape_;
std::vector<size_t> output_shape_;
int axis_;
int offset_;
int split_num_;
size_t input_lens_;
size_t indices_lens_;
size_t gatherv2_out_lens_;
bool reduce_scatter_flag_;

void *gather_v2_out_;
};

MS_REG_CPU_KERNEL(
EmbeddingLookup,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
EmbeddingLookUpCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_

+ 46
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.cc View File

@@ -0,0 +1,46 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void EqualCountCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {}

bool EqualCountCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "input or output empty!";
}
if (inputs[0]->size != inputs[1]->size) {
MS_LOG(EXCEPTION) << "input or output size!";
}
int count = 0;
auto left = reinterpret_cast<int *>(inputs[0]->addr);
auto right = reinterpret_cast<int *>(inputs[1]->addr);
size_t elem_num = inputs[0]->size / sizeof(int);
for (size_t i = 0; i < elem_num; i++) {
if (left[i] == right[i]) {
count++;
}
}
auto output = reinterpret_cast<int *>(outputs[0]->addr);
output[0] = count;
return true;
}
} // namespace kernel
} // namespace mindspore
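
For reference, EqualCount reduces two equally sized int tensors to a single count of matching positions. A standalone restatement with hypothetical data:

#include <cassert>
#include <cstddef>
#include <vector>

int EqualCountRef(const std::vector<int> &a, const std::vector<int> &b) {
  int count = 0;
  for (size_t i = 0; i < a.size() && i < b.size(); ++i) {
    count += (a[i] == b[i]);  // 1 where elements agree
  }
  return count;
}

int main() {
  assert(EqualCountRef({1, 2, 3}, {1, 0, 3}) == 2);
  return 0;
}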

+ 43
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/equal_count_cpu_kernel.h View File

@@ -0,0 +1,43 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class EqualCountCPUKernel : public CPUKernel {
public:
EqualCountCPUKernel() = default;
~EqualCountCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(
EqualCount,
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
EqualCountCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_

+ 115
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.cc View File

@@ -0,0 +1,115 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void GatherV2CPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS);
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_shape_.size());
}
axis_ += 4 - input_shape_.size();
CPUKernelUtils::ExpandDimsTo4(&input_shape_);
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}

bool GatherV2CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
auto buff_size = outputs[0]->size;
size_t dim0 = input_shape_[0];
size_t dim1 = input_shape_[1];
size_t dim2 = input_shape_[2];
if (axis_ == 3) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
for (size_t k = 0; k < dim2; ++k) {
CopyDataToOutput(inputs, i, j, k, &output_addr, &buff_size);
}
}
}
} else if (axis_ == 2) {
for (size_t i = 0; i < dim0; ++i) {
for (size_t j = 0; j < dim1; ++j) {
CopyDataToOutput(inputs, i, j, 0, &output_addr, &buff_size);
}
}
} else if (axis_ == 1) {
for (size_t i = 0; i < dim0; ++i) {
CopyDataToOutput(inputs, i, 0, 0, &output_addr, &buff_size);
}
} else if (axis_ == 0) {
CopyDataToOutput(inputs, 0, 0, 0, &output_addr, &buff_size);
}
return true;
}

void GatherV2CPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1,
size_t dim2, float **output_addr, size_t *buff_size) {
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
size_t elem_num = inputs[1]->size / sizeof(int);
size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_);
for (size_t i = 0; i < elem_num; ++i) {
if (indices_addr[i] < 0) {
MS_LOG(EXCEPTION) << "The indices value is less than 0.";
}
size_t index = IntToSize(indices_addr[i]);
if (index >= input_shape_[IntToSize(axis_)]) {
auto ret = memset_s(*output_addr, *buff_size, 0., num * sizeof(float));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memset failed.";
}
} else {
size_t pos = 0;
if (axis_ == 3) {
pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, dim2, index);
} else if (axis_ == 2) {
pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, index, 0);
} else if (axis_ == 1) {
pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, index, 0, 0);
} else if (axis_ == 0) {
pos = CPUKernelUtils::CalcOffset(input_shape_, index, 0, 0, 0);
}
auto ret = memcpy_s(*output_addr, *buff_size, input_addr + pos, num * sizeof(float));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy failed.";
}
}
*output_addr += num;
*buff_size -= num * sizeof(float);
}
}

void GatherV2CPUKernel::CheckParam(const CNodePtr &kernel_node) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (input_shape.size() > 4) {
MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but GatherV2CPUKernel olny support 4d or lower.";
}
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherV2CPUKernel needs 2.";
}
}
} // namespace kernel
} // namespace mindspore
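
Reduced to 1-D, the copy loop above implements out[i] = params[indices[i]], zero-filling rows whose index runs past the end (negative indices raise an exception in the kernel itself). A standalone sketch with hypothetical data:

#include <cassert>
#include <cstddef>
#include <vector>

std::vector<float> Gather1D(const std::vector<float> &params, const std::vector<int> &indices) {
  std::vector<float> out;
  for (int idx : indices) {
    // the kernel raises on idx < 0; here both bad cases simply yield 0.0f
    bool ok = idx >= 0 && static_cast<size_t>(idx) < params.size();
    out.push_back(ok ? params[static_cast<size_t>(idx)] : 0.0f);
  }
  return out;
}

int main() {
  auto out = Gather1D({10.f, 20.f, 30.f}, {2, 0, 5});  // 5 is out of range
  assert(out[0] == 30.f && out[1] == 10.f && out[2] == 0.f);
  return 0;
}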

+ 52
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/gather_cpu_kernel.h View File

@@ -0,0 +1,52 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class GatherV2CPUKernel : public CPUKernel {
public:
GatherV2CPUKernel() : axis_(0) {}
~GatherV2CPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
float **output_addr, size_t *buff_size);
void CheckParam(const CNodePtr &kernel_node);
std::vector<size_t> input_shape_;
std::vector<size_t> indices_shape_;
std::vector<size_t> output_shape_;
int axis_;
};

MS_REG_CPU_KERNEL(
GatherV2,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
GatherV2CPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_

+ 91
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc View File

@@ -0,0 +1,91 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 || weight_shape.size() != 4) {
MS_LOG(EXCEPTION) << "conv2d only support nchw input!";
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);

int kernel_size = SizeToInt(weight_shape[3]);
auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!";
}
if (stride_ori[0] != 1 || stride_ori[1] != 1) {
MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!";
}
if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) {
MS_LOG(EXCEPTION) << "conv2d dilation only support 1, and dilation must be 4d!";
}
if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!";
}
int stride = stride_ori[2];
int dilation = dilation_ori[2];

dnnl::memory::dims strides{stride, stride};
dnnl::memory::dims dilates{dilation - 1, dilation - 1};
std::vector<int> int_padding_l;
std::vector<int> int_padding_r;

const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
MS_LOG(EXCEPTION) << "get padding failed";
}
dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]};
dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]};
dnnl::convolution_forward::desc desc =
dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc,
weights_desc, dst_desc, strides, dilates, padding_l, padding_r);

auto prim_desc = dnnl::convolution_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::convolution_forward>(prim_desc);

AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_WEIGHTS, weights_desc);
AddArgument(DNNL_ARG_DST, dst_desc);
}

bool Conv2dCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
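
One detail worth calling out in the primitive setup above: oneDNN counts dilation as the number of inserted gaps, so a framework dilation of d maps to a oneDNN dilate of d - 1 (the common d = 1 case becomes 0), which is why the code builds dilates{dilation - 1, dilation - 1}. A minimal sketch of just that mapping:

#include <cassert>

int ToDnnlDilate(int framework_dilation) { return framework_dilation - 1; }

int main() {
  assert(ToDnnlDilate(1) == 0);  // no dilation
  assert(ToDnnlDilate(2) == 1);  // one gap between filter taps
  return 0;
}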

+ 43
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h View File

@@ -0,0 +1,43 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class Conv2dCPUKernel : public MKLCPUKernel {
public:
Conv2dCPUKernel() = default;
~Conv2dCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(
Conv2D,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
Conv2dCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_

+ 93
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc View File

@@ -0,0 +1,93 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> weight_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 || weight_shape.size() != 4) {
MS_LOG(EXCEPTION) << ("conv2d grad filter only support nchw input!");
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);

int kernel_size = SizeToInt(weight_shape[3]);
auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel only support equal stride, and stride must be 2d!";
}
if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) {
MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation only support 1, and dilation must be 4d!";
}
if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation only support 1 in N axis and C axis!";
}
int stride = stride_ori[0];
int dilation = dilation_ori[2];

dnnl::memory::dims strides{stride, stride};
dnnl::memory::dims dilates{dilation - 1, dilation - 1};
const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
std::vector<int> int_padding_l;
std::vector<int> int_padding_r;
GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
MS_LOG(EXCEPTION) << "get padding failed";
}
dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]};
dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]};
dnnl::convolution_forward::desc forward_desc =
dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc,
weights_desc, dst_desc, strides, dilates, padding_l, padding_r);

auto forward_prim_desc = dnnl::convolution_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine());

dnnl::convolution_backward_weights::desc backward_desc = dnnl::convolution_backward_weights::desc(
dnnl::algorithm::convolution_auto, src_desc, weights_desc, dst_desc, strides, dilates, padding_l, padding_r);

auto backward_prim_desc = dnnl::convolution_backward_weights::primitive_desc(
backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
primitive_ = std::make_shared<dnnl::convolution_backward_weights>(backward_prim_desc);

AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DIFF_DST, dst_desc);
AddArgument(DNNL_ARG_DIFF_WEIGHTS, weights_desc);
}

bool Conv2dGradFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
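
The backward-weights primitive above cannot be built in isolation: oneDNN wants a forward convolution primitive_desc as a hint so both directions agree on memory layouts, which is why InitKernel first constructs forward_prim_desc. A minimal standalone sketch of that hint pattern, assuming the oneDNN 1.x C++ API used in this file (shapes are illustrative only):

#include "dnnl.hpp"

int main() {
  using tag = dnnl::memory::format_tag;
  using dt = dnnl::memory::data_type;
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  // illustrative shapes: N=1, C=3, H=W=32, 8 output channels, 3x3 kernel
  dnnl::memory::desc src({1, 3, 32, 32}, dt::f32, tag::nchw);
  dnnl::memory::desc wei({8, 3, 3, 3}, dt::f32, tag::oihw);
  dnnl::memory::desc dst({1, 8, 32, 32}, dt::f32, tag::nchw);
  dnnl::memory::dims strides{1, 1}, dilates{0, 0}, pad_l{1, 1}, pad_r{1, 1};
  // 1) describe the forward convolution the gradient belongs to
  auto fwd_desc = dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training,
                                                  dnnl::algorithm::convolution_auto,
                                                  src, wei, dst, strides, dilates, pad_l, pad_r);
  auto fwd_pd = dnnl::convolution_forward::primitive_desc(fwd_desc, eng);
  // 2) describe backward-weights, passing the forward primitive_desc as the hint
  auto bwd_desc = dnnl::convolution_backward_weights::desc(
    dnnl::algorithm::convolution_auto, src, wei, dst, strides, dilates, pad_l, pad_r);
  auto bwd_pd = dnnl::convolution_backward_weights::primitive_desc(bwd_desc, eng, fwd_pd);
  dnnl::convolution_backward_weights bwd(bwd_pd);  // the same step as primitive_ above
  return 0;
}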

+ 43
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h

@@ -0,0 +1,43 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class Conv2dGradFilterCPUKernel : public MKLCPUKernel {
public:
Conv2dGradFilterCPUKernel() = default;
~Conv2dGradFilterCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(
Conv2DBackpropFilter,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
Conv2dGradFilterCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_

+ 92
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc

@@ -0,0 +1,92 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 || weight_shape.size() != 4) {
MS_LOG(EXCEPTION) << "conv2d grad filter only support nchw input!";
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);

int kernel_size = SizeToInt(weight_shape[3]);
auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE);
auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION);
if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel only support equal stride, and stride must be 2d!";
}
if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) {
MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation only support 1, and dilation must be 4d!";
}
if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation only support 1 in N axis and C axis!";
}
int stride = stride_ori[0];
int dilation = dilation_ori[2];
dnnl::memory::dims strides{stride, stride};
dnnl::memory::dims dilates{dilation - 1, dilation - 1};
std::vector<int> int_padding_l;
std::vector<int> int_padding_r;
const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r);
if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
MS_LOG(EXCEPTION) << "conv2d grad get padding failed";
}
dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]};
dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]};
dnnl::convolution_forward::desc forward_desc =
dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc,
weights_desc, dst_desc, strides, dilates, padding_l, padding_r);

auto forward_prim_desc = dnnl::convolution_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine());

dnnl::convolution_backward_data::desc backward_desc = dnnl::convolution_backward_data::desc(
dnnl::algorithm::convolution_auto, src_desc, weights_desc, dst_desc, strides, dilates, padding_l, padding_r);

auto backward_prim_desc =
dnnl::convolution_backward_data::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
primitive_ = std::make_shared<dnnl::convolution_backward_data>(backward_prim_desc);

AddArgument(DNNL_ARG_DIFF_SRC, src_desc);
AddArgument(DNNL_ARG_DIFF_DST, dst_desc);
AddArgument(DNNL_ARG_WEIGHTS, weights_desc);
}

bool Conv2dGradInputCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore

+ 43
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h

@@ -0,0 +1,43 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class Conv2dGradInputCPUKernel : public MKLCPUKernel {
public:
Conv2dGradInputCPUKernel() = default;
~Conv2dGradInputCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(
Conv2DBackpropInput,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
Conv2dGradInputCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_

+ 141
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.cc

@@ -0,0 +1,141 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
#ifdef PLATFORM_86
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
MS_EXCEPTION_IF_NULL(kernel_node);
using tag = dnnl::memory::format_tag;
using dim = dnnl::memory::dims;
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);
num_directions_ = 1;
if (bidirectional_) {
num_directions_ = 2;
}
if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
MS_LOG(EXCEPTION) << "error iteration shape!";
}
if (num_layers_ <= 0) {
MS_LOG(EXCEPTION) << "layers must be greater than zero!";
}
if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {
MS_LOG(EXCEPTION) << "conv2d only support 3-D input!";
}
const int gate_size = 4 * hidden_size_;
for (int i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
auto eng = MKLKernelEngine::Get().engine();
dnnl::stream s(eng);
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
if (bidirectional_) {
direction = dnnl::rnn_direction::bidirectional_concat;
}
dim src_dims = {seq_len_, batch_size_, input_size_};
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_};
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo);
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
auto desc = std::make_shared<dnnl::lstm_forward::desc>(dnnl::prop_kind::forward_training, direction, src_desc,
src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any),
formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc,
dst_h_desc, dst_c_desc);
prim_desc_ = dnnl::lstm_forward::primitive_desc(*desc, eng);
primitive_ = std::make_shared<dnnl::lstm_forward>(prim_desc_);
AddArgument(DNNL_ARG_SRC_LAYER, src_desc);
AddArgument(DNNL_ARG_SRC_ITER, src_h_desc);
AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc);
AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_desc_.weights_layer_desc());
AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_desc_.weights_iter_desc());
AddArgument(DNNL_ARG_BIAS, bias_desc);
AddArgument(DNNL_ARG_DST_LAYER, dst_desc);
AddArgument(DNNL_ARG_DST_ITER, dst_h_desc);
AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc);
AddArgument(DNNL_ARG_WORKSPACE, prim_desc_.workspace_desc());
}
bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using dt = dnnl::memory::data_type;
using tag = dnnl::memory::format_tag;
auto eng = MKLKernelEngine::Get().engine();
auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng);
auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng);
auto weights_memory = dnnl::memory(prim_desc_.weights_layer_desc(), eng);
auto weights_h_memory = dnnl::memory(prim_desc_.weights_iter_desc(), eng);
user_weights_memory.set_data_handle(inputs[3]->addr);
user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_);
Reorder(&user_weights_memory, &weights_memory);
Reorder(&user_weights_h_memory, &weights_h_memory);
auto bias_memory = dnnl::memory(prim_desc_.bias_desc(), eng);
if (has_bias_) {
bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_);
} else {
auto ret =
memset_s(bias_memory.get_data_handle(), prim_desc_.bias_desc().get_size(), 0, prim_desc_.bias_desc().get_size());
if (ret != 0) {
MS_LOG(EXCEPTION) << "bias memset error";
}
}
// bind handles; inputs: 0=x, 1=h0, 2=c0, 3=flat weights (w_x, then w_h, then bias); outputs: 0=y, 1=hy, 2=cy, 3=workspace
SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr);
SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_DST_LAYER, outputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST_ITER, outputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DST_ITER_C, outputs[2]->addr);
SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[3]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
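
Launch above carves three regions out of the single flat buffer inputs[3]: the input weights first, the recurrent weights at offset weight_size_, and (when has_bias_) the bias at weight_size_ + weight_h_size_. A worked sketch of that offset arithmetic, with illustrative hyperparameters:

#include <cstdio>

int main() {
  // illustrative hyperparameters
  int input_size = 10, hidden_size = 32, num_layers = 1, num_directions = 1;
  const int gate_size = 4 * hidden_size;  // an LSTM cell has 4 gates
  int weight_size = 0, weight_h_size = 0;
  for (int i = 0; i < num_layers; ++i) {
    // layer 0 consumes the input; deeper layers consume the previous layer's output
    weight_size += gate_size * (i == 0 ? input_size : hidden_size * num_directions);
    weight_h_size += gate_size * hidden_size;
  }
  weight_size *= num_directions;
  weight_h_size *= num_directions;
  printf("w_x floats: %d (offset 0)\n", weight_size);                  // 1280
  printf("w_h floats: %d (offset %d)\n", weight_h_size, weight_size);  // 4096 at offset 1280
  printf("bias offset: %d\n", weight_size + weight_h_size);            // 5376
  return 0;
}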

+ 70
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h

@@ -0,0 +1,70 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_
#if defined(__x86_64__) || defined(__amd64__) || defined(_M_IX86) || defined(_M_X64)
#define PLATFORM_86
#endif
#ifdef PLATFORM_86
#include <pmmintrin.h>
#endif
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class LstmCPUKernel : public MKLCPUKernel {
public:
LstmCPUKernel() = default;
~LstmCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
int weight_size_ = 0;
int weight_h_size_ = 0;
int input_size_;
int hidden_size_;
int num_layers_;
int batch_size_;
int seq_len_;
int num_directions_;
bool bidirectional_;
bool has_bias_;
dnnl::memory::dims weights_dims_;
dnnl::memory::dims weights_h_dims_;
dnnl::memory::dims bias_dims_;
dnnl::lstm_forward::primitive_desc prim_desc_;
};
MS_REG_CPU_KERNEL(LSTM,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LstmCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif  // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_

+ 196
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.cc

@@ -0,0 +1,196 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h"
#include <cstring>
#include <cmath>
#include <numeric>
#include <string>
#include "common/utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
using tag = dnnl::memory::format_tag;
using dim = dnnl::memory::dims;
auto eng = MKLKernelEngine::Get().engine();
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);
num_directions_ = 1;
if (bidirectional_) {
num_directions_ = 2;
}
if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
MS_LOG(EXCEPTION) << "error iteration shape!";
}
if (num_layers_ <= 0) {
MS_LOG(EXCEPTION) << "layers must be greater than zero!";
}
if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {
MS_LOG(EXCEPTION) << "conv2d only support 3-D input!";
}
const int gate_size = 4 * hidden_size_;
for (int i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
if (bidirectional_) {
direction = dnnl::rnn_direction::bidirectional_concat;
}
dim src_dims = {seq_len_, batch_size_, input_size_};
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_};
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo);
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
auto forward_desc = std::make_shared<dnnl::lstm_forward::desc>(
dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc,
formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc,
dst_c_desc);
auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(*forward_desc, eng);
auto backward_desc = std::make_shared<dnnl::lstm_backward::desc>(
dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any),
formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc,
src_c_desc, formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc,
dst_h_desc, dst_c_desc);
prim_backward_desc_ = dnnl::lstm_backward::primitive_desc(*backward_desc, eng, prim_forward_desc);
primitive_ = std::make_shared<dnnl::lstm_backward>(prim_backward_desc_);
AddArgument(DNNL_ARG_SRC_LAYER, src_desc);
AddArgument(DNNL_ARG_SRC_ITER, src_h_desc);
AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc);
AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_backward_desc_.weights_layer_desc());
AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_backward_desc_.weights_iter_desc());
AddArgument(DNNL_ARG_BIAS, bias_desc);
AddArgument(DNNL_ARG_DST_LAYER, dst_desc);
AddArgument(DNNL_ARG_DST_ITER, dst_h_desc);
AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc);
AddArgument(DNNL_ARG_WORKSPACE, prim_forward_desc.workspace_desc());
AddArgument(DNNL_ARG_DIFF_SRC_LAYER, src_desc);
AddArgument(DNNL_ARG_DIFF_SRC_ITER, src_h_desc);
AddArgument(DNNL_ARG_DIFF_SRC_ITER_C, src_c_desc);
AddArgument(DNNL_ARG_DIFF_WEIGHTS_LAYER, prim_backward_desc_.diff_weights_layer_desc());
AddArgument(DNNL_ARG_DIFF_WEIGHTS_ITER, prim_backward_desc_.diff_weights_iter_desc());
AddArgument(DNNL_ARG_DIFF_BIAS, bias_desc);
AddArgument(DNNL_ARG_DIFF_DST_LAYER, dst_desc);
AddArgument(DNNL_ARG_DIFF_DST_ITER, dst_h_desc);
AddArgument(DNNL_ARG_DIFF_DST_ITER_C, dst_c_desc);
}
bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using dt = dnnl::memory::data_type;
using tag = dnnl::memory::format_tag;
auto eng = MKLKernelEngine::Get().engine();
// construct fw memory
auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng);
auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng);
auto weights_memory = dnnl::memory(prim_backward_desc_.weights_layer_desc(), eng);
auto weights_h_memory = dnnl::memory(prim_backward_desc_.weights_iter_desc(), eng);
auto bias_memory = dnnl::memory(prim_backward_desc_.bias_desc(), eng);
user_weights_memory.set_data_handle(inputs[3]->addr);
user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_);
Reorder(&user_weights_memory, &weights_memory);
Reorder(&user_weights_h_memory, &weights_h_memory);
if (has_bias_) {
bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_);
} else {
if (memset_s(bias_memory.get_data_handle(), prim_backward_desc_.bias_desc().get_size(), 0,
prim_backward_desc_.bias_desc().get_size())) {
MS_LOG(EXCEPTION) << "bias memset error";
}
}
// construct bw memory
auto diff_weights_memory = dnnl::memory(prim_backward_desc_.diff_weights_layer_desc(), eng);
auto diff_weights_h_memory = dnnl::memory(prim_backward_desc_.diff_weights_iter_desc(), eng);
auto diff_bias_memory = dnnl::memory(prim_backward_desc_.diff_bias_desc(), eng);
auto user_diff_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng);
auto user_diff_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng);
user_diff_weights_memory.set_data_handle(outputs[3]->addr);
user_diff_weights_h_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_);
if (memset_s(user_diff_weights_memory.get_data_handle(), user_diff_weights_memory.get_desc().get_size(), 0,
user_diff_weights_memory.get_desc().get_size())) {
MS_LOG(EXCEPTION) << "user weights grad memset error";
}
if (memset_s(user_diff_weights_h_memory.get_data_handle(), user_diff_weights_h_memory.get_desc().get_size(), 0,
user_diff_weights_h_memory.get_desc().get_size())) {
MS_LOG(EXCEPTION) << "user weights iter grad memset error";
}
if (has_bias_) {
diff_bias_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_ + weight_h_size_);
}
if (memset_s(diff_bias_memory.get_data_handle(), prim_backward_desc_.diff_bias_desc().get_size(), 0,
prim_backward_desc_.diff_bias_desc().get_size())) {
MS_LOG(EXCEPTION) << "bias grad memset error";
}
if (memset_s(diff_weights_memory.get_data_handle(), diff_weights_memory.get_desc().get_size(), 0,
diff_weights_memory.get_desc().get_size())) {
MS_LOG(EXCEPTION) << "weights grad memset error";
}
if (memset_s(diff_weights_h_memory.get_data_handle(), diff_weights_h_memory.get_desc().get_size(), 0,
diff_weights_h_memory.get_desc().get_size())) {
MS_LOG(EXCEPTION) << "weights iter grad memset error";
}
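// Bind handles. The ordering below implies inputs: 0=x, 1=h0, 2=c0, 3=flat weights,
// 4=y, 5=hy, 6=cy, 7=dy, 8=dhy, 9=dcy, 10=forward workspace;
// outputs: 0=dx, 1=dh0, 2=dc0, 3=flat weight/bias gradients.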
SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr);
SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_DST_LAYER, inputs[4]->addr);
SetArgumentHandle(DNNL_ARG_DST_ITER, inputs[5]->addr);
SetArgumentHandle(DNNL_ARG_DST_ITER_C, inputs[6]->addr);
SetArgumentHandle(DNNL_ARG_WORKSPACE, inputs[10]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC_LAYER, outputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER, outputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER_C, outputs[2]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_DIFF_BIAS, diff_bias_memory.get_data_handle());
SetArgumentHandle(DNNL_ARG_DIFF_DST_LAYER, inputs[7]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER, inputs[8]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER_C, inputs[9]->addr);
ExecutePrimitive();
Reorder(&diff_weights_memory, &user_diff_weights_memory);
Reorder(&diff_weights_h_memory, &user_diff_weights_h_memory);
return true;
}
} // namespace kernel
} // namespace mindspore

+ 71
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h

@@ -0,0 +1,71 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class LSTMGradCPUKernel : public MKLCPUKernel {
public:
LSTMGradCPUKernel() = default;
~LSTMGradCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
int weight_size_ = 0;
int weight_h_size_ = 0;
int input_size_;
int hidden_size_;
int num_layers_;
int batch_size_;
int seq_len_;
int num_directions_;
bool bidirectional_;
bool has_bias_;
dnnl::memory::dims weights_dims_;
dnnl::memory::dims weights_h_dims_;
dnnl::memory::dims bias_dims_;
dnnl::lstm_backward::primitive_desc prim_backward_desc_;
};
MS_REG_CPU_KERNEL(LSTMGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LSTMGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_

+ 71
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc

@@ -0,0 +1,71 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h"
#include <algorithm>
#include <utility>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "common/utils.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void MatMulCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);

if (src_shape.size() != 2 || weight_shape.size() != 2 || dst_shape.size() != 2) {
MS_LOG(EXCEPTION) << "matmul invalid input size";
}
bool trans_a = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_A);
bool trans_b = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_B);
if (trans_a) {
trans_a_ = TRANSPOSE_YES;
dim_m_ = static_cast<dnnl_dim_t>(src_shape[1]);
dim_k_ = static_cast<dnnl_dim_t>(src_shape[0]);
} else {
dim_m_ = static_cast<dnnl_dim_t>(src_shape[0]);
dim_k_ = static_cast<dnnl_dim_t>(src_shape[1]);
}
if (trans_b) {
trans_b_ = TRANSPOSE_YES;
}
dim_n_ = static_cast<dnnl_dim_t>(dst_shape[1]);
}

bool MatMulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "matmul error input output size!";
}
dnnl_dim_t lda = dim_m_;
if (trans_a_ == TRANSPOSE_NO) {
lda = dim_k_;
}
dnnl_dim_t ldb = dim_k_;
if (trans_b_ == TRANSPOSE_NO) {
ldb = dim_n_;
}
auto input_a = reinterpret_cast<float *>(inputs[0]->addr);
auto input_b = reinterpret_cast<float *>(inputs[1]->addr);
auto output = reinterpret_cast<float *>(outputs[0]->addr);
(void)dnnl_sgemm(trans_a_, trans_b_, dim_m_, dim_n_, dim_k_, 1.f, input_a, lda, input_b, ldb, 0.f, output, dim_n_);
return true;
}
} // namespace kernel
} // namespace mindspore
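
dnnl_sgemm is the row-major, BLAS-style entry point, so the leading dimensions computed above are the row strides of A and B as they sit in memory. A small self-contained check of the call with concrete values (a sketch, not kernel code):

#include <cstdio>
#include "dnnl.hpp"

int main() {
  // C(2x2) = A(2x3) * B(3x2), row-major, no transposition: lda = K, ldb = N, ldc = N
  float a[6] = {1, 2, 3, 4, 5, 6};
  float b[6] = {1, 0, 0, 1, 1, 1};
  float c[4] = {0, 0, 0, 0};
  (void)dnnl_sgemm('N', 'N', 2, 2, 3, 1.f, a, 3, b, 2, 0.f, c, 2);
  printf("%g %g\n%g %g\n", c[0], c[1], c[2], c[3]);  // 4 5 / 10 11
  return 0;
}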

+ 50
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h

@@ -0,0 +1,50 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class MatMulCPUKernel : public MKLCPUKernel {
public:
MatMulCPUKernel() = default;
~MatMulCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
char trans_a_{TRANSPOSE_NO};
char trans_b_{TRANSPOSE_NO};
dnnl_dim_t dim_m_{0};
dnnl_dim_t dim_n_{0};
dnnl_dim_t dim_k_{0};
};

MS_REG_CPU_KERNEL(
MatMul,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
MatMulCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_

+ 106
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc

@@ -0,0 +1,106 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
#include <vector>
#include <string>
#include <algorithm>
#include "common/utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"

namespace mindspore {
namespace kernel {
void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
const std::vector<size_t> &src_shape, int kernel_size, int stride,
std::vector<int> *padding_l, std::vector<int> *padding_r) {
MS_EXCEPTION_IF_NULL(kernel_node);
if (src_shape.size() < 2) {
MS_LOG(EXCEPTION) << "set pad only support src dim >= 2!";
}
std::vector<int> spatial_dims;
spatial_dims.emplace_back(src_shape[src_shape.size() - 2]);
spatial_dims.emplace_back(src_shape[src_shape.size() - 1]);
int rad = kernel_size / 2;
int need_pad = kernel_size - 1;
MS_LOG(INFO) << "pad mode " << pad_mode;
if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) {
for (auto wh : spatial_dims) {
int re = (wh - 1) % stride;
int pad = std::max(rad - (re / 2), 0);
padding_r->emplace_back(pad);
pad = std::max(need_pad - pad - re, 0);
padding_l->emplace_back(pad);
}
} else if (pad_mode == PAD_MODE_LOWER_VALID || pad_mode == PAD_MODE_UPPER_VALID) {
MS_LOG(INFO) << "pad valid";
padding_l->emplace_back(0);
padding_l->emplace_back(0);
padding_r->emplace_back(0);
padding_r->emplace_back(0);
} else {
std::vector<int> pad = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, PAD);
if (pad.size() != 4) {
MS_LOG(EXCEPTION) << "wrong pad size in max pooling " << pad.size();
}
padding_l->emplace_back(pad[0]);
padding_l->emplace_back(pad[1]);
padding_r->emplace_back(pad[2]);
padding_r->emplace_back(pad[3]);
}
}

dnnl::memory::format_tag MKLCPUKernel::GetDefaultFormatTag(const dnnl::memory::dims &dims) const {
dnnl::memory::format_tag mem_tag;
auto dim_size = dims.size();
if (dim_size == 4) {
mem_tag = dnnl::memory::format_tag::abcd;
} else if (dim_size == 3) {
mem_tag = dnnl::memory::format_tag::abc;
} else if (dim_size == 2) {
mem_tag = dnnl::memory::format_tag::ab;
} else if (dim_size == 1) {
mem_tag = dnnl::memory::format_tag::a;
} else {
MS_LOG(EXCEPTION) << "kernel dims invalid " << dim_size;
}
return mem_tag;
}

dnnl::memory::desc MKLCPUKernel::GetDefaultMemDesc(const std::vector<size_t> &shape) {
dnnl::memory::dims dims;
dims.insert(dims.end(), shape.begin(), shape.end());
dnnl::memory::format_tag mem_tag = GetDefaultFormatTag(dims);
dnnl::memory::desc mem_desc(dims, dnnl::memory::data_type::f32, mem_tag);
return mem_desc;
}

void MKLCPUKernel::AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc) {
arguments_[arg_key] = MKLKernelEngine::Get().CreateMemory(mem_desc, alloc);
}

void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) {
auto arg_iter = arguments_.find(arg_key);
if (arg_iter != arguments_.end()) {
arg_iter->second.set_data_handle(ptr);
}
}

void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); }

void MKLCPUKernel::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) {
MKLKernelEngine::Get().Reorder(src_mem, dst_mem);
}
} // namespace kernel
} // namespace mindspore
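
The SAME branch of GetPadding above distributes kernel_size - 1 cells of padding across the two sides of each spatial dimension, biased by the stride remainder. A standalone re-derivation with concrete numbers (a sketch, not part of the kernel):

#include <algorithm>
#include <cstdio>

int main() {
  int kernel_size = 3, stride = 1, wh = 32;        // one spatial dimension of the input
  int rad = kernel_size / 2;                       // 1
  int need_pad = kernel_size - 1;                  // 2 cells to distribute in total
  int re = (wh - 1) % stride;                      // 0
  int pad_r = std::max(rad - re / 2, 0);           // 1
  int pad_l = std::max(need_pad - pad_r - re, 0);  // 1
  printf("pad_l=%d pad_r=%d\n", pad_l, pad_r);     // (1, 1): a 32-wide dim stays 32 at stride 1
  return 0;
}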

+ 52
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h

@@ -0,0 +1,52 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_

#include <string>
#include <unordered_map>
#include <memory>
#include <vector>
#include "dnnl.hpp"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class MKLCPUKernel : public CPUKernel {
public:
MKLCPUKernel() = default;
~MKLCPUKernel() override = default;

protected:
void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape,
int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r);
void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false);
void SetArgumentHandle(int arg_key, void *ptr);
dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const;
dnnl::memory::desc GetDefaultMemDesc(const std::vector<size_t> &shape);
void ExecutePrimitive();
std::unordered_map<int, dnnl::memory> arguments_;
std::shared_ptr<dnnl::primitive> primitive_{nullptr};
inline dnnl::memory::desc formatted_md(const dnnl::memory::dims &dimensions, dnnl::memory::format_tag layout) {
return dnnl::memory::desc{{dimensions}, dnnl::memory::data_type::f32, layout};
}
void Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem);
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_

+ 40
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.cc

@@ -0,0 +1,40 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "utils/log_adapter.h"
#include "dnnl.hpp"

namespace mindspore {
namespace kernel {
void MKLKernelEngine::Execute(const std::shared_ptr<dnnl::primitive> &primitive,
const std::unordered_map<int, dnnl::memory> &arguments) {
MS_EXCEPTION_IF_NULL(primitive);
primitive->execute(stream_, arguments);
(void)stream_.wait();
}

dnnl::memory MKLKernelEngine::CreateMemory(const dnnl::memory::desc &mem_desc, bool alloc) {
if (alloc) {
return dnnl::memory(mem_desc, engine_);
} else {
return dnnl::memory(mem_desc, engine_, nullptr);
}
}
void MKLKernelEngine::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) {
dnnl::reorder(*src_mem, *dst_mem).execute(stream_, *src_mem, *dst_mem);
}
} // namespace kernel
} // namespace mindspore
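
Reorder is a plain layout copy: the logical tensor is unchanged, only the physical element order differs. A minimal sketch of the same dnnl::reorder call on a tiny nchw -> nhwc case, assuming the oneDNN 1.x C++ API:

#include <cstdio>
#include <vector>
#include "dnnl.hpp"

int main() {
  using tag = dnnl::memory::format_tag;
  using dt = dnnl::memory::data_type;
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream s(eng);
  std::vector<float> src = {1, 2, 3, 4}, dst(4);  // shape {1, 2, 1, 2}: 2 channels, width 2
  dnnl::memory a({{1, 2, 1, 2}, dt::f32, tag::nchw}, eng, src.data());
  dnnl::memory b({{1, 2, 1, 2}, dt::f32, tag::nhwc}, eng, dst.data());
  dnnl::reorder(a, b).execute(s, a, b);  // the same call the engine wrapper makes
  s.wait();
  printf("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]);  // 1 3 2 4
  return 0;
}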

mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h → mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h


+ 61
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc

@@ -0,0 +1,61 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) {
MS_LOG(EXCEPTION) << "mul only support same dim input or tensor * scalar " << src0_shape.size() << " vs "
<< src1_shape.size();
}
if (src1_shape.size() < src0_shape.size()) {
for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) {
src1_shape.emplace_back(1);
}
}
dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape);
dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape);
dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape);
dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_mem_desc, src1_mem_desc, dst_mem_desc);
auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::binary>(prim_desc);
AddArgument(DNNL_ARG_SRC_0, src0_mem_desc);
AddArgument(DNNL_ARG_SRC_1, src1_mem_desc);
AddArgument(DNNL_ARG_DST, dst_mem_desc);
}

bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "mul error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
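
The tensor * scalar case works because InitKernel pads the smaller shape with trailing 1s and dnnl::binary broadcasts any unit dimension of the second source. A runnable sketch of that broadcast, assuming the oneDNN 1.x C++ API used here:

#include <cstdio>
#include <vector>
#include "dnnl.hpp"

int main() {
  using tag = dnnl::memory::format_tag;
  using dt = dnnl::memory::data_type;
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream s(eng);
  std::vector<float> a = {1, 2, 3, 4}, b = {10}, c(4);
  dnnl::memory::desc a_md({1, 1, 2, 2}, dt::f32, tag::abcd);
  dnnl::memory::desc b_md({1, 1, 1, 1}, dt::f32, tag::abcd);  // scalar broadcast over a
  auto desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, a_md, b_md, a_md);
  auto pd = dnnl::binary::primitive_desc(desc, eng);
  dnnl::binary(pd).execute(s, {{DNNL_ARG_SRC_0, dnnl::memory(a_md, eng, a.data())},
                               {DNNL_ARG_SRC_1, dnnl::memory(b_md, eng, b.data())},
                               {DNNL_ARG_DST, dnnl::memory(a_md, eng, c.data())}});
  s.wait();
  printf("%g %g %g %g\n", c[0], c[1], c[2], c[3]);  // 10 20 30 40
  return 0;
}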

+ 42
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h View File

@@ -0,0 +1,42 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class MulCPUKernel : public MKLCPUKernel {
public:
MulCPUKernel() = default;
~MulCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(
Mul, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
MulCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_

+ 69
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc

@@ -0,0 +1,69 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h"
#include <string>
#include <algorithm>
#include "common/utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
if (kernel_sizes.size() != 4 || strides.size() != 4) {
MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size();
}
dnnl::memory::dims strides_dims{strides[2], strides[3]};
dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]};
const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
std::vector<int> int_padding_l;
std::vector<int> int_padding_r;
GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r);
if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
MS_LOG(EXCEPTION) << "pooling get padding failed";
}
dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]};
dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]};
dnnl::pooling_forward::desc desc =
dnnl::pooling_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::pooling_max, src_desc, dst_desc,
strides_dims, kernels_dims, padding_l, padding_r);
auto prim_desc = dnnl::pooling_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::pooling_forward>(prim_desc);
AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DST, dst_desc);
AddArgument(DNNL_ARG_WORKSPACE, prim_desc.workspace_desc());
}

bool PoolingCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
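
The dst shape this kernel receives must already satisfy the pooling size relation, otherwise primitive_desc creation fails. A quick check of that relation, assuming the standard oneDNN pooling formula (numbers illustrative):

#include <cstdio>

int main() {
  // dst = (src + pad_l + pad_r - kernel) / stride + 1, per spatial dimension
  int src_h = 32, kernel = 2, stride = 2, pad_l = 0, pad_r = 0;  // VALID pooling
  int dst_h = (src_h + pad_l + pad_r - kernel) / stride + 1;
  printf("dst_h = %d\n", dst_h);  // 16
  return 0;
}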

+ 41
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h

@@ -0,0 +1,41 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class PoolingCPUKernel : public MKLCPUKernel {
public:
PoolingCPUKernel() = default;
~PoolingCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(MaxPool, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
PoolingCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_

+ 124
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc

@@ -0,0 +1,124 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h"
#include <string>
#include <utility>
#include <algorithm>
#include "common/utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
dst_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE);
std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES);
if (kernel_sizes.size() != 4 || strides.size() != 4 || src_shape_.size() != 4 || dst_shape_.size() != 4) {
MS_LOG(EXCEPTION) << "pooling grad invalid input size";
}
std::vector<int> padding_r;
const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING);
kernel_size_ = kernel_sizes[3];
stride_ = strides[3];
GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r);
}

void PoolingGradCPUKernel::RowPoolingGrad(const float *input, float *output, float diff,
const std::vector<std::pair<size_t, size_t>> &box,
std::vector<std::pair<size_t, float>> *row_max_pair) {
float max_value = 0;
size_t max_index = box[1].second;
size_t src_width = src_shape_[3];
size_t index_start;
size_t index;
for (size_t i = box[1].first; i < box[1].second; ++i) {
if ((*row_max_pair)[i].first == 0) {
index_start = box[0].first * src_width;
for (size_t j = box[0].first; j < box[0].second; ++j) {
index = index_start + i;
if (input[index] > (*row_max_pair)[i].second || j == box[0].first) {
(*row_max_pair)[i].second = input[index];
(*row_max_pair)[i].first = index;
}
index_start += src_width;
}
}
if ((*row_max_pair)[i].second > max_value || max_index == box[1].second) {
max_value = (*row_max_pair)[i].second;
max_index = i;
}
}

output[(*row_max_pair)[max_index].first] += diff;
}

void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *diff, float *output) {
int src_width = SizeToInt(src_shape_[3]);
int src_height = SizeToInt(src_shape_[2]);
std::vector<std::pair<size_t, float>> row_max_pair(src_shape_[3]);
std::vector<std::pair<size_t, size_t>> box(2);
int h_start = -padding_l_[0];
size_t diff_index = 0;
for (size_t h = 0; h < dst_shape_[2]; ++h) {
box[0].first = IntToSize(std::max(h_start, 0));
box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height));
for (size_t w = 0; w < src_shape_[3]; ++w) {
row_max_pair[w].first = 0;
row_max_pair[w].second = 0;
}
int w_start = -padding_l_[1];
for (size_t w = 0; w < dst_shape_[3]; ++w) {
box[1].first = IntToSize(std::max(w_start, 0));
box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width));
RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair);
diff_index += 1;
w_start += stride_;
}
h_start += stride_;
}
}

bool PoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 3 || outputs.empty()) {
MS_LOG(EXCEPTION) << "pooling grad error input output size!";
}

auto input = reinterpret_cast<float *>(inputs[0]->addr);
auto diff = reinterpret_cast<float *>(inputs[2]->addr);
auto output = reinterpret_cast<float *>(outputs[0]->addr);
auto ret = memset_s(output, outputs[0]->size, 0, outputs[0]->size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "pooling grad memset error";
}
size_t src_wh = src_shape_[2] * src_shape_[3];
size_t dst_wh = dst_shape_[2] * dst_shape_[3];
for (size_t n = 0; n < src_shape_[0]; ++n) {
for (size_t c = 0; c < src_shape_[1]; ++c) {
ChannelPoolingGrad(input, diff, output);
input = input + src_wh;
output = output + src_wh;
diff = diff + dst_wh;
}
}
return true;
}
} // namespace kernel
} // namespace mindspore
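
Unlike the other kernels in this directory, PoolingGradCPUKernel is hand-written rather than delegated to a oneDNN primitive: for every pooled output cell it locates the argmax inside the pooling window and routes the incoming gradient to that position. A scalar reference of the same scatter on a 1-D toy case:

#include <cstdio>

int main() {
  float x[4] = {1, 5, 2, 2};  // forward input, kernel 2, stride 2, no padding
  float dy[2] = {10, 20};     // gradient w.r.t. the two pooled outputs
  float dx[4] = {0, 0, 0, 0};
  for (int o = 0; o < 2; ++o) {
    int start = o * 2, arg = start;
    for (int i = start; i < start + 2; ++i) {
      if (x[i] > x[arg]) arg = i;  // argmax in the window (first max wins on ties)
    }
    dx[arg] += dy[o];  // route the gradient to the max position
  }
  printf("%g %g %g %g\n", dx[0], dx[1], dx[2], dx[3]);  // 0 10 20 0
  return 0;
}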

+ 56
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h View File

@@ -0,0 +1,56 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include <utility>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class PoolingGradCPUKernel : public MKLCPUKernel {
public:
PoolingGradCPUKernel() = default;
~PoolingGradCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
void RowPoolingGrad(const float *input, float *output, float diff, const std::vector<std::pair<size_t, size_t>> &box,
std::vector<std::pair<size_t, float>> *row_max_pair);
void ChannelPoolingGrad(const float *input, const float *diff, float *output);
int stride_{0}, kernel_size_{0};
std::vector<int> padding_l_;
std::vector<size_t> src_shape_;
std::vector<size_t> dst_shape_;
};

MS_REG_CPU_KERNEL(MaxPoolGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
PoolingGradCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_

+ 52
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc

@@ -0,0 +1,52 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 && src_shape.size() != 2) {
MS_LOG(EXCEPTION) << "relu kernel dims invalid " << src_shape.size();
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);

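// forward-training ReLU primitive; alpha = 0.0 selects plain max(x, 0)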
dnnl::eltwise_forward::desc desc =
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc);

AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DST, src_desc);
}

bool ReluCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
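The descriptor chain here (op desc → primitive_desc → primitive, then AddArgument/SetArgumentHandle/ExecutePrimitive) is standard oneDNN usage that MKLCPUKernel wraps. Below is a self-contained sketch of the same chain against the raw dnnl API, assuming the oneDNN 1.x interface this file is written against; buffer shapes and values are illustrative.

#include <vector>
#include "dnnl.hpp"

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);  // MKLKernelEngine wraps an engine like this
  dnnl::stream stream(eng);

  std::vector<float> data = {-1.0f, 2.0f, -3.0f, 4.0f};
  dnnl::memory::desc md({2, 2}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::nc);
  dnnl::memory src(md, eng, data.data());
  dnnl::memory dst(md, eng, data.data());  // eltwise ops may run in place

  // op desc -> primitive desc -> primitive, the same chain as InitKernel above
  dnnl::eltwise_forward::desc desc(dnnl::prop_kind::forward_training,
                                   dnnl::algorithm::eltwise_relu, md, 0.0f);
  dnnl::eltwise_forward relu(dnnl::eltwise_forward::primitive_desc(desc, eng));

  relu.execute(stream, {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}});
  stream.wait();  // data is now {0, 2, 0, 4}
  return 0;
}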

+ 40
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h

@@ -0,0 +1,40 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class ReluCPUKernel : public MKLCPUKernel {
public:
ReluCPUKernel() = default;
~ReluCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReluCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_

+ 69
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc

@@ -0,0 +1,69 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 && src_shape.size() != 2) {
MS_LOG(EXCEPTION) << "relu grad kernel dims invalid " << src_shape.size();
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);

dnnl::eltwise_forward::desc forward_desc =
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
auto forward_prim_desc = dnnl::eltwise_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine());

dnnl::eltwise_backward::desc backward_desc =
dnnl::eltwise_backward::desc(dnnl::algorithm::eltwise_relu, src_desc, src_desc, 0.0, 0.0);
auto backward_prim_desc =
dnnl::eltwise_backward::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
primitive_ = std::make_shared<dnnl::eltwise_backward>(backward_prim_desc);

AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DIFF_SRC, src_desc);
AddArgument(DNNL_ARG_DIFF_DST, src_desc);
}

bool ReluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "relu grad error input output size!";
}
if (inputs[0]->size != outputs[0]->size) {
MS_LOG(EXCEPTION) << "relu grad error input output data size!";
}

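// run the backward primitive in place on the incoming gradient buffer (inputs[0]),
// then copy the result to the kernel output below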
SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
ExecutePrimitive();
size_t mem_bits = outputs[0]->size;
auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno " << ret;
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore
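Semantically, the backward primitive passes the incoming gradient through wherever the forward input was positive and zeroes it elsewhere. A plain-loop reference of that mapping, with illustrative names only:

#include <cstddef>

// dx[i] = dy[i] where the forward input x[i] was positive, 0 otherwise --
// the mapping dnnl::eltwise_backward computes for eltwise_relu with alpha = 0.
void ReluGradReference(const float *dy, const float *x, float *dx, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    dx[i] = x[i] > 0.0f ? dy[i] : 0.0f;
  }
}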

+ 43
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h

@@ -0,0 +1,43 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class ReluGradCPUKernel : public MKLCPUKernel {
public:
ReluGradCPUKernel() = default;
~ReluGradCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};

MS_REG_CPU_KERNEL(
ReluGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ReluGradCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_

+ 54
- 0
mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.cc

@@ -0,0 +1,54 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"

namespace mindspore {
namespace kernel {
void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<int> axis_list = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, AXIS);
if (axis_list.size() != 1) {
MS_LOG(EXCEPTION) << "cpu softmax only support input axis size 1";
}
int axis = axis_list[0];
// normalize negative axes (e.g. -1 means the innermost dimension) and clamp out-of-range values to the last axis
if (axis < 0) {
axis += SizeToInt(src_shape.size());
}
if (axis < 0 || axis >= SizeToInt(src_shape.size())) {
axis = SizeToInt(src_shape.size()) - 1;
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::softmax_forward::desc desc = dnnl::softmax_forward::desc(dnnl::prop_kind::forward_training, src_desc, axis);
auto prim_desc = dnnl::softmax_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::softmax_forward>(prim_desc);
AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DST, src_desc);
}

bool SoftmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "softmax error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
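For reference, softmax over the chosen axis exponentiates shifted inputs and normalizes them to sum to 1. A plain-loop sketch for the common case (2-D input, last axis), with illustrative names; the dnnl primitive additionally handles arbitrary axes and blocked layouts:

#include <algorithm>
#include <cmath>
#include <cstddef>

// softmax(x)_c = exp(x_c - max(x)) / sum_k exp(x_k - max(x)), applied per row;
// subtracting the row maximum keeps exp() from overflowing.
void SoftmaxLastAxisReference(const float *src, float *dst, std::size_t rows, std::size_t cols) {
  for (std::size_t r = 0; r < rows; ++r) {
    const float *in = src + r * cols;
    float *out = dst + r * cols;
    float max_val = *std::max_element(in, in + cols);
    float sum = 0.0f;
    for (std::size_t c = 0; c < cols; ++c) {
      out[c] = std::exp(in[c] - max_val);
      sum += out[c];
    }
    for (std::size_t c = 0; c < cols; ++c) {
      out[c] /= sum;
    }
  }
}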

Some files were not shown because too many files changed in this diff
