@@ -17,6 +17,10 @@ else()
     set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
 endif()
+
+if (ENABLE_PYTHON)
+    add_compile_definitions(ENABLE_PYTHON)
+endif()
 
 set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")
@@ -70,6 +70,22 @@ Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wo
 Contributions of any kind are welcome!
 
+# Release 0.3.1-alpha
+
+## Major Features and Improvements
+
+### Ascend 910 Training and Inference Framework
+* Frontend and User Interface
+    * Independent model init interface.
+* Data processing, augmentation, and save format
+    * Support sample padding for minddataset.
+
+## Bugfixes
+* Python API
+    * Fix bugs in the LARS optimizer ([!1894](https://gitee.com/mindspore/mindspore/pulls/1894)).
+* Data processing
+    * Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340)).
+
 # Release 0.3.0-alpha
 
 ## Major Features and Improvements
@@ -24,8 +24,8 @@ usage()
 {
   echo "Usage:"
   echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
-  echo "              [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
-  echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
+  echo "              [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
+  echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
   echo ""
   echo "Options:"
   echo "    -d Debug mode"
@@ -48,6 +48,7 @@ usage()
   echo "    -P Enable dump anf graph to file in ProtoBuffer format, default on"
   echo "    -Q Enable dump memory, default off"
   echo "    -D Enable dumping of function graph ir, default on"
+  echo "    -S Enable async data dump, default off"
   echo "    -z Compile dataset & mindrecord, default on"
   echo "    -M Enable MPI and NCCL for GPU training, gpu default on"
   echo "    -V Specify the minimum required cuda version, default CUDA 10.1"
@@ -56,6 +57,7 @@ usage()
   echo "    -s Enable serving module, default off"
   echo "    -B Enable debugger, default off"
   echo "    -E Enable IBVERBS for parameter server, default off"
+  echo "    -l Compile with python dependency, default on"
 }
 
 # check value of input is 'on' or 'off'
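Taken together, the new `-S` and `-l` switches add an async data dump toggle and a way to drop the Python dependency. Two hypothetical invocations (flag semantics as described in the usage text above; the chosen backends are just examples):

```bash
# Ascend (-e d) build with async data dump compiled in
bash build.sh -e d -S on

# CPU build that skips the Python dependency (-l off clears ENABLE_PYTHON)
bash build.sh -e cpu -l off
```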
@@ -87,6 +89,7 @@ checkopts()
   ENABLE_TIMELINE="off"
   ENABLE_DUMP2PROTO="on"
   ENABLE_DUMPE2E="off"
+  ENABLE_DATA_DUMP="off"
   ENABLE_DUMP_IR="on"
   COMPILE_MINDDATA="on"
   ENABLE_MPI="off"
@@ -98,9 +101,10 @@ checkopts()
   ENABLE_SERVING="off"
   ENABLE_DEBUGGER="off"
   ENABLE_IBVERBS="off"
+  ENABLE_PYTHON="on"
 
   # Process the options
-  while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
+  while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
   do
     OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
     case "${opt}" in
@@ -151,6 +155,10 @@ checkopts()
         check_on_off $OPTARG p
         ENABLE_PROFILE="$OPTARG"
         ;;
+      l)
+        check_on_off $OPTARG l
+        ENABLE_PYTHON="$OPTARG"
+        ;;
       i)
         INC_BUILD="on"
         ;;
@@ -212,6 +220,11 @@ checkopts()
        ENABLE_DUMPE2E="$OPTARG"
        echo "enable dump end to end"
        ;;
+      S)
+        check_on_off $OPTARG S
+        ENABLE_DATA_DUMP="$OPTARG"
+        echo "enable data dump"
+        ;;
       D)
         check_on_off $OPTARG D
         ENABLE_DUMP_IR="$OPTARG"
@@ -315,7 +328,11 @@ build_mindspore()
     if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
     fi
+    if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
+        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
+    fi
     CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
+    CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
     if [[ "X$ENABLE_MPI" = "Xon" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
     fi
@@ -9,11 +9,11 @@ else()
         LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
         URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
         MD5 0c2662a2b0bc80b0eb56495205247c8f
-        CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json
+        CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
     )
     include_directories(${icu4c_INC})
     add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON})
     add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA})
     add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N})
     add_definitions(-D ENABLE_ICU4C)
     endif()
 endif()
@@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
 include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
 include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
 
-if (ENABLE_DEBUGGER)
+if (ENABLE_DEBUGGER OR ENABLE_SERVING)
     # build dependencies of gRPC
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
@@ -30,7 +30,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
 if(USE_GLOG)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
 endif()
-if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT ENABLE_GE)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
 endif()
@@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
 option(ENABLE_AKG "enable akg" OFF)
 option(ENABLE_DEBUGGER "enable debugger" OFF)
 option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
+option(ENABLE_PYTHON "Enable python" ON)
 
 if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     if (WIN32)
@@ -115,6 +116,10 @@ if(ENABLE_DUMP_E2E)
     add_compile_definitions(ENABLE_DUMP_E2E)
 endif()
 
+if(ENABLE_DATA_DUMP)
+    add_compile_definitions(ENABLE_DATA_DUMP)
+endif()
+
 if(ENABLE_DEBUGGER)
     add_compile_definitions(ENABLE_DEBUGGER)
 endif()
@@ -213,7 +213,6 @@ install(
         ${CMAKE_SOURCE_DIR}/mindspore/parallel
         ${CMAKE_SOURCE_DIR}/mindspore/mindrecord
         ${CMAKE_SOURCE_DIR}/mindspore/train
-        ${CMAKE_SOURCE_DIR}/mindspore/model_zoo
         ${CMAKE_SOURCE_DIR}/mindspore/common
         ${CMAKE_SOURCE_DIR}/mindspore/ops
         ${CMAKE_SOURCE_DIR}/mindspore/communication
@@ -261,3 +260,17 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
         COMPONENT mindspore
     )
 endif ()
+
+if (ENABLE_SERVING)
+    install(
+        TARGETS ms_serving
+        DESTINATION ${INSTALL_BASE_DIR}
+        COMPONENT mindspore
+    )
+
+    install(
+        TARGETS inference
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
+endif ()
@@ -0,0 +1,15 @@
+{
+    "DumpSettings": {
+        "net_name": "ResNet50",
+        "mode": 1,
+        "iteration": 0,
+        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
+    },
+
+    "DumpSettingsSpec": {
+        "net_name": "net name eg:ResNet50",
+        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "iteration": "specified iteration",
+        "kernels": "full scope names of the ops that need to be dumped"
+    }
+}
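The `DumpSettings` object is what the runtime consumes; `DumpSettingsSpec` simply documents each field inline. Reading the spec strings, mode 0 dumps every kernel, which suggests a minimal config like this sketch (an assumption: with mode 0 the kernels list is irrelevant, so it is left empty):

```json
{
    "DumpSettings": {
        "net_name": "LeNet5",
        "mode": 0,
        "iteration": 0,
        "kernels": []
    }
}
```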
@@ -1 +1 @@
-Subproject commit 4084909d62c159da6ba316f61ad3d02a4857b34b
+Subproject commit 31aa96ef41067a0ecdc4113ef245f8ede48f3457
@@ -20,7 +20,7 @@
 #include <utility>
 #include <vector>
 #include <memory>
-#include "ir/dtype/type_id.h"
+#include "mindspore/core/ir/dtype/type_id.h"
 
 namespace mindspore {
 #define MS_API __attribute__((visibility("default")))
@@ -334,7 +334,7 @@ class Parser:
     def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None:
         self.fn = fn
         self.parse_method = parse_method
-        _, self.line_offset = inspect.getsourcelines(self.fn)
+        self.line_offset = 0
         self.filename: str = inspect.getfile(self.fn)
         # Used to resolve the function's globals Namespace.
@@ -350,7 +350,8 @@ class Parser:
         logger.debug("fn = %r", self.fn)
         tree = None
         if isinstance(self.fn, (types.FunctionType, types.MethodType)):
-            original_src = inspect.getsource(self.fn)
+            lines, self.line_offset = inspect.getsourcelines(self.fn)
+            original_src = ''.join(lines)
             hexstr = hashlib.sha256(original_src.encode()).hexdigest()
             tree = Parser.ast_cache.get(hexstr)
             if not tree:
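The reworked `parse` now derives both artifacts from a single `inspect.getsourcelines` call: the joined source feeds the AST cache key, and the returned line number becomes `self.line_offset` (previously fetched eagerly in `__init__`). A standalone illustration of that call:

```python
import inspect

def f():
    return 1

# getsourcelines returns the source lines plus the 1-based line number where
# the definition starts in its file -- the offset Parser stores to map AST
# positions back to real file lines.
lines, offset = inspect.getsourcelines(f)
src = ''.join(lines)          # equivalent to inspect.getsource(f)
print(offset, repr(src.splitlines()[0]))
```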
@@ -108,7 +108,8 @@ def enumerate_(x, start=0):
     """Enumerate list or tuple."""
     x_type = F.typeof(x)
     ret = ()
-    if check_is_tuple_or_list(x_type, "enumerate"):
+    op_name = "enumerate"
+    if check_is_tuple_or_list(x_type, op_name, "first input") and check_is_const_int(start, op_name, "start"):
         ret = zip(range(start, start + len(x)), x)
     return ret
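Once both checks pass, the body pairs each element with an index counted from `start`. A plain-Python sketch of the same arithmetic (the graph version runs on compile-time constants, but the result is identical):

```python
# Plain-Python equivalent of the body guarded by the two checks.
x = ('a', 'b', 'c')
start = 1
ret = tuple(zip(range(start, start + len(x)), x))
print(ret)  # ((1, 'a'), (2, 'b'), (3, 'c'))
```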
@@ -123,11 +124,22 @@ def while_cond(x):
 @constexpr
-def check_is_tuple_or_list(x, op_name):
+def check_is_tuple_or_list(x, op_name, arg_name):
     """check whether x is list or tuple."""
     if isinstance(x, (mstype.list_type, mstype.tuple_type)):
         return True
-    raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.")
+    raise TypeError(f"For '{op_name}', the '{arg_name}' should be tuple or list, but got {x}.")
+
+
+@constexpr
+def check_is_const_int(x, op_name, arg_name):
+    """check whether x is const int."""
+    if x is None:
+        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got not const.")
+    if not isinstance(x, int):
+        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.")
+    return True
 
 
 @constexpr
 def check_is_tensor_bool_cond(shp):
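Because these helpers are `@constexpr`, a bad `start` argument now fails while the graph is compiled rather than at run time. A runnable copy of the new check, outside the framework, to show the raised messages:

```python
def check_is_const_int(x, op_name, arg_name):
    """Standalone copy of the @constexpr helper above, for illustration."""
    if x is None:
        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got not const.")
    if not isinstance(x, int):
        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.")
    return True

print(check_is_const_int(1, "enumerate", "start"))   # True
check_is_const_int(1.5, "enumerate", "start")        # TypeError: ... but got 1.5.
```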
@@ -1,4 +1,5 @@
 ## common setting
+include_directories(${CMAKE_SOURCE_DIR}/mindspore/core)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 include_directories(${CMAKE_BINARY_DIR})
 link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
@@ -35,20 +36,20 @@ if(ENABLE_GPU)
     include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS})
 
     file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "device/gpu/*.cc"
-            "device/gpu/*.cu"
-            "kernel/gpu/*.cu"
-            "kernel/akg/gpu/*.cc"
-            "kernel/akg/akg_kernel_build.cc"
-            "kernel/akg/akg_kernel_attrs_process.cc"
+            "runtime/device/gpu/*.cc"
+            "runtime/device/gpu/*.cu"
+            "backend/kernel_compiler/gpu/*.cu"
+            "backend/kernel_compiler/akg/gpu/*.cc"
+            "backend/kernel_compiler/akg/akg_kernel_build.cc"
+            "backend/kernel_compiler/akg/akg_kernel_attrs_process.cc"
             )
 
     list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
-    list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
-    list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
-                                  "device/gpu/distribution/collective_wrapper.cc"
-                                  "device/gpu/distribution/mpi_wrapper.cc"
-                                  "device/gpu/distribution/nccl_wrapper.cc"
+    list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc")
+    list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc"
+                                  "runtime/device/gpu/distribution/collective_wrapper.cc"
+                                  "runtime/device/gpu/distribution/mpi_wrapper.cc"
+                                  "runtime/device/gpu/distribution/nccl_wrapper.cc"
                                   )
 
     set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
@@ -56,6 +57,7 @@ if(ENABLE_GPU)
     set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
     cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
     set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
+    add_compile_definitions(ENABLE_GPU)
 endif ()
 
 ## make flatbuffer files
@@ -101,16 +103,20 @@ if (ENABLE_DUMP_PROTO)
 endif ()
 
 if (ENABLE_D)
-    include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
+    include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
     include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
-    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto")
+    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
     ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
 
     file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
     ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
 
+    file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
+    ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
+
     list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
     list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
+    list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
 
     add_compile_definitions(ENABLE_D)
 endif ()
@@ -121,18 +127,36 @@ if (MINDSPORE_PROTO_LIST)
 endif()
 
 ## make sub objects
-set(SUB_COMP
-    transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict
-    pybind_api pynative session utils vm
+set(SUB_COMP
+    transform/graph_ir
+    transform/onnx
+    backend/optimizer
+    backend/kernel_compiler
+    backend/session
+    runtime/device
+    frontend/optimizer
+    frontend/parallel
+    frontend/operator
+    pipeline/jit
+    pipeline/pynative
+    common debug gvar predict pybind_api utils vm
 )
 
 foreach (_comp ${SUB_COMP})
     add_subdirectory(${_comp})
-    if (TARGET _mindspore_${_comp}_obj)
-        list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>)
-        add_dependencies(_mindspore_${_comp}_obj proto_input flat_input)
+    string(REPLACE "/" "_" sub ${_comp})
+    if (TARGET _mindspore_${sub}_obj)
+        list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
+        add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
     endif ()
 endforeach ()
+
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base)
+list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_base_obj>)
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/abstract abstract)
+list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_abstract_obj>)
+add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
+list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
+add_dependencies(_mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input)
+
 set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
 add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
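Component paths now contain slashes, so the object-library target name is derived by flattening the path. A small sketch of the mapping performed by the `string(REPLACE ...)` line above (runnable with `cmake -P`):

```cmake
# Sketch: how a component path maps onto its object-library target name.
set(_comp "backend/kernel_compiler")
string(REPLACE "/" "_" sub ${_comp})      # sub = backend_kernel_compiler
message(STATUS "_mindspore_${sub}_obj")   # _mindspore_backend_kernel_compiler_obj
```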
@@ -204,8 +228,8 @@ endif()
 # set c_expression building
 set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
-pybind11_add_module(_c_expression "pipeline/init.cc")
+set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
+pybind11_add_module(_c_expression "pipeline/jit/init.cc")
 MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
 
 if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -231,9 +255,11 @@ else ()
     target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
     target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
     target_link_libraries(_c_expression PRIVATE mindspore_gvar)
-    target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
-    if (${ENABLE_IBVERBS} STREQUAL "ON")
-        target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
+    if (NOT ENABLE_GE)
+        target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
+        if (${ENABLE_IBVERBS} STREQUAL "ON")
+            target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
+        endif()
     endif()
 endif ()
@@ -260,8 +286,8 @@ if (ENABLE_CPU)
 endif ()
 
 if (ENABLE_MINDDATA)
-    add_subdirectory(mindrecord)
-    add_subdirectory(dataset)
+    add_subdirectory(minddata/mindrecord)
+    add_subdirectory(minddata/dataset)
 endif ()
 
 # build inference
@@ -270,7 +296,7 @@ set(LOAD_ONNX_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc
         )
 add_library(inference SHARED
-        ${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc
         ${LOAD_ONNX_SRC}
         )
 target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
@@ -0,0 +1,66 @@
+file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+    "kernel_build_info.cc"
+    "kash/*.cc"
+    "common_utils.cc"
+    "oplib/*.cc"
+)
+
+if (ENABLE_D)
+    file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "kernel_query.cc"
+        "kernel_fusion.cc"
+        "akg/ascend/*.cc"
+        "akg/akg_kernel_build.cc"
+        "akg/akg_kernel_attrs_process.cc"
+        "akg/akg_kernel_metadata.cc"
+        "tbe/*.cc"
+        "aicpu/*.cc"
+        "rts/*.cc"
+        "hccl/*.cc"
+    )
+    add_compile_definitions(ENABLE_D)
+endif ()
+
+if (ENABLE_CPU)
+    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "cpu/*.cc"
+    )
+    list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc"
+                                  "cpu/ps/pull_kernel.cc"
+                                  "cpu/ps/embedding_look_up_ps_kernel.cc"
+                                  "cpu/ps/embedding_look_up_proxy_kernel.cc"
+                                  "cpu/ps/apply_momentum_ps_kernel.cc"
+                                  "cpu/ps/sparse_apply_adam_ps_kernel.cc"
+                                  "cpu/ps/sparse_apply_ftrl_ps_kernel.cc")
+    if (NOT ENABLE_MPI)
+        list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
+        list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
+        list(REMOVE_ITEM CPU_SRC_LIST "cpu/embedding_look_up_comm_grad_cpu_kernel.cc")
+    endif ()
+endif ()
+
+if (ENABLE_GPU)
+    file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "gpu/*.cu"
+        "akg/gpu/*.cc"
+        "akg/akg_kernel_build.cc"
+        "akg/akg_kernel_attrs_process.cc"
+    )
+
+    file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc")
+    list(REMOVE_ITEM GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc")
+
+    if (ENABLE_MPI)
+        include(ExternalProject)
+        file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/nccl/*.cc")
+        list(APPEND GPU_SRC_LIST ${GPU_NCCL_LIST})
+    endif ()
+
+    # add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST})
+endif()
+
+set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}
+    PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
+add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
@@ -0,0 +1,312 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
+#include <google/protobuf/text_format.h>
+#include <fstream>
+#include <utility>
+#include <string>
+#include <vector>
+#include <memory>
+#include <algorithm>
+#include <map>
+#include "runtime/device/kernel_runtime.h"
+#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
+#include "backend/kernel_compiler/akg/akg_kernel_build.h"
+#include "proto/tensor.pb.h"
+#include "proto/tensor_shape.pb.h"
+#include "proto/attr.pb.h"
+#include "proto/node_def.pb.h"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "common/utils.h"
+#include "backend/kernel_compiler/aicpu/aicpu_util.h"
+#include "backend/session/kernel_graph.h"
+#include "backend/kernel_compiler/common_utils.h"
+
+namespace mindspore {
+namespace kernel {
+using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;
+
+bool SetIOInputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
+                    std::vector<size_t> *input_size_list) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(input_size_list);
+  for (size_t i = 0; i < input_num; i++) {
+    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
+    if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {
+      if (!anf_node->isa<CNode>()) {
+        MS_LOG(EXCEPTION) << "anf_node is not CNode.";
+      }
+      auto cnode = anf_node->cast<CNodePtr>();
+      MS_EXCEPTION_IF_NULL(cnode);
+      if (cnode->inputs().size() < (i + 1)) {
+        MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1;
+        return false;
+      }
+      auto input_node = cnode->inputs()[i + 1];
+      MS_EXCEPTION_IF_NULL(input_node);
+      if (input_node->isa<ValueNode>()) {
+        auto value_ptr = GetValueNode(input_node);
+        auto value = GetValue<std::string>(value_ptr);
+        input_size_list->push_back(value.size());
+      }
+    } else {
+      auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
+      MS_EXCEPTION_IF_NULL(type_ptr);
+      int64_t size_i = 1;
+      for (size_t j = 0; j < shape_i.size(); j++) {
+        size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
+      }
+      size_t type_byte = GetTypeByte(type_ptr);
+      if (type_byte == 0) {
+        return false;
+      }
+      size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
+      input_size_list->push_back(LongToSize(size_i));
+    }
+  }
+  return true;
+}
+
+bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
+  std::vector<size_t> input_size_list;
+  std::vector<size_t> output_size_list;
+  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
+  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
+
+  if (!SetIOInputSize(anf_node, input_num, &input_size_list)) {
+    return false;
+  }
+  kernel_mod_ptr->SetInputSizeList(input_size_list);
+
+  for (size_t i = 0; i < output_num; i++) {
+    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
+    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
+    MS_EXCEPTION_IF_NULL(type_ptr);
+    int64_t size_i = 1;
+    for (size_t j = 0; j < shape_i.size(); j++) {
+      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
+    }
+    size_t type_byte = GetTypeByte(type_ptr);
+    if (type_byte == 0) {
+      return false;
+    }
+    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
+    output_size_list.push_back(LongToSize(size_i));
+  }
+  kernel_mod_ptr->SetOutputSizeList(output_size_list);
+  return true;
+}
+
+void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
+                    ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
+  MS_EXCEPTION_IF_NULL(node_attr);
+  MS_EXCEPTION_IF_NULL(value);
+  if (type == "int") {
+    auto attr_value = GetValue<int>(value);
+    (*node_attr)[attr_name].set_i(attr_value);
+  } else if (type == "str") {
+    auto attr_value = GetValue<std::string>(value);
+    (*node_attr)[attr_name].set_s(attr_value);
+  } else if (type == "bool") {
+    auto attr_value = GetValue<bool>(value);
+    (*node_attr)[attr_name].set_b(attr_value);
+  } else if (type == "float") {
+    auto attr_value = GetValue<float>(value);
+    (*node_attr)[attr_name].set_f(attr_value);
+  } else if (type == "listInt") {
+    std::vector<int> attr_value;
+    auto value_type = value->type();
+    MS_EXCEPTION_IF_NULL(value_type);
+    auto value_type_str = value_type->ToString();
+    // A scalar Int32 is promoted to a one-element list.
+    if (value_type_str == "Int32") {
+      int data = GetValue<int>(value);
+      attr_value.push_back(data);
+    } else {
+      attr_value = GetValue<std::vector<int>>(value);
+    }
+    mindspore::AttrValue input_shape_attr;
+    mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
+    MS_EXCEPTION_IF_NULL(input_shape_attr_list);
+    for (const auto shape : attr_value) {
+      input_shape_attr_list->add_i(shape);
+    }
+    (*node_attr)[attr_name] = input_shape_attr;
+  } else {
+    MS_LOG(EXCEPTION) << "type: " << type << " is not supported";
+  }
+}
+
+void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(proto);
+  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
+  if (op_name == kInitDataSetQueue) {
+    op_name = kInitData;
+  }
+  if (op_name == kPrint) {
+    return;
+  }
+  auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
+  MS_EXCEPTION_IF_NULL(op_info_ptr);
+  auto attrs_ptr = op_info_ptr->attrs_ptr();
+  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
+  MS_EXCEPTION_IF_NULL(primitive);
+  ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
+  for (const auto &attr_ptr : attrs_ptr) {
+    MS_EXCEPTION_IF_NULL(attr_ptr);
+    std::string attr_name = attr_ptr->name();
+    auto value = primitive->GetAttr(attr_name);
+    if (value != nullptr) {
+      if (attr_name == kQueueName || attr_name == kSharedName) {
+        attr_name = kChannelName;
+      } else if (attr_name == kSeed0) {
+        attr_name = kSeed;
+      } else if (attr_name == kSeed1) {
+        attr_name = kSeed2;
+      }
+      std::string type = attr_ptr->type();
+      ParseAttrValue(type, attr_name, value, node_attr);
+    }
+  }
+  MS_LOG(INFO) << "Set node attr end!";
+}
+
+void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
+  MS_EXCEPTION_IF_NULL(proto);
+  MS_EXCEPTION_IF_NULL(anf_node);
+  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
+  if (input_num == 0) {
+    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
+    return;
+  }
+  for (size_t input_index = 0; input_index < input_num; input_index++) {
+    ::mindspore::Tensor *node_inputs = proto->add_inputs();
+    MS_EXCEPTION_IF_NULL(node_inputs);
+    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
+    std::vector<size_t> input_shape;
+    int32_t input_data_type;
+    if (input_type == kObjectTypeString) {
+      auto cnode = anf_node->cast<CNodePtr>();
+      MS_EXCEPTION_IF_NULL(cnode);
+      auto input_node = cnode->inputs()[input_index + 1];
+      auto value_ptr = GetValueNode(input_node);
+      auto value = GetValue<std::string>(value_ptr);
+      input_shape.push_back(1);
+      input_shape.push_back(value.size());
+      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
+    } else {
+      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
+      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
+    }
+    mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
+    for (auto item : input_shape) {
+      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
+      dim->set_size((::google::protobuf::int64)item);
+    }
+    node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
+    node_inputs->set_mem_device("HBM");
+  }
+}
+
+void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
+  MS_EXCEPTION_IF_NULL(proto);
+  MS_EXCEPTION_IF_NULL(anf_node);
+  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
+  if (output_num == 0) {
+    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output.";
+    return;
+  }
+  for (size_t output_index = 0; output_index < output_num; output_index++) {
+    ::mindspore::Tensor *node_outputs = proto->add_outputs();
+    MS_EXCEPTION_IF_NULL(node_outputs);
+    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
+    mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
+    MS_EXCEPTION_IF_NULL(tensorShape);
+    for (auto item : output_shape) {
+      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
+      MS_EXCEPTION_IF_NULL(dim);
+      dim->set_size((::google::protobuf::int64)item);
+    }
+    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
+    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
+    node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
+    node_outputs->set_mem_device("HBM");
+  }
+}
+
+void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(proto);
+  MS_LOG(INFO) << "SetNodedefProto entry";
+  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
+  if (op_name == kInitDataSetQueue) {
+    op_name = kInitData;
+  }
+  // set op name
+  proto->set_op(op_name);
+  // set inputs tensor
+  SetNodeInputs(anf_node, proto);
+  // set outputs tensor
+  SetNodeOutputs(anf_node, proto);
+  // set node attr
+  SetNodeAttr(anf_node, proto);
+  MS_LOG(INFO) << "SetNodedefProto end!";
+}
+
+bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
+                        const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
+  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_LOG(INFO) << "CreateNodeDefBytes entry";
+  mindspore::NodeDef proto;
+  SetNodedefProto(anf_node, &proto);
+  std::string nodeDefStr;
+  if (!proto.SerializeToString(&nodeDefStr)) {
+    MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
+    return false;
+  }
+  kernel_mod_ptr->SetNodeDef(nodeDefStr);
+  MS_LOG(INFO) << "CreateNodeDefBytes end!";
+  return true;
+}
+
+KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
+  if (op_name == kInitDataSetQueue) {
+    op_name = kInitData;
+  }
+  auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
+  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
+  kernel_mod_ptr->SetAnfNode(anf_node);
+  kernel_mod_ptr->SetNodeName(op_name);
+  if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
+    MS_LOG(EXCEPTION) << "Create nodeDefBytes failed!";
+  }
+  if (!SetIOSize(anf_node, kernel_mod_ptr)) {
+    MS_LOG(EXCEPTION) << "Set input output size list failed.";
+  }
+  return kernel_mod_ptr;
+}
+}  // namespace kernel
+}  // namespace mindspore
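Both size helpers in this file multiply an element count by the per-element byte width through `LongMulWithOverflowCheck`. A self-contained sketch of that arithmetic, with a stand-in checked multiply (assumes non-negative operands; not the MindSpore helper itself):

```cpp
// Sketch (not from the patch) of the overflow-checked tensor-size arithmetic
// used by SetIOInputSize/SetIOSize above.
#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <vector>

int64_t CheckedMul(int64_t a, int64_t b) {  // stand-in for LongMulWithOverflowCheck
  if (a != 0 && b > std::numeric_limits<int64_t>::max() / a) {
    throw std::overflow_error("tensor byte size overflows int64");
  }
  return a * b;
}

int main() {
  std::vector<size_t> shape = {32, 3, 224, 224};  // NCHW tensor
  int64_t size = 1;
  for (size_t d : shape) {
    size = CheckedMul(size, static_cast<int64_t>(d));
  }
  size = CheckedMul(size, 4);           // 4 bytes per float32 element
  std::cout << size << " bytes\n";      // prints: 19267584 bytes
}
```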
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
+#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
+#include <memory>
+#include "backend/kernel_compiler/kernel.h"
+
+namespace mindspore {
+namespace kernel {
+KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node);
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
@@ -0,0 +1,73 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
+#include <memory>
+#include <string>
+#include "backend/kernel_compiler/oplib/oplib.h"
+#include "backend/kernel_compiler/common_utils.h"
+#include "backend/kernel_compiler/aicpu/aicpu_util.h"
+#include "backend/session/anf_runtime_algorithm.h"
+
+namespace mindspore {
+namespace kernel {
+void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
+  MS_LOG(INFO) << "AicpuMetadataInfo.";
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  MS_EXCEPTION_IF_NULL(kernel_info_list);
+  std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
+  if (op_name == kInitDataSetQueue) {
+    op_name = kInitData;
+  }
+  auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
+  if (op_info_ptr == nullptr) {
+    MS_LOG(DEBUG) << "Aicpu does not have op [" << op_name << "]";
+    return;
+  }
+  // For compatibility with the current framework
+  if (op_name == kPrint || op_name == kGetNext || op_name == kPack) {
+    std::vector<std::string> inputs_format{};
+    std::vector<TypeId> inputs_type{};
+    if (op_name == kPrint || op_name == kPack) {
+      for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
+        inputs_format.emplace_back(kOpFormat_DEFAULT);
+        inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
+      }
+    }
+    std::vector<std::string> outputs_format;
+    std::vector<TypeId> outputs_type;
+    for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
+      outputs_format.emplace_back(kOpFormat_DEFAULT);
+      outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
+    }
+    auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
+    builder.SetInputsFormat(inputs_format);
+    builder.SetInputsDeviceType(inputs_type);
+    builder.SetOutputsFormat(outputs_format);
+    builder.SetOutputsDeviceType(outputs_type);
+    builder.SetProcessor(AICPU);
+    builder.SetKernelType(AICPU_KERNEL);
+    builder.SetFusionType(OPAQUE);
+    kernel_info_list->push_back(builder.Build());
+    return;
+  }
+  if (!ParseMetadata(kernel_node, op_info_ptr, AICPU, kernel_info_list)) {
+    MS_LOG(WARNING) << "Aicpu parsed metadata op [" << op_name << "] failed";
+    return;
+  }
+}
+}  // namespace kernel
+}  // namespace mindspore
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
+#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
+#include <string>
+#include <vector>
+#include <memory>
+#include "backend/kernel_compiler/kernel_build_info.h"
+
+namespace mindspore {
+namespace kernel {
+void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
@@ -0,0 +1,156 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
+
+#include <memory>
+#include <vector>
+#include <string>
+#include <algorithm>
+
+#include "runtime/mem.h"
+#include "runtime/rt.h"
+#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
+#include "utils/convert_utils.h"
+#include "backend/kernel_compiler/aicpu/aicpu_util.h"
+#include "utils/context/ms_context.h"
+
+using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
+
+namespace mindspore {
+namespace kernel {
+constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";
+
+AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
+
+AicpuOpKernelMod::~AicpuOpKernelMod() {
+  args_.clear();
+  inputList_.clear();
+  outputList_.clear();
+  anf_node_ = nullptr;
+  input_size_list_.clear();
+  output_size_list_.clear();
+  workspace_size_list_.clear();
+}
+
+void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
+const std::vector<size_t> &AicpuOpKernelMod::GetInputSizeList() const { return input_size_list_; }
+void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
+const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
+void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
+const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
+void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
+void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
+void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
+void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
+
+void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  anf_node_ = anf_node;
+}
+
+void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs,
+                                           const std::vector<AddressPtr> &outputs) {
+  MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";
+  node_so_ = AICPU_OPS_SO_NAME;
+
+  // InputOutputAddr
+  std::vector<void *> io_addrs;
+  (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
+                       [](const AddressPtr &input) -> void * { return input->addr; });
+  (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(io_addrs),
+                       [](const AddressPtr &output) -> void * { return output->addr; });
+  auto io_addrs_num = io_addrs.size();
+
+  // calculate paramLen: AicpuParamHead.len + ioAddrsSize + notifyId.len + customizedAttr.len
+  auto param_len = sizeof(AicpuParamHead);
+  // get input and output addrs size, no need to check overflow
+  auto io_addrs_size = io_addrs_num * sizeof(uint64_t);
+  // refresh paramLen, no need to check overflow
+  param_len += io_addrs_size;
+
+  auto node_def_len = node_def_str_.length();
+  param_len += node_def_len;
+
+  // Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
+  AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
+  args_.clear();
+  (void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
+  // TaskArgs append ioAddrs
+  if (io_addrs_size != 0) {
+    (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
+  }
+  // For customized AICPU ops, taskArgs should also carry the customized attr (the serialized NodeDef)
+  if (node_def_len != 0) {
+    (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
+  }
+  MS_LOG(INFO) << "CreateCpuKernelInfoOffline end";
+}
+
+bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+                              const std::vector<AddressPtr> &outputs, void *stream_ptr) {
+  if (stream_ptr == nullptr) {
+    MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
+    return false;
+  }
+  CreateCpuKernelInfo(inputs, outputs);
+  if (node_name_ == kTopK) {
+    node_name_ = kTopKV2;
+  }
+  MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
+               << ", args_size:" << args_.length();
+  if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
+                        reinterpret_cast<const void *>(node_name_.c_str()), 1,
+                        reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
+                        stream_ptr) != RT_ERROR_NONE) {
+    MS_LOG(ERROR) << "Aicpu op launch failed!";
+    return false;
+  }
+  return true;
+}
+
+std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
+                                                   const std::vector<AddressPtr> &,
+                                                   const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
+  MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
+  stream_id_ = stream_id;
+  node_so_ = AICPU_OPS_SO_NAME;
+  std::vector<void *> input_data_addrs;
+  (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
+                       [](const AddressPtr &input) -> void * { return input->addr; });
+  std::vector<void *> output_data_addrs;
+  (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
+                       [](const AddressPtr &output) -> void * { return output->addr; });
+  if (node_name_ == kTopK) {
+    node_name_ = kTopKV2;
+  }
+  AicpuTaskInfoPtr task_info_ptr = std::make_shared<ge::model_runner::AicpuTaskInfo>(
+    kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
+  MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
+  return {task_info_ptr};
+}
+}  // namespace kernel
+}  // namespace mindspore
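`CreateCpuKernelInfo` packs the launch arguments into one flat byte string: an `AicpuParamHead`, then the raw input/output addresses, then the serialized `NodeDef`. A standalone sketch of that layout (the two-field head is an assumed stand-in for the real struct in aicpu_util.h; 64-bit pointers are assumed, as in the original):

```cpp
// Standalone sketch (not from the patch) of the task-args blob assembled by
// CreateCpuKernelInfo: [AicpuParamHead][io addresses][serialized NodeDef].
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct AicpuParamHead {  // assumed minimal stand-in for illustration
  uint32_t length;       // total byte length of the args blob
  uint32_t ioAddrNum;    // number of input + output addresses
};

int main() {
  std::vector<void *> io_addrs = {nullptr, nullptr, nullptr};  // e.g. 2 inputs + 1 output
  std::string node_def = "<serialized NodeDef bytes>";

  size_t param_len = sizeof(AicpuParamHead) + io_addrs.size() * sizeof(uint64_t) + node_def.size();
  AicpuParamHead head{static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs.size())};

  std::string args;
  args.append(reinterpret_cast<const char *>(&head), sizeof(head));
  args.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs.size() * sizeof(uint64_t));
  args.append(node_def.data(), node_def.size());

  std::cout << "args length " << args.length() << " == param_len " << param_len << "\n";
}
```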
| @@ -0,0 +1,75 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "backend/kernel_compiler/ascend_kernel_mod.h" | |||||
| #include "backend/kernel_compiler/aicpu/aicpu_util.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class AicpuOpKernelMod : public AscendKernelMod { | |||||
| public: | |||||
| AicpuOpKernelMod(); | |||||
| ~AicpuOpKernelMod() override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | |||||
| std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; | |||||
| void SetInputList(const std::vector<int64_t> &inputList); | |||||
| void SetOutputList(const std::vector<int64_t> &outputList); | |||||
| void SetAnfNode(const AnfNodePtr &anf_node); | |||||
| void SetNodeDef(const std::string &nodeDef); | |||||
| void SetNodeName(const std::string &node_name); | |||||
| /** | |||||
| * @brief Build the AICPU engine kernel structure and allocate device memory for offline task generation. | |||||
| */ | |||||
| void CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs); | |||||
| void SetInputSizeList(const std::vector<size_t> &size_list); | |||||
| void SetOutputSizeList(const std::vector<size_t> &size_list); | |||||
| void SetWorkspaceSizeList(const std::vector<size_t> &size_list); | |||||
| const std::vector<size_t> &GetInputSizeList() const override; | |||||
| const std::vector<size_t> &GetOutputSizeList() const override; | |||||
| const std::vector<size_t> &GetWorkspaceSizeList() const override; | |||||
| private: | |||||
| std::string args_; | |||||
| std::string node_def_str_; | |||||
| std::string node_name_; | |||||
| std::string node_so_; | |||||
| std::vector<int64_t> inputList_; | |||||
| std::vector<int64_t> outputList_; | |||||
| AnfNodePtr anf_node_; | |||||
| std::vector<size_t> input_size_list_; | |||||
| std::vector<size_t> output_size_list_; | |||||
| std::vector<size_t> workspace_size_list_; | |||||
| }; | |||||
| using AicpuOpKernelModPtr = std::shared_ptr<AicpuOpKernelMod>; | |||||
| using AicputOpKernelModPtrList = std::vector<AicpuOpKernelModPtr>; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_ | |||||
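A minimal usage sketch of the interface above, assuming a caller that already holds the resolved node; the op name, sizes, and the commented-out Launch arguments are illustrative, not taken from the sources:

```cpp
// Hypothetical setup of an AicpuOpKernelMod before launch; all values are
// illustrative assumptions.
#include <memory>
#include <string>
#include <vector>
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"

void SetUpAicpuKernelExample(const mindspore::AnfNodePtr &anf_node) {
  auto kernel_mod = std::make_shared<mindspore::kernel::AicpuOpKernelMod>();
  kernel_mod->SetAnfNode(anf_node);
  kernel_mod->SetNodeName("TopK");        // Launch() remaps TopK to TopKV2
  kernel_mod->SetNodeDef("");             // serialized NodeDef, used only by customized ops
  kernel_mod->SetInputSizeList({4096});   // byte size per input
  kernel_mod->SetOutputSizeList({4096});  // byte size per output
  // With AddressPtr lists from the memory manager and an rt stream:
  //   kernel_mod->Launch(inputs, {}, outputs, stream_ptr);
}
```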
| @@ -0,0 +1,56 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/aicpu/aicpu_util.h" | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "proto/types.pb.h" | |||||
| #include "runtime/mem.h" | |||||
| #include "runtime/rt.h" | |||||
| #include "utils/convert_utils.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = { | |||||
| {mindspore::TypeId::kTypeUnknown, mindspore::DataType::MS_UNKNOWN}, | |||||
| {mindspore::TypeId::kNumberTypeBool, mindspore::DataType::MS_BOOL}, | |||||
| {mindspore::TypeId::kNumberTypeInt, mindspore::DataType::MS_INT32}, | |||||
| {mindspore::TypeId::kNumberTypeInt8, mindspore::DataType::MS_INT8}, | |||||
| {mindspore::TypeId::kNumberTypeInt16, mindspore::DataType::MS_INT16}, | |||||
| {mindspore::TypeId::kNumberTypeInt32, mindspore::DataType::MS_INT32}, | |||||
| {mindspore::TypeId::kNumberTypeInt64, mindspore::DataType::MS_INT64}, | |||||
| {mindspore::TypeId::kNumberTypeUInt, mindspore::DataType::MS_UINT32}, | |||||
| {mindspore::TypeId::kNumberTypeUInt8, mindspore::DataType::MS_UINT8}, | |||||
| {mindspore::TypeId::kNumberTypeUInt16, mindspore::DataType::MS_UINT16}, | |||||
| {mindspore::TypeId::kNumberTypeUInt32, mindspore::DataType::MS_UINT32}, | |||||
| {mindspore::TypeId::kNumberTypeUInt64, mindspore::DataType::MS_UINT64}, | |||||
| {mindspore::TypeId::kNumberTypeFloat16, mindspore::DataType::MS_FLOAT16}, | |||||
| {mindspore::TypeId::kNumberTypeFloat, mindspore::DataType::MS_FLOAT32}, | |||||
| {mindspore::TypeId::kNumberTypeFloat32, mindspore::DataType::MS_FLOAT32}, | |||||
| {mindspore::TypeId::kNumberTypeFloat64, mindspore::DataType::MS_FLOAT64}, | |||||
| }; | |||||
| int AicpuOpUtil::MsTypeToProtoType(TypeId ms_type) { | |||||
| auto iter = MS_PROTO_DATA_TYPE_MAP.find(ms_type); | |||||
| if (iter != MS_PROTO_DATA_TYPE_MAP.end()) { | |||||
| return iter->second; | |||||
| } else { | |||||
| MS_LOG(ERROR) << "Unsupported ms_type value: " << static_cast<int>(ms_type); | |||||
| return -1; | |||||
| } | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
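A small sketch of the mapping contract, under the assumption that the proto enum values from types.pb.h are linked in:

```cpp
// Known TypeIds map to proto enum values; anything unmapped yields -1.
#include <cassert>
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "proto/types.pb.h"

void ProtoTypeMappingExample() {
  using mindspore::kernel::AicpuOpUtil;
  assert(AicpuOpUtil::MsTypeToProtoType(mindspore::TypeId::kNumberTypeFloat32) ==
         mindspore::DataType::MS_FLOAT32);
  // An id that is absent from the map falls through to the error branch.
  assert(AicpuOpUtil::MsTypeToProtoType(mindspore::TypeId::kObjectTypeString) == -1);
}
```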
| @@ -0,0 +1,64 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_ | |||||
| #include <cstdint> | |||||
| #include <vector> | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| constexpr auto kInitDataSetQueue = "InitDataSetQueue"; | |||||
| constexpr auto kInitData = "InitData"; | |||||
| constexpr auto kGetNext = "GetNext"; | |||||
| constexpr auto kPrint = "Print"; | |||||
| constexpr auto kPack = "Pack"; | |||||
| constexpr auto kOutputTypes = "output_types"; | |||||
| constexpr auto kOutputShapes = "output_shapes"; | |||||
| constexpr auto kChannelName = "channel_name"; | |||||
| constexpr auto kSharedName = "shared_name"; | |||||
| constexpr auto kShapes = "shapes"; | |||||
| constexpr auto kTypes = "types"; | |||||
| constexpr auto kQueueName = "queue_name"; | |||||
| constexpr auto kSeed = "seed"; | |||||
| constexpr auto kSeed0 = "Seed0"; | |||||
| constexpr auto kSeed1 = "Seed1"; | |||||
| constexpr auto kSeed2 = "seed2"; | |||||
| constexpr auto kTopK = "TopK"; | |||||
| constexpr auto kTopKV2 = "TopKV2"; | |||||
| struct AicpuParamHead { | |||||
| uint32_t length; // Total length: include custom message | |||||
| uint32_t ioAddrNum; // Input and output address number | |||||
| uint32_t extInfoLength; // extInfo struct Length | |||||
| uint64_t extInfoAddr; // extInfo address | |||||
| } __attribute__((packed)); | |||||
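To make the packed layout concrete, here is a sketch of how CreateCpuKernelInfo assembles the task args from this header; addresses and sizes are illustrative:

```cpp
// Layout: [AicpuParamHead][ioAddr 0 .. ioAddr N-1][optional custom NodeDef].
#include <cstdint>
#include <string>
#include <vector>

std::string PackAicpuArgsExample(const std::vector<uint64_t> &io_addrs, const std::string &node_def) {
  mindspore::kernel::AicpuParamHead head = {};
  head.ioAddrNum = static_cast<uint32_t>(io_addrs.size());
  head.length = static_cast<uint32_t>(sizeof(head) + io_addrs.size() * sizeof(uint64_t) + node_def.size());
  std::string args;
  args.append(reinterpret_cast<const char *>(&head), sizeof(head));
  args.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs.size() * sizeof(uint64_t));
  args.append(node_def);  // present only for customized ops
  return args;
}
```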
| class AicpuOpUtil { | |||||
| public: | |||||
| static int MsTypeToProtoType(TypeId ms_type); | |||||
| private: | |||||
| // kernel id | |||||
| static uint64_t KernelId_; | |||||
| }; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_ | |||||
| @@ -0,0 +1,180 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" | |||||
| #include <algorithm> | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "backend/optimizer/common/helper.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| // The x and output are akg op input and output param. | |||||
| std::vector<std::string> input_names = {"x"}; | |||||
| std::vector<std::string> output_names = {"output"}; | |||||
| AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node); | |||||
| TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); | |||||
| std::string dst_type; | |||||
| if (dst_type_id == kFloat32->type_id()) { | |||||
| dst_type = "float32"; | |||||
| } else if (dst_type_id == kFloat16->type_id()) { | |||||
| dst_type = "float16"; | |||||
| } | |||||
| AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::vector<std::string> input_names = {"x"}; | |||||
| std::vector<std::string> output_names = {"output"}; | |||||
| AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node); | |||||
| std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(anf_node, 0); | |||||
| if (origin_shape.size() != kShape4dDims) { | |||||
| MS_LOG(EXCEPTION) << "The dim of origin_shape is not equal to 4, but it's dim is " << origin_shape.size() << "."; | |||||
| } | |||||
| std::vector<int> shape_transform; | |||||
| (void)std::transform(origin_shape.begin(), origin_shape.end(), std::back_inserter(shape_transform), | |||||
| [](const int &origin_shape) { return static_cast<int>(origin_shape); }); | |||||
| AnfAlgo::SetNodeAttr("shape4d", MakeValue(shape_transform), anf_node); | |||||
| AnfAlgo::SetNodeAttr("output_format", MakeValue(kOpFormat_NCHW), anf_node); | |||||
| TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); | |||||
| std::string dst_type; | |||||
| if (dst_type_id == kFloat32->type_id()) { | |||||
| dst_type = "float32"; | |||||
| } else if (dst_type_id == kFloat16->type_id()) { | |||||
| dst_type = "float16"; | |||||
| } | |||||
| AnfAlgo::SetNodeAttr("dstType", MakeValue(dst_type), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForCast(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| // The x and output are akg op input and output param. | |||||
| std::vector<std::string> input_names = {"x", "dst_type"}; | |||||
| std::vector<std::string> output_names = {"output"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||||
| std::string dst_type; | |||||
| TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); | |||||
| if (output_type == kFloat32->type_id()) { | |||||
| dst_type = "float32"; | |||||
| } else if (output_type == kFloat16->type_id()) { | |||||
| dst_type = "float16"; | |||||
| } else if (output_type == kInt32->type_id()) { | |||||
| dst_type = "int32"; | |||||
| } else { | |||||
| MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString(); | |||||
| } | |||||
| AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::vector<std::string> input_names{"dy", "data", "mean"}; | |||||
| std::vector<std::string> output_names{"dgamma_red_hw", "dbeta_red_hw", "data_minus_mean"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node) { | |||||
| const size_t kBNGrad2InputSize = 5; | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::vector<std::string> input_names{"dgamma_red_hw", "dbeta_red_hw", "variance", "gamma"}; | |||||
| std::vector<std::string> output_names{"bn_scale", "bn_bias", "rs", "dgamma_dx", "dbeta_dx"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||||
| auto cnode = anf_node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (cnode->inputs().size() < kBNGrad2InputSize) { | |||||
| MS_LOG(EXCEPTION) << "The inputs size of BNGrad2 is less then " << kBNGrad2InputSize; | |||||
| } | |||||
| auto input1 = cnode->input(1); | |||||
| MS_EXCEPTION_IF_NULL(input1); | |||||
| auto tuple_getitem = input1->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||||
| if (tuple_getitem->inputs().size() < kTupleGetItemInputSize) { | |||||
| MS_LOG(EXCEPTION) << "The inputs size of tuple_getitem is less then " << kTupleGetItemInputSize; | |||||
| } | |||||
| auto bn_grad1 = tuple_getitem->input(kRealInputNodeIndexInTupleGetItem); | |||||
| std::vector<size_t> data_shape = AnfAlgo::GetInputDeviceShape(bn_grad1, 0); | |||||
| AnfAlgo::SetNodeAttr(kAttrDataShape, MakeValue(opt::Convert2Int(data_shape)), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::vector<std::string> input_names{"dy", "rs", "dgamma_dx", "dbeta_dx", "data_minus_mean"}; | |||||
| std::vector<std::string> output_names{"dx"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| // Set attr for fused_bn1 | |||||
| std::vector<std::string> fused_bn1_input_names{"data"}; | |||||
| std::vector<std::string> fused_bn1_output_names{"mean", "var_part"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn1_input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn1_output_names), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| // Set attr for fused_bn2 | |||||
| std::vector<std::string> fused_bn2_input_names{"mean", "var_part", "running_mean", "running_var"}; | |||||
| std::vector<std::string> fused_bn2_output_names{"variance", "running_mean", "running_variance"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn2_input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn2_output_names), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| // Set attr for fused_bn3 | |||||
| std::vector<std::string> fused_bn3_input_names{"data", "mean", "variance", "gamma", "beta"}; | |||||
| std::vector<std::string> fused_bn3_output_names{"y"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn3_input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn3_output_names), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::vector<std::string> conv_bn1_output_names{"data", "var_part", "mean"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(conv_bn1_output_names), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::vector<std::string> bn2_add_relu_input_names{"data", "var_part", "mean", "other_branch_data", | |||||
| "gamma", "beta", "running_mean", "running_var"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_add_relu_input_names), anf_node); | |||||
| std::vector<std::string> bn2_add_relu_output_names{"output", "running_mean", "running_variance", "save_inv_variance"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_add_relu_output_names), anf_node); | |||||
| } | |||||
| void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::vector<std::string> bn2_input_names{"data", "var_part", "mean", "gamma", "beta", "running_mean", "running_var"}; | |||||
| std::vector<std::string> bn2_output_names{"y", "running_mean", "running_variance", "save_inv_variance"}; | |||||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_input_names), anf_node); | |||||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_output_names), anf_node); | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,58 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H | |||||
| #define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <functional> | |||||
| #include "ir/anf.h" | |||||
| #include "utils/utils.h" | |||||
| #include "frontend/operator/ops.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForCast(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node); | |||||
| void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node); | |||||
| const std::unordered_map<std::string, std::function<void(const AnfNodePtr &anf_node)>> kAkgKernelAttrsProcessMap = { | |||||
| {kFour2FiveOpName, SetAkgAttrsForFour2Five}, | |||||
| {kFive2FourOpName, SetAkgAttrsForFive2Four}, | |||||
| {"Cast", SetAkgAttrsForCast}, | |||||
| {kBNGrad1OpName, SetAkgAttrsForBNGrad1}, | |||||
| {kBNGrad2OpName, SetAkgAttrsForBNGrad2}, | |||||
| {kBNGrad3OpName, SetAkgAttrsForBNGrad3}, | |||||
| {kFusedBN1OpName, SetAkgAttrsForFusedBN1}, | |||||
| {kFusedBN2OpName, SetAkgAttrsForFusedBN2}, | |||||
| {kFusedBN3OpName, SetAkgAttrsForFusedBN3}, | |||||
| {kConvBN1OpName, SetAkgAttrsForConvBN1}, | |||||
| {kBN2AddReluOpName, SetAkgAttrsForBN2AddRelu}, | |||||
| {kBN2ReLUOpName, SetAkgAttrsForBN2Relu}, | |||||
| }; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H | |||||
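As a usage sketch, the map above lets callers dispatch attr processing by op name before kernel build; this mirrors the lookup done in AkgKernelBuild::BuildByJson:

```cpp
#include <string>
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"

void ProcessAkgAttrsExample(const mindspore::AnfNodePtr &anf_node, const std::string &op_name) {
  auto it = mindspore::kernel::kAkgKernelAttrsProcessMap.find(op_name);
  if (it != mindspore::kernel::kAkgKernelAttrsProcessMap.end()) {
    it->second(anf_node);  // e.g. SetAkgAttrsForCast for "Cast"
  }
}
```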
| @@ -0,0 +1,623 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/akg/akg_kernel_build.h" | |||||
| #include <Python.h> | |||||
| #include <sys/types.h> | |||||
| #include <signal.h> | |||||
| #include <unistd.h> | |||||
| #include <dirent.h> | |||||
| #include <cctype> | |||||
| #include <cstdint> | |||||
| #include <memory> | |||||
| #include <map> | |||||
| #include <utility> | |||||
| #include <algorithm> | |||||
| #include <functional> | |||||
| #include <sstream> | |||||
| #include <iterator> | |||||
| #include <numeric> | |||||
| #include <unordered_set> | |||||
| #include "common/utils.h" | |||||
| #include "utils/convert_utils.h" | |||||
| #include "utils/any.h" | |||||
| #include "utils/utils.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| constexpr int ME_MAX_KERNEL_NAME_LENGTH = 200; | |||||
| constexpr int32_t ARGS_SIZE = 1; | |||||
| constexpr auto kCompileWithJsonFunc = "compilewithjson"; | |||||
| // json key | |||||
| constexpr auto kOpDesc = "op_desc"; | |||||
| constexpr auto kInputDesc = "input_desc"; | |||||
| constexpr auto kShape = "shape"; | |||||
| constexpr auto kDataType = "data_type"; | |||||
| constexpr auto kOutputDesc = "output_desc"; | |||||
| constexpr auto kName = "name"; | |||||
| constexpr auto kTensorName = "tensor_name"; | |||||
| constexpr auto kValue = "value"; | |||||
| constexpr auto KDynInputSizes = "dyn_input_sizes"; | |||||
| constexpr auto KInputNames = "input_names"; | |||||
| constexpr auto KInput = "input"; | |||||
| constexpr auto KDtype = "dtype"; | |||||
| namespace { | |||||
| template <typename T> | |||||
| std::string Vector2Str(const std::vector<T> &inputs) { | |||||
| if (!inputs.empty()) { | |||||
| std::ostringstream oss; | |||||
| (void)std::copy(inputs.begin(), inputs.end() - 1, std::ostream_iterator<T>(oss, ", ")); | |||||
| oss << inputs.back(); | |||||
| return oss.str(); | |||||
| } | |||||
| return ""; | |||||
| } | |||||
| } // namespace | |||||
| std::string AkgKernelBuild::PyObjectToStr(PyObject *const PyObj) { | |||||
| char *pChar = nullptr; | |||||
| std::string str_res; | |||||
| if (PyObj == nullptr) { | |||||
| MS_LOG(ERROR) << "Input parameter is nullptr."; | |||||
| return str_res; | |||||
| } | |||||
| PyObject *strArgs = PyObject_Str(PyObj); | |||||
| if (strArgs != nullptr) { | |||||
| (void)PyArg_Parse(strArgs, "s", &pChar); | |||||
| } | |||||
| if (pChar == nullptr) { | |||||
| MS_LOG(ERROR) << "pChar is nullptr."; | |||||
| return str_res; | |||||
| } | |||||
| str_res = pChar; | |||||
| return str_res; | |||||
| } | |||||
| std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, | |||||
| const std::pair<size_t, size_t> &position) { | |||||
| if (node_json.count(tag) == 0) { | |||||
| MS_LOG(ERROR) << "Node [" << node_json.dump() << "] has no key [" << tag << "]."; | |||||
| return ""; | |||||
| } | |||||
| auto const &tag_desc = node_json[tag]; | |||||
| nlohmann::json first_index; | |||||
| if (tag == kOutputDesc) { | |||||
| first_index = tag_desc; | |||||
| } else if (!tag_desc.is_array() || tag_desc.size() <= position.first) { | |||||
| MS_LOG(ERROR) << "Node [" << tag_desc.dump() << "] has no enough value [" << position.first << "]."; | |||||
| return ""; | |||||
| } else { | |||||
| first_index = tag_desc[position.first]; | |||||
| } | |||||
| if (!first_index.is_array() || first_index.size() <= position.second) { | |||||
| MS_LOG(ERROR) << "Node [" << first_index.dump() << "] has no enough value [" << position.second << "]."; | |||||
| return ""; | |||||
| } | |||||
| auto const &second_index = first_index[position.second]; | |||||
| if (second_index.count(kTensorName) == 0) { | |||||
| MS_LOG(ERROR) << "Node [" << second_index.dump() << "] has no key [" << kTensorName << "]."; | |||||
| return ""; | |||||
| } | |||||
| return second_index[kTensorName]; | |||||
| } | |||||
| void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair<size_t, size_t> &position, | |||||
| nlohmann::json *const node_json) { | |||||
| MS_EXCEPTION_IF_NULL(node_json); | |||||
| if (node_json->count(tag) == 0) { | |||||
| MS_LOG(ERROR) << "Node [" << node_json->dump() << "] has no key [" << tag << "]."; | |||||
| return; | |||||
| } | |||||
| nlohmann::json *tag_desc = &((*node_json)[tag]); | |||||
| nlohmann::json *first_index; | |||||
| if (tag == kOutputDesc) { | |||||
| first_index = tag_desc; | |||||
| } else if (!tag_desc->is_array() || tag_desc->size() <= position.first) { | |||||
| MS_LOG(ERROR) << "Node [" << tag_desc->dump() << "] has no enough value [" << position.first << "]."; | |||||
| return; | |||||
| } else { | |||||
| first_index = &((*tag_desc)[position.first]); | |||||
| } | |||||
| if (!first_index->is_array() || first_index->size() <= position.second) { | |||||
| MS_LOG(ERROR) << "Node [" << first_index->dump() << "] has no enough value [" << position.second << "]."; | |||||
| return; | |||||
| } | |||||
| nlohmann::json *second_index = &((*first_index)[position.second]); | |||||
| if (second_index->count(kTensorName) == 0) { | |||||
| MS_LOG(ERROR) << "Node [" << second_index->dump() << "] has no key [" << kTensorName << "]."; | |||||
| return; | |||||
| } | |||||
| (*second_index)[kTensorName] = new_name; | |||||
| return; | |||||
| } | |||||
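An illustrative round trip through the two helpers above, showing the nesting they expect (input_desc is a list of lists, output_desc a flat list; all field values are made up):

```cpp
#include <nlohmann/json.hpp>
#include <string>

void TensorNameExample() {
  nlohmann::json tensor;
  tensor["tensor_name"] = "input_0";
  nlohmann::json node_json;
  node_json["input_desc"] = nlohmann::json::array({nlohmann::json::array({tensor})});
  nlohmann::json out;
  out["tensor_name"] = "output_0_0";
  node_json["output_desc"] = nlohmann::json::array({out});
  std::string old_name = mindspore::kernel::GetTensorName(node_json, "input_desc", {0, 0});
  mindspore::kernel::SetTensorName("input_desc", "fused_input_0", {0, 0}, &node_json);
  (void)old_name;  // old_name == "input_0"; node_json now references "fused_input_0"
}
```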
| int AkgKernelBuild::op_cnt_ = 0; | |||||
| std::mutex AkgKernelBuild::op_cnt_mtx_; | |||||
| std::string AkgKernelBuild::GetProcessor(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::string device; | |||||
| switch (AnfAlgo::GetProcessor(anf_node)) { | |||||
| case Processor::AICORE: | |||||
| device = kProcessorAiCore; | |||||
| break; | |||||
| case Processor::AICPU: | |||||
| device = kProcessorAiCpu; | |||||
| break; | |||||
| case Processor::CUDA: | |||||
| device = kProcessorCuda; | |||||
| break; | |||||
| default: | |||||
| MS_LOG(ERROR) << "Unknown processor type."; | |||||
| break; | |||||
| } | |||||
| return device; | |||||
| } | |||||
| bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *const input_size, | |||||
| std::vector<size_t> *const output_size) { | |||||
| if (input_size == nullptr || output_size == nullptr) { | |||||
| MS_LOG(ERROR) << "input size or output size is nullptr"; | |||||
| return false; | |||||
| } | |||||
| input_size->clear(); | |||||
| output_size->clear(); | |||||
| for (size_t i = 0; i < node_json[kInputDesc].size(); i++) { | |||||
| for (size_t m = 0; m < node_json[kInputDesc][i].size(); m++) { | |||||
| std::string dtype = node_json[kInputDesc][i][m][kDataType]; | |||||
| size_t nbyte = GetDtypeNbyte(dtype); | |||||
| size_t size_i = std::accumulate(node_json[kInputDesc][i][m][kShape].begin(), | |||||
| node_json[kInputDesc][i][m][kShape].end(), nbyte, std::multiplies<size_t>()); | |||||
| input_size->push_back(size_i); | |||||
| } | |||||
| } | |||||
| for (size_t i = 0; i < node_json[kOutputDesc].size(); i++) { | |||||
| std::string dtype = node_json[kOutputDesc][i][kDataType]; | |||||
| size_t nbyte = GetDtypeNbyte(dtype); | |||||
| size_t size_i = std::accumulate(node_json[kOutputDesc][i][kShape].begin(), node_json[kOutputDesc][i][kShape].end(), | |||||
| nbyte, std::multiplies<size_t>()); | |||||
| output_size->push_back(size_i); | |||||
| } | |||||
| return true; | |||||
| } | |||||
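A worked example of the byte-size computation above (float32 is 4 bytes, so a [2, 3] tensor contributes 4 * 2 * 3 = 24 bytes):

```cpp
#include <nlohmann/json.hpp>
#include <vector>

void GetIOSizeExample() {
  nlohmann::json in;
  in["data_type"] = "float32";
  in["shape"] = {2, 3};
  nlohmann::json node_json;
  node_json["input_desc"] = nlohmann::json::array({nlohmann::json::array({in})});
  node_json["output_desc"] = nlohmann::json::array({in});
  std::vector<size_t> input_size, output_size;
  if (mindspore::kernel::GetIOSize(node_json, &input_size, &output_size)) {
    // input_size == {24} and output_size == {24}
  }
}
```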
| int AkgKernelBuild::GetOpCntInc() { | |||||
| std::lock_guard<std::mutex> lock(op_cnt_mtx_); | |||||
| return op_cnt_++; | |||||
| } | |||||
| bool AkgKernelBuild::CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(inputs_json); | |||||
| // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input. | |||||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||||
| auto op_info = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||||
| if (op_info == nullptr) { | |||||
| MS_LOG(ERROR) << "Apply kernel [" << op_name << "] op_info is nullptr"; | |||||
| return false; | |||||
| } | |||||
| std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr = op_info->inputs_ptr(); | |||||
| if (inputs_ptr.empty()) { | |||||
| MS_LOG(INFO) << "Apply kernel [" << op_name << "] regist info has no input info"; | |||||
| return true; | |||||
| } | |||||
| auto op_info_input_num = inputs_ptr.size(); | |||||
| // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input. | |||||
| std::vector<int> dyn_input_sizes; | |||||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { | |||||
| dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes)); | |||||
| } | |||||
| size_t real_input_index = 0; | |||||
| std::vector<nlohmann::json> input_list; | |||||
| for (size_t i = 0; i < op_info_input_num; i++) { | |||||
| size_t input_tensor_num; | |||||
| std::shared_ptr<OpIOInfo> input_ptr = inputs_ptr[i]; | |||||
| std::string op_input_name; | |||||
| if (input_ptr == nullptr) { | |||||
| MS_LOG(ERROR) << "Apply kernel [" << op_name << "] regist input[" << i << "] is nullptr"; | |||||
| return false; | |||||
| } | |||||
| op_input_name = input_ptr->name(); | |||||
| if (dyn_input_sizes.empty()) { | |||||
| input_tensor_num = 1; | |||||
| } else { | |||||
| input_tensor_num = IntToSize(dyn_input_sizes[i]); | |||||
| } | |||||
| input_list.clear(); | |||||
| for (size_t input_i = 0; input_i < input_tensor_num; input_i++) { | |||||
| // dtype : float16 | |||||
| auto type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index); | |||||
| std::string dtype = TypeId2String(type_id); | |||||
| if (dtype.empty()) { | |||||
| MS_LOG(ERROR) << "Op [" << op_name << "] input [" << input_i << "] data type is null. "; | |||||
| return false; | |||||
| } | |||||
| nlohmann::json input_desc_json; | |||||
| input_desc_json[kDataType] = dtype; | |||||
| input_desc_json[kName] = op_input_name; | |||||
| input_desc_json[kTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index)); | |||||
| auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index); | |||||
| if (anf_node->func_graph() != nullptr && anf_node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && | |||||
| GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) { | |||||
| MS_LOG(WARNING) << "we take input[" << real_input_index << "] of [" << anf_node->DebugString(2) | |||||
| << "] as const tensor, shape: [" << Vector2Str(input_shape) | |||||
| << "], value: " << input_desc_json[kValue]; | |||||
| input_shape.clear(); | |||||
| } | |||||
| if (input_shape.empty()) { | |||||
| input_shape.push_back(1); | |||||
| } | |||||
| input_desc_json[kShape] = input_shape; | |||||
| input_list.emplace_back(input_desc_json); | |||||
| real_input_index++; | |||||
| } | |||||
| inputs_json->emplace_back(input_list); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(outputs_json); | |||||
| size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(anf_node); | |||||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||||
| MS_EXCEPTION_IF_NULL(op_info_ptr); | |||||
| auto outputs = op_info_ptr->outputs_ptr(); | |||||
| for (size_t i = 0; i < output_tensor_num; i++) { | |||||
| nlohmann::json output_json; | |||||
| auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, i); | |||||
| std::string dtype = TypeId2String(type_id); | |||||
| if (dtype.empty()) { | |||||
| MS_LOG(ERROR) << "Op [" << op_name << "] output [" << i << "] data type is null. "; | |||||
| return false; | |||||
| } | |||||
| std::string output_name = outputs[i]->name(); | |||||
| output_json[kDataType] = dtype; | |||||
| output_json[kName] = output_name; | |||||
| output_json[kTensorName] = "output_" + std::to_string(i) + "_" + std::to_string(GetOutputTensorIdxInc()); | |||||
| output_json[kShape] = AnfAlgo::GetOutputDeviceShape(anf_node, i); | |||||
| outputs_json->push_back(output_json); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| void GetJson(const AnfNodePtr &anf_node, const std::vector<int> &dyn_input_sizes, | |||||
| const std::shared_ptr<OpAttr> &op_attr, nlohmann::json *const attr_json, const ValuePtr &attr_value) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(op_attr); | |||||
| MS_EXCEPTION_IF_NULL(attr_json); | |||||
| std::string type = op_attr->type(); | |||||
| if (type == "int") { | |||||
| (*attr_json)[kValue] = GetValue<int>(attr_value); | |||||
| } else if (type == "str") { | |||||
| (*attr_json)[kValue] = GetValue<std::string>(attr_value); | |||||
| } else if (type == "bool") { | |||||
| (*attr_json)[kValue] = GetValue<bool>(attr_value); | |||||
| } else if (type == "float") { | |||||
| (*attr_json)[kValue] = GetValue<float>(attr_value); | |||||
| } else if (type == "listInt") { | |||||
| (*attr_json)[kValue] = GetValue<std::vector<int>>(attr_value); | |||||
| } else if (type == "listStr") { | |||||
| std::vector<std::string> data_format; | |||||
| if (op_attr->name() == kArgDataformat) { | |||||
| size_t tensor_args_num = !dyn_input_sizes.empty() ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node); | |||||
| for (size_t format_i = 0; format_i < tensor_args_num; format_i++) { | |||||
| auto input_format = AnfAlgo::GetInputFormat(anf_node, format_i); | |||||
| data_format.push_back(input_format); | |||||
| } | |||||
| } else { | |||||
| data_format = GetValue<std::vector<std::string>>(attr_value); | |||||
| } | |||||
| (*attr_json)[kValue] = data_format; | |||||
| } else { | |||||
| MS_LOG(WARNING) << "attr type:" << type; | |||||
| } | |||||
| } | |||||
| bool AkgKernelBuild::CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||||
| const std::shared_ptr<OpInfo> &op_info, nlohmann::json *const attrs_json) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(attrs_json); | |||||
| MS_EXCEPTION_IF_NULL(op_info); | |||||
| std::vector<std::shared_ptr<OpAttr>> attrs = op_info->attrs_ptr(); | |||||
| if (attrs.empty()) { | |||||
| MS_LOG(INFO) << "Apply kernel [" << op_name << "] op info attrs is empty"; | |||||
| return true; | |||||
| } | |||||
| std::vector<std::shared_ptr<OpIOInfo>> inputs = op_info->inputs_ptr(); | |||||
| std::vector<int> dyn_input_sizes; | |||||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { | |||||
| dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes)); | |||||
| } | |||||
| if (inputs.empty()) { | |||||
| MS_LOG(ERROR) << "Apply kernel [" << op_name << "] op info inputs is empty"; | |||||
| return false; | |||||
| } | |||||
| // Create an input name list to match "x_shape" in attr with "x" in primitive. | |||||
| std::map<size_t, std::string> op_info_shape_name; | |||||
| for (size_t op_info_input_i = 0; op_info_input_i < inputs.size(); op_info_input_i++) { | |||||
| std::string input_name = inputs[op_info_input_i]->name(); | |||||
| std::string x_shape_name = input_name + "_shape"; | |||||
| (void)op_info_shape_name.insert(make_pair(op_info_input_i, x_shape_name)); | |||||
| } | |||||
| for (const auto &op_attr : attrs) { | |||||
| nlohmann::json attr_json; | |||||
| ValuePtr attr_value = primitive->GetAttr(op_attr->name()); | |||||
| if (attr_value == nullptr && op_attr->name() != kArgDataformat) { | |||||
| if (op_attr->param_type() == "required") { | |||||
| // match "x_shape" in att with "x" in primitive. | |||||
| std::string attr_name = op_attr->name(); | |||||
| auto find_item = std::find_if( | |||||
| op_info_shape_name.begin(), op_info_shape_name.end(), | |||||
| [attr_name](const std::map<size_t, std::string>::value_type item) { return item.second == attr_name; }); | |||||
| if (find_item != op_info_shape_name.end()) { | |||||
| if (!dyn_input_sizes.empty()) { | |||||
| if (find_item->first >= dyn_input_sizes.size() - 1) { | |||||
| MS_LOG(EXCEPTION) << "dyn_input_sizes list index: " << find_item->first | |||||
| << " is out of range: " << dyn_input_sizes.size() - 1 << "."; | |||||
| } | |||||
| size_t tensor_idx = IntToSize(std::accumulate(&dyn_input_sizes[0], &dyn_input_sizes[find_item->first], 0)); | |||||
| for (int input_i = 0; input_i < dyn_input_sizes[find_item->first]; input_i++) { | |||||
| attr_json[kValue] = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, tensor_idx); | |||||
| attr_json[kName] = op_attr->name(); | |||||
| attrs_json->push_back(attr_json); | |||||
| tensor_idx++; | |||||
| } | |||||
| } else { | |||||
| attr_json[kValue] = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, find_item->first); | |||||
| attr_json[kName] = op_attr->name(); | |||||
| attrs_json->push_back(attr_json); | |||||
| } | |||||
| } else { | |||||
| MS_LOG(ERROR) << "op [" << op_name << "] should have attr :" << op_attr->name(); | |||||
| return false; | |||||
| } | |||||
| } | |||||
| continue; | |||||
| } | |||||
| GetJson(anf_node, dyn_input_sizes, op_attr, &attr_json, attr_value); | |||||
| attr_json[kName] = op_attr->name(); | |||||
| attrs_json->push_back(attr_json); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool AkgKernelBuild::GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||||
| nlohmann::json *const node_json) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(node_json); | |||||
| int op_cnt = GetOpCntInc(); | |||||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||||
| MS_EXCEPTION_IF_NULL(op_info_ptr); | |||||
| // get basic params from currentNodeOpDesc | |||||
| (*node_json)[kName] = op_name; | |||||
| (*node_json)["impl_path"] = op_info_ptr->impl_path(); | |||||
| (*node_json)["process"] = AkgKernelBuild::GetProcessor(anf_node); | |||||
| (*node_json)["composite"] = false; | |||||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| ValuePtr input_names_v = primitive->GetAttr(KInputNames); | |||||
| if (input_names_v == nullptr) { | |||||
| MS_LOG(ERROR) << "ApplyKernel has no input_names, op[" << op_name << "]."; | |||||
| return false; | |||||
| } | |||||
| std::vector<std::string> prim_input_names = GetValue<const std::vector<std::string>>(input_names_v); | |||||
| std::string inputs_name; | |||||
| for (const auto &prim_input_name : prim_input_names) { | |||||
| (void)inputs_name.append("_input_").append(prim_input_name).append("_"); | |||||
| } | |||||
| // input desc | |||||
| nlohmann::json inputs_json; | |||||
| if (!CreateInputDescJson(anf_node, &inputs_json)) { | |||||
| MS_LOG(ERROR) << "Create input desc json failed, op[" << op_name << "]."; | |||||
| return false; | |||||
| } | |||||
| (*node_json)[kInputDesc] = inputs_json; | |||||
| MS_LOG(INFO) << "Akg create input desc json success."; | |||||
| std::string inputs_shape = "inputs_shape_"; | |||||
| for (auto &i : inputs_json) { | |||||
| for (auto &m : i) { | |||||
| std::string data_type = m[kDataType]; | |||||
| (void)inputs_shape.append("_").append(data_type).append("_"); | |||||
| for (auto &j : m[kShape]) { | |||||
| size_t n = j; | |||||
| (void)inputs_shape.append(std::to_string(n)).append("_"); | |||||
| } | |||||
| } | |||||
| } | |||||
| // output desc | |||||
| nlohmann::json outputs_json; | |||||
| if (!CreateOutputDescJson(anf_node, &outputs_json)) { | |||||
| MS_LOG(ERROR) << "Create output desc json failed, op[" << op_name << "]."; | |||||
| return false; | |||||
| } | |||||
| (*node_json)[kOutputDesc] = outputs_json; | |||||
| MS_LOG(INFO) << "Akg create output desc json success."; | |||||
| std::string outputs_shape = "outputs_shape_"; | |||||
| for (auto &i : outputs_json) { | |||||
| std::string data_type = i[kDataType]; | |||||
| (void)outputs_shape.append("_").append(data_type).append("_"); | |||||
| for (auto &j : i[kShape]) { | |||||
| size_t m = j; | |||||
| (void)outputs_shape.append(std::to_string(m)).append("_"); | |||||
| } | |||||
| } | |||||
| // attribute desc | |||||
| nlohmann::json attrs_json; | |||||
| if (!CreateAttrDescJson(anf_node, op_name, op_info_ptr, &attrs_json)) { | |||||
| MS_LOG(ERROR) << "Create attr desc json failed, op[" << op_name << "]."; | |||||
| return false; | |||||
| } | |||||
| (*node_json)["attr"] = attrs_json; | |||||
| std::string json_str = node_json->dump(); | |||||
| size_t hash_id = std::hash<std::string>()(json_str); | |||||
| json_name_ = op_name + "_"; | |||||
| (void)json_name_.append(std::to_string(hash_id)); | |||||
| MS_LOG(INFO) << "full scope name is : " << anf_node->fullname_with_scope() << ", json info name is : " << json_name_; | |||||
| json_info_ = json_str; | |||||
| (*node_json)["id"] = op_cnt; | |||||
| (*node_json)["op"] = json_name_; | |||||
| MS_LOG(INFO) << "Akg create node desc json success."; | |||||
| return true; | |||||
| } | |||||
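For orientation, a generated single-kernel JSON has roughly the shape below; only the keys mirror the code above, all values are invented:

```cpp
// Illustrative only; the hash suffix in "op" and all shapes/types are made up.
const char *kExampleSingleKernelJson = R"({
  "name": "Cast", "impl_path": "", "process": "aicore", "composite": false,
  "input_desc": [[{"data_type": "float16", "name": "x", "tensor_name": "input_0", "shape": [16, 16]}]],
  "output_desc": [{"data_type": "float32", "name": "output", "tensor_name": "output_0_0", "shape": [16, 16]}],
  "attr": [{"name": "dst_type", "value": "float32"}],
  "id": 0, "op": "Cast_1234567890"
})";
```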
| KernelPackPtr AkgKernelBuild::OpBuild(const std::string &node_json, const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| auto processor = AkgKernelBuild::GetProcessor(anf_node); | |||||
| auto cached_kernel_pack = SearchCache(json_name_, processor); | |||||
| if (cached_kernel_pack != nullptr) { | |||||
| MS_LOG(INFO) << "Use cached kernel, json_name_[" << json_name_ << "], fullname_with_scope[" | |||||
| << anf_node->fullname_with_scope() << "]."; | |||||
| return cached_kernel_pack; | |||||
| } | |||||
| PyObject *pModule = nullptr; | |||||
| PyObject *pFunc = nullptr; | |||||
| PyObject *pArg = nullptr; | |||||
| PyObject *pRes = nullptr; | |||||
| pModule = PyImport_ImportModule(kAkgModule); | |||||
| if (pModule == nullptr) { | |||||
| MS_LOG(ERROR) << "Failed to import [" << kAkgModule << "]."; | |||||
| return nullptr; | |||||
| } | |||||
| pFunc = PyObject_GetAttrString(pModule, kCompileWithJsonFunc); | |||||
| pArg = PyTuple_New(ARGS_SIZE); | |||||
| (void)PyTuple_SetItem(pArg, 0, Py_BuildValue("s", node_json.c_str())); | |||||
| (void)alarm(AUTODIFF_COMPILE_OVERTIME); | |||||
| pRes = PyEval_CallObject(pFunc, pArg); | |||||
| (void)alarm(0); | |||||
| if (pRes == nullptr) { | |||||
| MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileWithJsonFunc << "], args:\n(" | |||||
| << AkgKernelBuild::PyObjectToStr(pArg) << ")."; | |||||
| return nullptr; | |||||
| } | |||||
| if (PyObject_IsTrue(pRes) != 1) { | |||||
| MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileWithJsonFunc << "], args:\n(" | |||||
| << AkgKernelBuild::PyObjectToStr(pArg) << ")."; | |||||
| return nullptr; | |||||
| } | |||||
| auto new_kernel_pack = InsertCache(json_name_, processor); | |||||
| kernel::SaveJsonInfo(json_name_, json_info_); | |||||
| if (new_kernel_pack == nullptr) { | |||||
| MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name_ << "], fullname_with_scope[" | |||||
| << anf_node->fullname_with_scope() << "]."; | |||||
| return nullptr; | |||||
| } | |||||
| return new_kernel_pack; | |||||
| } | |||||
| KernelPackPtr AkgKernelBuild::BuildByJson(const AnfNodePtr &anf_node, std::vector<size_t> *const input_size, | |||||
| std::vector<size_t> *const output_size) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||||
| auto it = kAkgKernelAttrsProcessMap.find(op_name); | |||||
| if (it != kAkgKernelAttrsProcessMap.end()) { | |||||
| it->second(anf_node); | |||||
| } | |||||
| MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; | |||||
| nlohmann::json node_json; | |||||
| if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { | |||||
| MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; | |||||
| return nullptr; | |||||
| } | |||||
| std::string json_str = node_json.dump(); | |||||
| auto kernel_pack = OpBuild(json_str, anf_node); | |||||
| if (kernel_pack == nullptr) { | |||||
| MS_LOG(ERROR) << "Akg build failed op[" << op_name << "], json:" << json_str; | |||||
| return nullptr; | |||||
| } | |||||
| if (!GetIOSize(node_json, input_size, output_size)) { | |||||
| MS_LOG(ERROR) << "Cal mem size failed."; | |||||
| return nullptr; | |||||
| } | |||||
| MS_LOG(INFO) << "Akg compile success, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) | |||||
| << "]"; | |||||
| return kernel_pack; | |||||
| } | |||||
| size_t AkgKernelBuild::GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| auto cnode = anf_node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (input_idx + 1 >= cnode->inputs().size()) { | |||||
| MS_EXCEPTION(ArgumentError) << "input_idx [" << input_idx << "] is out of range for the inputs (max index [" | |||||
| << cnode->inputs().size() - 1 << "]) of [" << cnode->DebugString() << "]"; | |||||
| } | |||||
| auto input_node = cnode->input(input_idx + 1); | |||||
| if (input_tensor_idx_.find(input_node) == input_tensor_idx_.end()) { | |||||
| size_t index = input_tensor_idx_.size(); | |||||
| input_tensor_idx_[input_node] = index; | |||||
| } | |||||
| return input_tensor_idx_[input_node]; | |||||
| } | |||||
| size_t AkgKernelBuild::GetOutputTensorIdxInc() { | |||||
| size_t idx = output_tensor_idx_++; | |||||
| return idx; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,76 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_ | |||||
| #include <unordered_map> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <map> | |||||
| #include <utility> | |||||
| #include <mutex> | |||||
| #include <Python.h> | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #include "ir/dtype.h" | |||||
| #include <nlohmann/json.hpp> | |||||
| #include "backend/kernel_compiler/common_utils.h" | |||||
| #include "backend/kernel_compiler/oplib/oplib.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class AkgKernelBuild { | |||||
| public: | |||||
| AkgKernelBuild() { | |||||
| input_tensor_idx_ = {}; | |||||
| output_tensor_idx_ = 0; | |||||
| } | |||||
| ~AkgKernelBuild() = default; | |||||
| KernelPackPtr BuildByJson(const AnfNodePtr &anf_node, std::vector<size_t> *const input_size, | |||||
| std::vector<size_t> *const output_size); | |||||
| static std::string GetProcessor(const AnfNodePtr &anf_node); | |||||
| static std::string PyObjectToStr(PyObject *const PyObj); | |||||
| protected: | |||||
| bool CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json); | |||||
| bool CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json); | |||||
| bool CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||||
| const std::shared_ptr<OpInfo> &op_info, nlohmann::json *const attrs_json); | |||||
| KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node); | |||||
| int GetOpCntInc(); | |||||
| size_t GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx); | |||||
| size_t GetOutputTensorIdxInc(); | |||||
| bool GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||||
| nlohmann::json *const node_json); | |||||
| static int op_cnt_; | |||||
| // lock for variable fusionOpCnt in singleton mode | |||||
| static std::mutex op_cnt_mtx_; | |||||
| std::string json_name_; | |||||
| std::string json_info_; | |||||
| std::unordered_map<AnfNodePtr, size_t> input_tensor_idx_; | |||||
| size_t output_tensor_idx_; | |||||
| }; | |||||
| bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *const input_size, | |||||
| std::vector<size_t> *const output_size); | |||||
| void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair<size_t, size_t> &position, | |||||
| nlohmann::json *const node_json); | |||||
| std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, | |||||
| const std::pair<size_t, size_t> &position); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_ | |||||
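A minimal sketch of the build entry point, assuming a valid AKG-capable node handed in by the kernel build pipeline:

```cpp
#include <vector>
#include "backend/kernel_compiler/akg/akg_kernel_build.h"

mindspore::kernel::KernelPackPtr BuildAkgKernelExample(const mindspore::AnfNodePtr &anf_node) {
  mindspore::kernel::AkgKernelBuild builder;
  std::vector<size_t> input_size;
  std::vector<size_t> output_size;
  // On success, the size lists hold the byte sizes computed by GetIOSize and
  // the returned pack holds the compiled (or cache-hit) kernel.
  return builder.BuildByJson(anf_node, &input_size, &output_size);
}
```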
| @@ -0,0 +1,50 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/akg/akg_kernel_metadata.h" | |||||
| #include <memory> | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "backend/kernel_compiler/oplib/oplib.h" | |||||
| #include "backend/kernel_compiler/common_utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void AkgMetadataInfo(const CNodePtr &kernel_node, | |||||
| std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| MS_EXCEPTION_IF_NULL(kernel_info_list); | |||||
| std::string op_name = AnfAlgo::GetCNodeName(kernel_node); | |||||
| for (size_t i = 0; i < support_devices.size(); i++) { | |||||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||||
| if (op_info_ptr == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if (!ParseMetadata(kernel_node, op_info_ptr, Processor(i), kernel_info_list)) { | |||||
| MS_LOG(WARNING) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "] failed."; | |||||
| } else { | |||||
| MS_LOG(DEBUG) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "]."; | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (kernel_info_list->empty()) { | |||||
| MS_LOG(WARNING) << "Akg dose not has metadata of op[" << op_name << "]."; | |||||
| } | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
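Usage sketch for the metadata query above; the caller and result handling are assumptions:

```cpp
#include <memory>
#include <vector>
#include "backend/kernel_compiler/akg/akg_kernel_metadata.h"

void QueryAkgMetadataExample(const mindspore::CNodePtr &kernel_node) {
  std::vector<std::shared_ptr<mindspore::kernel::KernelBuildInfo>> kernel_info_list;
  mindspore::kernel::AkgMetadataInfo(kernel_node, &kernel_info_list);
  // An empty list means no supported device has AKG op info for this node.
}
```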
| @@ -0,0 +1,31 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <unordered_map> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/kernel_build_info.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void AkgMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ | |||||
| @@ -0,0 +1,422 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" | |||||
| #include <algorithm> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <unordered_set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include <Python.h> | |||||
| #include "ir/dtype.h" | |||||
| #include "ir/func_graph.h" | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #include "backend/kernel_compiler/common_utils.h" | |||||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||||
| #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" | |||||
| #include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| constexpr int32_t PARALLEL_ARGS_SIZE = 3; | |||||
| constexpr int32_t PROCESS_NUM = 16; | |||||
| constexpr int32_t TIME_OUT = 300; | |||||
| constexpr auto kOpDesc = "op_desc"; | |||||
| constexpr auto kShape = "shape"; | |||||
| constexpr auto kDataType = "data_type"; | |||||
| constexpr auto kInputDesc = "input_desc"; | |||||
| constexpr auto kOutputDesc = "output_desc"; | |||||
| constexpr auto kTensorName = "tensor_name"; | |||||
| constexpr auto kCompileAkgKernelParallelFunc = "compile_akg_kernel_parallel"; | |||||
| constexpr auto kMultiProcModule = "mindspore._extends.parallel_compile.akg_compiler.multi_process_compiler"; | |||||
| namespace { | |||||
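| // Rewrites the input tensor names recorded in each node's json: if an input is produced | |||||
| // by another node of the fused group, it is renamed to that producer's output tensor name; | |||||
| // inputs coming from outside the group keep their original names. | |||||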
| void UpdateTensorNameInJson(const std::vector<AnfNodePtr> &anf_nodes, | |||||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||||
| for (auto const &anf_node : anf_nodes) { | |||||
| std::vector<int> dyn_input_sizes; | |||||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { | |||||
| dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes)); | |||||
| } | |||||
| bool is_dynamic_input = !dyn_input_sizes.empty(); | |||||
| size_t input_num = is_dynamic_input ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node); | |||||
| size_t real_input_index = 0; | |||||
| for (size_t i = 0; i < input_num; ++i) { | |||||
| size_t input_tensor_num = is_dynamic_input ? IntToSize(dyn_input_sizes[i]) : 1; | |||||
| for (size_t j = 0; j < input_tensor_num; ++j) { | |||||
| auto tmp_input = GetKernelInput(anf_node, real_input_index); | |||||
| std::string tensor_name = GetTensorName((*node_json_map)[anf_node], kInputDesc, std::make_pair(i, j)); | |||||
| if (node_json_map->find(tmp_input.first) != node_json_map->end()) { | |||||
| std::string new_tensor_name = | |||||
| GetTensorName((*node_json_map)[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second)); | |||||
| SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &((*node_json_map)[anf_node])); | |||||
| MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of [" | |||||
| << anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output [" | |||||
| << new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "]."; | |||||
| } else { | |||||
| MS_LOG(DEBUG) << "[" << real_input_index << "] input " << tensor_name << "] of [" | |||||
| << anf_node->fullname_with_scope() << "] is out input."; | |||||
| } | |||||
| real_input_index++; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
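| // Builds the "input_desc" section of the fused kernel json: one descriptor per outer | |||||
| // input, carrying its tensor name, device data type, and device shape. | |||||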
| nlohmann::json GetInputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list, | |||||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||||
| nlohmann::json inputs_json; | |||||
| auto input_index = GetInputIndex(anf_nodes, input_list); | |||||
| for (size_t i = 0; i < input_index.size(); ++i) { | |||||
| auto tmp_input = input_index[i]; | |||||
| auto type_id = AnfAlgo::GetInputDeviceDataType(tmp_input.first, tmp_input.second.first); | |||||
| std::string dtype = TypeId2String(type_id); | |||||
| nlohmann::json input_desc_json; | |||||
| input_desc_json[kTensorName] = GetTensorName((*node_json_map)[tmp_input.first], kInputDesc, tmp_input.second); | |||||
| input_desc_json[kDataType] = dtype; | |||||
| input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(tmp_input.first, tmp_input.second.first); | |||||
| inputs_json.emplace_back(std::vector<nlohmann::json>{input_desc_json}); | |||||
| } | |||||
| return inputs_json; | |||||
| } | |||||
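| // Builds the "output_desc" section: an output that is also one of the outer inputs reuses | |||||
| // that input's descriptor; otherwise the name, dtype, and shape are taken from the | |||||
| // producing node (an empty shape is emitted as [1] for scalars). | |||||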
| nlohmann::json GetOutputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list, | |||||
| const std::vector<AnfNodePtr> &output_list, const nlohmann::json &inputs_json, | |||||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||||
| nlohmann::json outputs_json; | |||||
| auto output_index = GetOutputIndex(anf_nodes, input_list, output_list); | |||||
| for (size_t i = 0; i < output_index.size(); ++i) { | |||||
| auto tmp_output = output_index[i]; | |||||
| bool found = false; | |||||
| nlohmann::json output_desc_json; | |||||
| for (size_t input_i = 0; input_i < input_list.size(); ++input_i) { | |||||
| if (tmp_output.first == input_list[input_i]) { | |||||
| output_desc_json = inputs_json[input_i][0]; | |||||
| found = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (!found) { | |||||
| auto type_id = AnfAlgo::GetOutputDeviceDataType(tmp_output.first, tmp_output.second); | |||||
| std::string dtype = TypeId2String(type_id); | |||||
| output_desc_json[kTensorName] = | |||||
| GetTensorName((*node_json_map)[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second)); | |||||
| output_desc_json[kDataType] = dtype; | |||||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(tmp_output.first, tmp_output.second); | |||||
| if (output_shape.empty()) { | |||||
| output_shape.push_back(1); | |||||
| } | |||||
| output_desc_json[kShape] = output_shape; | |||||
| } | |||||
| outputs_json.emplace_back(output_desc_json); | |||||
| } | |||||
| return outputs_json; | |||||
| } | |||||
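| // Splits build_args three ways: nodes with a cached kernel pack are bound to it directly, | |||||
| // the first occurrence of each json name is queued for compilation, and later occurrences | |||||
| // are returned as repeat_nodes to be resolved from the cache afterwards. | |||||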
| std::pair<std::vector<std::string>, std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>>> PreProcessJsonForBuild( | |||||
| const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) { | |||||
| // Remove cached nodes, gather unique nodes, and collect repeated nodes which need postprocessing. | |||||
| std::vector<std::string> jsons; | |||||
| std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> repeat_nodes; | |||||
| std::unordered_set<std::string> json_name_set; | |||||
| for (const auto &[builder, anf_node] : build_args) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| auto json_name = builder.json_name(); | |||||
| MS_LOG(DEBUG) << "Akg start compile op: " << json_name; | |||||
| auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); | |||||
| if (cached_kernel_pack != nullptr) { | |||||
| MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope[" | |||||
| << anf_node->fullname_with_scope() << "]."; | |||||
| auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack); | |||||
| kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); | |||||
| kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); | |||||
| AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); | |||||
| continue; | |||||
| } | |||||
| if (json_name_set.count(json_name) != 0) { | |||||
| repeat_nodes.push_back({builder, anf_node}); | |||||
| continue; | |||||
| } | |||||
| json_name_set.insert(json_name); | |||||
| auto node_json = builder.kernel_json(); | |||||
| kernel::SaveJsonInfo(json_name, node_json); | |||||
| jsons.push_back(node_json); | |||||
| } | |||||
| return std::make_pair(jsons, repeat_nodes); | |||||
| } | |||||
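| // After parallel compilation: inserts each freshly built kernel into the cache and binds | |||||
| // it to its node, then binds the repeated nodes to the kernels just cached. | |||||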
| bool PostProcessAfterCompile(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args, | |||||
| const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &repeat_nodes) { | |||||
| for (const auto &[builder, anf_node] : build_args) { | |||||
| auto json_name = builder.json_name(); | |||||
| auto new_kernel_pack = tbe::TbeUtils::InsertCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); | |||||
| if (new_kernel_pack == nullptr) { | |||||
| MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name << "], fullname_with_scope[" | |||||
| << anf_node->fullname_with_scope() << "]."; | |||||
| return false; | |||||
| } | |||||
| auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(new_kernel_pack); | |||||
| kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); | |||||
| kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); | |||||
| AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); | |||||
| MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!"; | |||||
| } | |||||
| for (const auto &[builder, anf_node] : repeat_nodes) { | |||||
| auto node_json = builder.kernel_json(); | |||||
| auto json_name = builder.json_name(); | |||||
| auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); | |||||
| if (cached_kernel_pack == nullptr) { | |||||
| return false; | |||||
| } | |||||
| MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope[" | |||||
| << anf_node->fullname_with_scope() << "]."; | |||||
| auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack); | |||||
| kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); | |||||
| kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); | |||||
| AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace | |||||
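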
| bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||||
| MS_LOG(INFO) << "AKG start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; | |||||
| auto it = kAkgKernelAttrsProcessMap.find(op_name); | |||||
| if (it != kAkgKernelAttrsProcessMap.end()) { | |||||
| it->second(anf_node); | |||||
| } | |||||
| MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; | |||||
| nlohmann::json node_json; | |||||
| if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { | |||||
| MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; | |||||
| return false; | |||||
| } | |||||
| kernel_json_ = node_json.dump(); | |||||
| if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) { | |||||
| MS_LOG(ERROR) << "Cal mem size failed."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
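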
| bool AkgAscendKernelBuilder::GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes, | |||||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||||
| for (auto const &anf_node : anf_nodes) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||||
| if (!AnfAlgo::IsRealKernel(anf_node)) { | |||||
| MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "]."; | |||||
| return false; | |||||
| } | |||||
| auto it = kAkgKernelAttrsProcessMap.find(op_name); | |||||
| if (it != kAkgKernelAttrsProcessMap.end()) { | |||||
| it->second(anf_node); | |||||
| } | |||||
| nlohmann::json node_json; | |||||
| if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { | |||||
| MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed."; | |||||
| return false; | |||||
| } | |||||
| // No need for composite op. | |||||
| node_json.erase("id"); | |||||
| node_json.erase("op"); | |||||
| node_json.erase("composite"); | |||||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| if (primitive->GetAttr("fusion") != nullptr) { | |||||
| node_json["fusion"] = primitive->GetAttr("fusion")->ToString(); | |||||
| } | |||||
| (*node_json_map)[anf_node] = node_json; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes, | |||||
| const std::vector<AnfNodePtr> &input_list, | |||||
| const std::vector<AnfNodePtr> &output_list) { | |||||
| if (anf_nodes.empty() || input_list.empty()) { | |||||
| MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size() | |||||
| << "]."; | |||||
| return false; | |||||
| } | |||||
| MS_LOG(INFO) << "anf_nodes [" << output_list.size() << "], input_list [" << anf_nodes.size() << "], output_list [" | |||||
| << input_list.size() << "]."; | |||||
| std::map<AnfNodePtr, nlohmann::json> node_json_map; | |||||
| if (!GenJsonAndPreprocess4Fused(anf_nodes, &node_json_map)) { | |||||
| return false; | |||||
| } | |||||
| UpdateTensorNameInJson(anf_nodes, &node_json_map); | |||||
| nlohmann::json fused_node_json; | |||||
| std::vector<nlohmann::json> node_json_desc; | |||||
| std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), | |||||
| [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); | |||||
| fused_node_json[kOpDesc] = node_json_desc; | |||||
| fused_node_json[kInputDesc] = GetInputsJson(anf_nodes, input_list, &node_json_map); | |||||
| fused_node_json[kOutputDesc] = | |||||
| GetOutputsJson(anf_nodes, input_list, output_list, fused_node_json[kInputDesc], &node_json_map); | |||||
| size_t hash_id = std::hash<std::string>()(fused_node_json.dump()); | |||||
| json_name_ = "Fused_"; | |||||
| auto fg = anf_nodes[0]->func_graph(); | |||||
| MS_EXCEPTION_IF_NULL(fg); | |||||
| auto attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); | |||||
| if (attr_val != nullptr) { | |||||
| auto fg_attr = GetValue<std::string>(attr_val); | |||||
| (void)json_name_.append(fg_attr).append("_"); | |||||
| } | |||||
| (void)json_name_.append(std::to_string(hash_id)); | |||||
| fused_node_json["composite_graph"] = fg->ToString(); | |||||
| fused_node_json["op"] = json_name_; | |||||
| fused_node_json["platform"] = "AKG"; | |||||
| fused_node_json["process"] = "aicore"; | |||||
| fused_node_json["composite"] = true; | |||||
| kernel_json_ = fused_node_json.dump(); | |||||
| if (!GetIOSize(fused_node_json, &input_size_list_, &output_size_list_)) { | |||||
| MS_LOG(ERROR) << "Cal mem size failed."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
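| // Packs the arguments for the python compile function into a 3-tuple: | |||||
| // (list of kernel jsons, process number, timeout). Note that PyTuple_SetItem and | |||||
| // PyList_SetItem steal the references they are given, so no extra DECREF is needed here. | |||||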
| void GenParallelCompileFuncArgs(const std::vector<std::string> &kernel_jsons, PyObject **p_args) { | |||||
| MS_EXCEPTION_IF_NULL(p_args); | |||||
| *p_args = PyTuple_New(PARALLEL_ARGS_SIZE); | |||||
| PyObject *arg1 = PyList_New(kernel_jsons.size()); | |||||
| for (Py_ssize_t i = 0; i < PyList_Size(arg1); ++i) { | |||||
| PyList_SetItem(arg1, i, Py_BuildValue("s", kernel_jsons[i].c_str())); | |||||
| } | |||||
| PyObject *arg2 = Py_BuildValue("i", PROCESS_NUM); | |||||
| PyObject *arg3 = Py_BuildValue("i", TIME_OUT); | |||||
| (void)PyTuple_SetItem(*p_args, 0, arg1); | |||||
| (void)PyTuple_SetItem(*p_args, 1, arg2); | |||||
| (void)PyTuple_SetItem(*p_args, 2, arg3); | |||||
| } | |||||
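| // Compiles all AKG kernels in one shot: preprocess the json list, invoke the python | |||||
| // multi-process compiler, then attach the resulting kernel packs to the nodes. | |||||
| // Sketch of a call site (illustrative only; the surrounding session code is assumed): | |||||
| //   std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> build_args = ...; | |||||
| //   if (!AkgOpParallelBuild(build_args)) { MS_LOG(EXCEPTION) << "AKG parallel build failed."; } | |||||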
| bool AkgOpParallelBuild(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) { | |||||
| auto [jsons, repeat_nodes] = PreProcessJsonForBuild(build_args); | |||||
| if (jsons.empty()) { | |||||
| return true; | |||||
| } | |||||
| // Try to call a python method to compile the nodes in parallel. | |||||
| PyObject *p_module = nullptr; | |||||
| PyObject *p_func = nullptr; | |||||
| PyObject *p_arg = nullptr; | |||||
| PyObject *p_res = nullptr; | |||||
| p_module = PyImport_ImportModule(kMultiProcModule); | |||||
| if (p_module == nullptr) { | |||||
| MS_LOG(ERROR) << "Failed to import [" << kMultiProcModule << "]."; | |||||
| return false; | |||||
| } | |||||
| p_func = PyObject_GetAttrString(p_module, kCompileAkgKernelParallelFunc); | |||||
| GenParallelCompileFuncArgs(jsons, &p_arg); | |||||
| MS_LOG(DEBUG) << "Call function [" << kCompileAkgKernelParallelFunc << "], try to compile " << jsons.size() | |||||
| << " Akg kernels parallelly."; | |||||
| p_res = PyEval_CallObject(p_func, p_arg); | |||||
| if (p_res == nullptr) { | |||||
| PyErr_Print(); | |||||
| MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" | |||||
| << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; | |||||
| return false; | |||||
| } | |||||
| if (PyObject_IsTrue(p_res) != 1) { | |||||
| PyErr_Print(); | |||||
| MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" | |||||
| << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; | |||||
| return false; | |||||
| } | |||||
| if (!PostProcessAfterCompile(build_args, repeat_nodes)) { | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
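| // Entry point for Ascend AKG builds: graph-kernel (composite) nodes are flattened via | |||||
| // GetValidKernelNodes and serialized with CollectFusedJson, single ops with CollectJson; | |||||
| // everything is then handed to AkgOpParallelBuild. | |||||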
| bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||||
| std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> json_and_node; | |||||
| for (const auto &anf_node : anf_nodes) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| AkgAscendKernelBuilder akg_cce_kernel_builder; | |||||
| KernelPackPtr kernel_pack = nullptr; | |||||
| auto cnode = anf_node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| if (AnfAlgo::IsGraphKernel(cnode)) { | |||||
| auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode); | |||||
| MS_EXCEPTION_IF_NULL(func_graph); | |||||
| auto mng = func_graph->manager(); | |||||
| if (mng == nullptr) { | |||||
| mng = Manage(func_graph, true); | |||||
| func_graph->set_manager(mng); | |||||
| } | |||||
| std::vector<AnfNodePtr> node_list; | |||||
| std::vector<AnfNodePtr> input_list; | |||||
| std::vector<AnfNodePtr> output_list; | |||||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||||
| MS_LOG(INFO) << "Akg start compile composite op[" << op_name << "]"; | |||||
| GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); | |||||
| if (!akg_cce_kernel_builder.CollectFusedJson(node_list, input_list, output_list)) { | |||||
| MS_EXCEPTION(UnknownError) << "Akg build failed composite op[" << op_name << "]."; | |||||
| } | |||||
| } else { | |||||
| if (!akg_cce_kernel_builder.CollectJson(anf_node)) { | |||||
| MS_EXCEPTION(UnknownError) << "Akg build failed op[" << AnfAlgo::GetCNodeName(anf_node) << "]."; | |||||
| } | |||||
| } | |||||
| json_and_node.push_back({akg_cce_kernel_builder, anf_node}); | |||||
| } | |||||
| if (json_and_node.empty()) { | |||||
| MS_LOG(DEBUG) << "There is no kernel needed to be compiled."; | |||||
| return true; | |||||
| } | |||||
| return AkgOpParallelBuild(json_and_node); | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,56 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ | |||||
| #include <string> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include <map> | |||||
| #include "ir/anf.h" | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #include "backend/kernel_compiler/akg/akg_kernel_build.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class AkgAscendKernelBuilder : public AkgKernelBuild { | |||||
| public: | |||||
| AkgAscendKernelBuilder() = default; | |||||
| ~AkgAscendKernelBuilder() = default; | |||||
| bool CollectJson(const AnfNodePtr &anf_node); | |||||
| bool CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list, | |||||
| const std::vector<AnfNodePtr> &output_list); | |||||
| std::string json_name() const { return json_name_; } | |||||
| std::string kernel_json() const { return kernel_json_; } | |||||
| const std::vector<size_t> &input_size_list() const { return input_size_list_; } | |||||
| const std::vector<size_t> &output_size_list() const { return output_size_list_; } | |||||
| private: | |||||
| bool GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes, | |||||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map); | |||||
| std::string kernel_json_; | |||||
| std::vector<size_t> input_size_list_; | |||||
| std::vector<size_t> output_size_list_; | |||||
| }; | |||||
| bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ | |||||
| @@ -0,0 +1,132 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" | |||||
| #include <algorithm> | |||||
| #include <fstream> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <mutex> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "nlohmann/json.hpp" | |||||
| #include "runtime/rt.h" | |||||
| #include "utils/log_adapter.h" | |||||
| #include "utils/convert_utils.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| using std::fstream; | |||||
| using std::map; | |||||
| using std::mutex; | |||||
| using std::string; | |||||
| using TbeTaskInfoPtr = std::shared_ptr<ge::model_runner::TbeTaskInfo>; | |||||
| using tbe::KernelManager; | |||||
| constexpr uint32_t DEFAULT_BLOCK_DIM = 1; | |||||
| /** | |||||
| * @brief The info table contains the function stub, block dim, and kernel file buffer. | |||||
| */ | |||||
| AkgKernelMod::AkgKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {} | |||||
| void AkgKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; } | |||||
| void AkgKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; } | |||||
| void AkgKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; } | |||||
| const std::vector<size_t> &AkgKernelMod::GetInputSizeList() const { return input_size_list_; } | |||||
| const std::vector<size_t> &AkgKernelMod::GetOutputSizeList() const { return output_size_list_; } | |||||
| const std::vector<size_t> &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } | |||||
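| // Launches the compiled kernel directly: input and output device addresses are packed | |||||
| // into one flat argument buffer and dispatched with rtKernelLaunch on the given stream. | |||||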
| bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) { | |||||
| if (stream_ptr == nullptr) { | |||||
| MS_LOG(ERROR) << "stream_ptr should not be nullptr."; | |||||
| return false; | |||||
| } | |||||
| if (kernel_pack_ == nullptr) { | |||||
| MS_LOG(ERROR) << "kernel pack should not be nullptr."; | |||||
| return false; | |||||
| } | |||||
| uint32_t block_dim = DEFAULT_BLOCK_DIM;  // The default block dim equals 1. | |||||
| auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); | |||||
| if (func_stub == 0) { | |||||
| MS_LOG(ERROR) << "GenFuncStub failed."; | |||||
| return false; | |||||
| } | |||||
| // pack all addresses into a vector. | |||||
| std::vector<void *> runtime_args; | |||||
| (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args), | |||||
| [](const AddressPtr &input) -> void * { return input->addr; }); | |||||
| (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args), | |||||
| [](const AddressPtr &output) -> void * { return output->addr; }); | |||||
| rtL2Ctrl_t *l2ctrl = nullptr; | |||||
| auto stream = reinterpret_cast<rtStream_t *>(stream_ptr); | |||||
| if (RT_ERROR_NONE != rtKernelLaunch(reinterpret_cast<void *>(func_stub), block_dim, runtime_args.data(), | |||||
| SizeToUint(sizeof(void *) * runtime_args.size()), l2ctrl, stream)) { | |||||
| MS_LOG(ERROR) << "Call runtime rtKernelLaunch error."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
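| // Builds a TbeTaskInfo consumed by the GE model runner instead of launching immediately; | |||||
| // it records the stub function name, block dim, I/O addresses, and whether dump is needed. | |||||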
| std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||||
| const std::vector<AddressPtr> &outputs, uint32_t stream_id) { | |||||
| if (kernel_pack_ == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "kernel pack should not be nullptr."; | |||||
| } | |||||
| std::vector<uint8_t> args; | |||||
| const uint32_t args_size = 0; | |||||
| std::vector<uint8_t> sm_desc; | |||||
| void *binary = nullptr; | |||||
| const uint32_t binary_size = 0; | |||||
| std::vector<uint8_t> meta_data; | |||||
| std::vector<void *> input_data_addrs; | |||||
| std::vector<void *> output_data_addrs; | |||||
| std::vector<void *> workspace_addrs; | |||||
| // pack all addresses into a vector. | |||||
| (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs), | |||||
| [](const AddressPtr &input) -> void * { return input->addr; }); | |||||
| (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs), | |||||
| [](const AddressPtr &output) -> void * { return output->addr; }); | |||||
| uint32_t block_dim = DEFAULT_BLOCK_DIM;  // The default block dim equals 1. | |||||
| auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); | |||||
| if (func_stub == 0) { | |||||
| MS_LOG(EXCEPTION) << "GenFuncStub failed."; | |||||
| } | |||||
| std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_); | |||||
| MS_LOG(DEBUG) << "The block_dim is:" << block_dim; | |||||
| TbeTaskInfoPtr task_info_ptr = std::make_shared<ge::model_runner::TbeTaskInfo>( | |||||
| kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, | |||||
| input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); | |||||
| return {task_info_ptr}; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,54 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/ascend_kernel_mod.h" | |||||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class AkgKernelMod : public AscendKernelMod { | |||||
| public: | |||||
| explicit AkgKernelMod(const KernelPackPtr &kernel_pack); | |||||
| ~AkgKernelMod() final {} | |||||
| void SetInputSizeList(const std::vector<size_t> &size_list); | |||||
| void SetOutputSizeList(const std::vector<size_t> &size_list); | |||||
| void SetWorkspaceSizeList(const std::vector<size_t> &size_list); | |||||
| const std::vector<size_t> &GetInputSizeList() const override; | |||||
| const std::vector<size_t> &GetOutputSizeList() const override; | |||||
| const std::vector<size_t> &GetWorkspaceSizeList() const override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | |||||
| std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; | |||||
| private: | |||||
| KernelPackPtr kernel_pack_; | |||||
| std::vector<size_t> input_size_list_; | |||||
| std::vector<size_t> output_size_list_; | |||||
| std::vector<size_t> workspace_size_list_; | |||||
| }; | |||||
| using AkgKernelModPtr = std::shared_ptr<AkgKernelMod>; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h" | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #include "backend/kernel_compiler/akg/akg_kernel_build.h" | |||||
| #include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| AkgKernelBuild akg_kernel_build; | |||||
| std::vector<size_t> input_size_list; | |||||
| std::vector<size_t> output_size_list; | |||||
| KernelPackPtr kernel_pack = akg_kernel_build.BuildByJson(anf_node, &input_size_list, &output_size_list); | |||||
| MS_EXCEPTION_IF_NULL(kernel_pack); | |||||
| auto kernel_mod_ptr = std::make_shared<GpuKernelMod>(kernel_pack); | |||||
| MS_EXCEPTION_IF_NULL(kernel_mod_ptr); | |||||
| kernel_mod_ptr->SetInputSizeList(input_size_list); | |||||
| kernel_mod_ptr->SetOutputSizeList(output_size_list); | |||||
| return kernel_mod_ptr; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,28 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #include "base/base.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ | |||||
| @@ -0,0 +1,116 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" | |||||
| #include <fstream> | |||||
| #include <algorithm> | |||||
| #include "nlohmann/json.hpp" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| using std::fstream; | |||||
| using std::string; | |||||
| using std::vector; | |||||
| GpuKernelManagerPtr GpuKernelMod::kernelmanager_ = std::make_shared<GpuKernelManager>(); | |||||
| GpuKernelManager::GpuKernelManager() {} | |||||
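| // Loads (or fetches from the in-memory table, unless force_reload is set) the CUDA | |||||
| // function for a kernel pack. thread_info receives six values parsed from the kernel | |||||
| // json: grid dims (blockIdx.x/y/z) followed by block dims (threadIdx.x/y/z). | |||||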
| CUresult GpuKernelManager::GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, | |||||
| vector<uint32_t> *thread_info, CUfunction *func) { | |||||
| if (kernel_pack->GetJson() == nullptr || kernel_pack->GetJson()->contents == nullptr || | |||||
| kernel_pack->GetKernel() == nullptr || kernel_pack->GetKernel()->contents == nullptr) { | |||||
| MS_LOG(ERROR) << "GPU:Invalid kernel pack, json or kernel is nullptr."; | |||||
| return CUDA_ERROR_INVALID_IMAGE; | |||||
| } | |||||
| auto js = nlohmann::json::parse(kernel_pack->GetJson()->contents, | |||||
| kernel_pack->GetJson()->contents + kernel_pack->GetJson()->len); | |||||
| string fn = js["kernelName"]; | |||||
| if (!force_reload) { | |||||
| auto iter = infotable_.find(fn); | |||||
| if (iter != infotable_.end()) { | |||||
| auto kernelmeta = iter->second; | |||||
| *thread_info = kernelmeta->thread_info_; | |||||
| *func = kernelmeta->func_addr_; | |||||
| return CUDA_SUCCESS; | |||||
| } | |||||
| } | |||||
| thread_info->emplace_back(js["blockIdx.x"]); | |||||
| thread_info->emplace_back(js["blockIdx.y"]); | |||||
| thread_info->emplace_back(js["blockIdx.z"]); | |||||
| thread_info->emplace_back(js["threadIdx.x"]); | |||||
| thread_info->emplace_back(js["threadIdx.y"]); | |||||
| thread_info->emplace_back(js["threadIdx.z"]); | |||||
| CUmodule module; | |||||
| CUresult result = cuModuleLoadData(&module, kernel_pack->GetKernel()->contents); | |||||
| if (result != CUDA_SUCCESS) { | |||||
| MS_LOG(ERROR) << "cuModuleLoadData failed."; | |||||
| return result; | |||||
| } | |||||
| result = cuModuleGetFunction(func, module, fn.c_str()); | |||||
| if (result != CUDA_SUCCESS) { | |||||
| MS_LOG(ERROR) << "cuModuleGetFunction failed."; | |||||
| return result; | |||||
| } | |||||
| infotable_[fn] = std::make_shared<GpuKernelMeta>(*func, module, *thread_info); | |||||
| return result; | |||||
| } | |||||
| GpuKernelMod::GpuKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {} | |||||
| void GpuKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; } | |||||
| void GpuKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; } | |||||
| const std::vector<size_t> &GpuKernelMod::GetInputSizeList() const { return input_size_list_; } | |||||
| const std::vector<size_t> &GpuKernelMod::GetOutputSizeList() const { return output_size_list_; } | |||||
| const std::vector<size_t> &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } | |||||
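| // Launches via the CUDA driver API: thread_info[0..2] give the grid dims and | |||||
| // thread_info[3..5] the block dims; each kernel parameter slot holds a pointer to the | |||||
| // corresponding device address, as cuLaunchKernel expects. | |||||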
| bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) { | |||||
| if (stream_ptr == nullptr) { | |||||
| MS_LOG(ERROR) << "stream_ptr should not be nullptr."; | |||||
| return false; | |||||
| } | |||||
| if (kernel_pack_ == nullptr) { | |||||
| MS_LOG(ERROR) << "kernel pack should not be nullptr."; | |||||
| return false; | |||||
| } | |||||
| vector<uint32_t> thread_info; | |||||
| CUfunction kernel_addr; | |||||
| CUresult result = kernelmanager_->GetFunction(kernel_pack_, false, &thread_info, &kernel_addr); | |||||
| if (result != CUDA_SUCCESS) { | |||||
| MS_LOG(ERROR) << "GetFunction failed."; | |||||
| return false; | |||||
| } | |||||
| std::vector<void *> runtimeargs; | |||||
| (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs), | |||||
| [](const AddressPtr &input) -> void * { return reinterpret_cast<void *>(&(input->addr)); }); | |||||
| (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs), | |||||
| [](const AddressPtr &output) -> void * { return reinterpret_cast<void *>(&(output->addr)); }); | |||||
| result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4], | |||||
| thread_info[5], 0, reinterpret_cast<CUstream>(stream_ptr), | |||||
| reinterpret_cast<void **>(&runtimeargs[0]), 0); | |||||
| if (result != CUDA_SUCCESS) { | |||||
| MS_LOG(ERROR) << "Launch Kernel failed."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,82 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_ | |||||
| #include <cuda.h> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <unordered_map> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| struct GpuKernelMeta { | |||||
| CUfunction func_addr_; | |||||
| CUmodule module_; | |||||
| std::vector<uint32_t> thread_info_; | |||||
| GpuKernelMeta(CUfunction funcAddr, CUmodule module, const std::vector<uint32_t> &thread_info) | |||||
| : func_addr_(funcAddr), module_(module), thread_info_(thread_info) {} | |||||
| }; | |||||
| using GpuKernelMetaPtr = std::shared_ptr<GpuKernelMeta>; | |||||
| class GpuKernelManager { | |||||
| public: | |||||
| GpuKernelManager(); | |||||
| virtual ~GpuKernelManager() { | |||||
| for (auto iter = infotable_.begin(); iter != infotable_.end(); ++iter) { | |||||
| CUresult ret = cuModuleUnload(iter->second->module_); | |||||
| if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_DEINITIALIZED) { | |||||
| MS_LOG(ERROR) << "Unload GPU Module failed."; | |||||
| } | |||||
| } | |||||
| } | |||||
| CUresult GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, std::vector<uint32_t> *thread_info, | |||||
| CUfunction *func); | |||||
| private: | |||||
| std::unordered_map<std::string, GpuKernelMetaPtr> infotable_; | |||||
| }; | |||||
| using GpuKernelManagerPtr = std::shared_ptr<GpuKernelManager>; | |||||
| class GpuKernelMod : public KernelMod { | |||||
| public: | |||||
| explicit GpuKernelMod(const KernelPackPtr &kernel_pack); | |||||
| virtual ~GpuKernelMod() {} | |||||
| void SetInputSizeList(const std::vector<size_t> &size_list); | |||||
| void SetOutputSizeList(const std::vector<size_t> &size_list); | |||||
| const std::vector<size_t> &GetInputSizeList() const override; | |||||
| const std::vector<size_t> &GetOutputSizeList() const override; | |||||
| const std::vector<size_t> &GetWorkspaceSizeList() const override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | |||||
| static GpuKernelManagerPtr kernelmanager_; | |||||
| private: | |||||
| KernelPackPtr kernel_pack_; | |||||
| std::vector<size_t> input_size_list_; | |||||
| std::vector<size_t> output_size_list_; | |||||
| std::vector<size_t> workspace_size_list_; | |||||
| }; | |||||
| using GpuKernelModPtr = std::shared_ptr<GpuKernelMod>; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_ | |||||
| @@ -0,0 +1,52 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "framework/ge_runtime/task_info.h" | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #ifdef ENABLE_DATA_DUMP | |||||
| #include "debug/data_dump_parser.h" | |||||
| #endif | |||||
| using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class AscendKernelMod : public KernelMod { | |||||
| public: | |||||
| virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &, | |||||
| const std::vector<AddressPtr> &, uint32_t) = 0; | |||||
| uint32_t block_dim() { return block_dim_; } | |||||
| uint32_t stream_id() { return stream_id_; } | |||||
| virtual bool NeedDump() { | |||||
| #ifdef ENABLE_DATA_DUMP | |||||
| return DataDumpParser::GetInstance().NeedDump(kernel_name_); | |||||
| #else | |||||
| return false; | |||||
| #endif | |||||
| } | |||||
| protected: | |||||
| uint32_t block_dim_{1}; | |||||
| uint32_t stream_id_{0}; | |||||
| }; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ | |||||
| @@ -0,0 +1,145 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_ | |||||
| #include <dirent.h> | |||||
| #include <memory> | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <utility> | |||||
| #include <nlohmann/json.hpp> | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #include "backend/kernel_compiler/oplib/opinfo.h" | |||||
| #include "backend/kernel_compiler/kernel_build_info.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| constexpr auto kCceKernelMeta = "./kernel_meta/"; | |||||
| constexpr auto kGpuKernelMeta = "./cuda_meta"; | |||||
| constexpr auto kProcessorAiCore = "aicore"; | |||||
| constexpr auto kProcessorAiCpu = "aicpu"; | |||||
| constexpr auto kProcessorCuda = "cuda"; | |||||
| constexpr auto kJsonSuffix = ".json"; | |||||
| constexpr auto kInfoSuffix = ".info"; | |||||
| constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600; | |||||
| constexpr auto kAkgModule = "_akg"; | |||||
| constexpr auto kArgDataformat = "data_format"; | |||||
| const std::vector<std::string> support_devices = {"aicore", "aicpu", "cuda"}; | |||||
| struct KernelMetaInfo { | |||||
| uintptr_t func_stub_; | |||||
| uint32_t block_dim_; | |||||
| }; | |||||
| using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>; | |||||
| class KernelMeta { | |||||
| public: | |||||
| KernelMeta() = default; | |||||
| void Initialize(); | |||||
| void RemoveKernelCache(); | |||||
| std::string Search(const std::string &kernel_name) const; | |||||
| bool Insert(const std::string &kernel_name, const std::string &kernel_json); | |||||
| std::string GetKernelMetaPath() { return kernel_meta_path_; } | |||||
| static KernelMeta *GetInstance() { | |||||
| static KernelMeta kernel_meta; | |||||
| return &kernel_meta; | |||||
| } | |||||
| ~KernelMeta() = default; | |||||
| private: | |||||
| bool initialized_ = false; | |||||
| std::string kernel_meta_path_; | |||||
| std::unordered_map<std::string, std::string> kernel_meta_map_; | |||||
| }; | |||||
| struct SparseGradient { | |||||
| float *value_; | |||||
| int *indices_; | |||||
| size_t indices_size_; | |||||
| }; | |||||
| struct MultiThreadComputeParams { | |||||
| float *var_; | |||||
| float *accum_; | |||||
| float *linear_; | |||||
| float *m_; | |||||
| float *m_t_; | |||||
| float *v_; | |||||
| float lr_; | |||||
| float l1_; | |||||
| float l2_; | |||||
| float lr_power_; | |||||
| float beta1_; | |||||
| float beta2_; | |||||
| float epsilon_; | |||||
| SparseGradient sparse_grad_; | |||||
| size_t var_first_dim_size_; | |||||
| size_t var_outer_dim_size_; | |||||
| bool use_nesterov_; | |||||
| }; | |||||
| using MultiThreadComputeFunc = std::function<void(MultiThreadComputeParams *param, size_t start, size_t end)>; | |||||
| bool CheckCache(const std::string &kernel_name); | |||||
| KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); | |||||
| KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); | |||||
| TypeId DtypeToTypeId(const std::string &dtypes); | |||||
| std::string Dtype2ShortType(const std::string &dtypes); | |||||
| std::string TypeId2String(TypeId type_id); | |||||
| size_t GetDtypeNbyte(const std::string &dtypes); | |||||
| bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor, | |||||
| std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list); | |||||
| void SaveJsonInfo(const std::string &json_name, const std::string &info); | |||||
| std::string GetProcessor(const AnfNodePtr &anf_node); | |||||
| bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b); | |||||
| int Sign(float x); | |||||
| void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, | |||||
| size_t outer_dim); | |||||
| void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, | |||||
| size_t outer_dim, bool use_multi_threads = true); | |||||
| std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index); | |||||
| std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list, | |||||
| const std::vector<AnfNodePtr> &input_list); | |||||
| std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list, | |||||
| const std::vector<AnfNodePtr> &input_list, | |||||
| const std::vector<AnfNodePtr> &output_list); | |||||
| void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list, | |||||
| std::vector<AnfNodePtr> *input_list, std::vector<AnfNodePtr> *output_list); | |||||
| void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list); | |||||
| bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json); | |||||
| void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list); | |||||
| bool IsWeightBoundary(const AnfNodePtr &node); | |||||
| void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, | |||||
| size_t total_compute_size); | |||||
| void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, | |||||
| size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices, | |||||
| std::vector<size_t> *slice_positions); | |||||
| void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads, | |||||
| SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim, | |||||
| size_t outer_dim); | |||||
| void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad, | |||||
| SparseGradient *unique_grad, size_t first_dim, size_t outer_dim); | |||||
| std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_ | |||||
| @@ -0,0 +1,65 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/addn_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| CheckParam(kernel_node); | |||||
| input_num_ = AnfAlgo::GetInputTensorNum(kernel_node); | |||||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||||
| } | |||||
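| // Element-wise sum of all inputs; the output shape has been expanded to 4D in | |||||
| // InitKernel, so four nested loops cover every element with a single running offset. | |||||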
| bool AddNCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| size_t offset = 0; | |||||
| for (size_t i = 0; i < output_shape_[0]; ++i) { | |||||
| for (size_t j = 0; j < output_shape_[1]; ++j) { | |||||
| for (size_t k = 0; k < output_shape_[2]; ++k) { | |||||
| for (size_t m = 0; m < output_shape_[3]; ++m) { | |||||
| float sum = 0; | |||||
| for (size_t index = 0; index < input_num_; ++index) { | |||||
| auto input_addr = reinterpret_cast<float *>(inputs[index]->addr); | |||||
| sum += input_addr[offset]; | |||||
| } | |||||
| output_addr[offset++] = sum; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| void AddNCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| if (input_shape.size() > 4) { | |||||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but AddNCPUKernel olny support 4d or lower."; | |||||
| } | |||||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||||
| if (output_num != 1) { | |||||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but AddNCPUKernel needs 1 output."; | |||||
| } | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class AddNCPUKernel : public CPUKernel { | |||||
| public: | |||||
| AddNCPUKernel() : input_num_(0) {} | |||||
| ~AddNCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| void CheckParam(const CNodePtr &kernel_node); | |||||
| size_t input_num_; | |||||
| std::vector<size_t> output_shape_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(AddN, | |||||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| AddNCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,53 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "runtime/device/cpu/mpi/mpi_adapter.h" | |||||
| #include "utils/log_adapter.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| namespace { | |||||
| constexpr auto kRanksGroup = "group"; | |||||
| constexpr auto kAllGatherInputNum = 1; | |||||
| } // namespace | |||||
| void AllGatherCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||||
| if (input_num != kAllGatherInputNum) { | |||||
| MS_LOG(EXCEPTION) << "allgather input num:" << input_num; | |||||
| } | |||||
| auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup); | |||||
| if (ranks_group != nullptr) { | |||||
| ranks_group_ = GetValue<std::vector<int>>(ranks_group); | |||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup; | |||||
| } | |||||
| } | |||||
| bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| auto input_data_num = inputs[0]->size / sizeof(float); | |||||
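| // Gather the local input buffer from every rank in ranks_group_ into the output buffer. | |||||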
| auto mpi_instance = device::cpu::MPIAdapter::Instance(); | |||||
| MS_EXCEPTION_IF_NULL(mpi_instance); | |||||
| return mpi_instance->AllGather(input_addr, output_addr, ranks_group_, input_data_num); | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,44 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class AllGatherCPUKernel : public CPUKernel { | |||||
| public: | |||||
| AllGatherCPUKernel() = default; | |||||
| ~AllGatherCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| std::vector<int> ranks_group_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(_HostAllGather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| AllGatherCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,47 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void ApplyMomentumCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {} | |||||
| bool ApplyMomentumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> & /*outputs*/) { | |||||
| if (inputs.size() < 5) { | |||||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||||
| } | |||||
| if (inputs[0]->size != inputs[1]->size || inputs[0]->size != inputs[3]->size) { | |||||
| MS_LOG(EXCEPTION) << "error input data size!"; | |||||
| } | |||||
| auto weight = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto accumulate = reinterpret_cast<float *>(inputs[1]->addr); | |||||
| float learning_rate = reinterpret_cast<float *>(inputs[2]->addr)[0]; | |||||
| auto gradient = reinterpret_cast<float *>(inputs[3]->addr); | |||||
| float moment = reinterpret_cast<float *>(inputs[4]->addr)[0]; | |||||
| size_t elem_num = inputs[0]->size / sizeof(float); | |||||
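| // Momentum update: accumulation = accumulation * momentum + gradient, then weight -= accumulation * learning_rate. | |||||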
| for (size_t i = 0; i < elem_num; ++i) { | |||||
| accumulate[i] = accumulate[i] * moment + gradient[i]; | |||||
| weight[i] -= accumulate[i] * learning_rate; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,58 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class ApplyMomentumCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| ApplyMomentumCPUKernel() = default; | |||||
| ~ApplyMomentumCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(ApplyMomentum, | |||||
| KernelAttr() | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32), | |||||
| ApplyMomentumCPUKernel); | |||||
| MS_REG_CPU_KERNEL(ApplyMomentum, | |||||
| KernelAttr() | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32), | |||||
| ApplyMomentumCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,67 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| if (shape.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "argmax kernel dims invalid " << shape.size(); | |||||
| } | |||||
| batch_size_ = shape[0]; | |||||
| class_num_ = shape[1]; | |||||
| int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS); | |||||
| if (axis != -1 && axis != 1) { | |||||
| MS_LOG(EXCEPTION) << "argmax kernel not support axis " << axis; | |||||
| } | |||||
| } | |||||
| bool ArgmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspaces*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.empty() || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "input or output empty!"; | |||||
| } | |||||
| size_t batch_float_size = batch_size_ * sizeof(float); | |||||
| size_t batch_class_float_size = class_num_ * batch_float_size; | |||||
| if (inputs[0]->size != batch_class_float_size || outputs[0]->size != batch_float_size) { | |||||
| MS_LOG(EXCEPTION) << "invalid input or output data size!"; | |||||
| } | |||||
| auto input = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto output = reinterpret_cast<int *>(outputs[0]->addr); | |||||
| size_t row_start = 0; | |||||
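| // For each row of the batch, scan the class dimension and record the index of the maximum value. | |||||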
| for (size_t i = 0; i < batch_size_; ++i) { | |||||
| size_t max_index = 0; | |||||
| float max_value = input[row_start]; | |||||
| for (size_t j = 1; j < class_num_; ++j) { | |||||
| size_t index = row_start + j; | |||||
| if (input[index] > max_value) { | |||||
| max_value = input[index]; | |||||
| max_index = j; | |||||
| } | |||||
| } | |||||
| output[i] = SizeToInt(max_index); | |||||
| row_start += class_num_; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,45 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class ArgmaxCPUKernel : public CPUKernel { | |||||
| public: | |||||
| ArgmaxCPUKernel() = default; | |||||
| ~ArgmaxCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| size_t class_num_{0}; | |||||
| size_t batch_size_{0}; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), | |||||
| ArgmaxCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,82 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void BiasAddCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| if (input_shape_.size() == 4) { | |||||
| data_shape_ = 4; | |||||
| } else if (input_shape_.size() == 2) { | |||||
| data_shape_ = 2; | |||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "BiasAdd input shape must be NCHW or NC, but got " << input_shape_.size() << " dims."; | |||||
| } | |||||
| if (bias_shape_.size() != 1) { | |||||
| MS_LOG(EXCEPTION) << "Bias shape must be 1-D, but got " << bias_shape_.size() << " dims."; | |||||
| } | |||||
| if (input_shape_[1] != bias_shape_[0]) { | |||||
| MS_LOG(EXCEPTION) << "Bias size does not match the channel dim of the input."; | |||||
| } | |||||
| } | |||||
| bool BiasAddCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, | |||||
| const std::vector<AddressPtr> &outputs) { | |||||
| if (inputs.size() != 2 || outputs.size() != 1) { | |||||
| MS_LOG(EXCEPTION) << "inputs outputs size not supoort"; | |||||
| } | |||||
| auto src_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto bias_addr = reinterpret_cast<float *>(inputs[1]->addr); | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
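| // NCHW layout: broadcast bias[c] over every (n, h, w) position of channel c. | |||||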
| if (data_shape_ == 4) { | |||||
| size_t h_size = input_shape_[3]; | |||||
| size_t c_size = input_shape_[2] * h_size; | |||||
| size_t n_size = input_shape_[1] * c_size; | |||||
| size_t hw_size = input_shape_[2] * input_shape_[3]; | |||||
| size_t n_offset = 0; | |||||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||||
| size_t c_offset = 0; | |||||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||||
| for (size_t hw = 0; hw < hw_size; ++hw) { | |||||
| size_t offset = n_offset + c_offset + hw; | |||||
| output_addr[offset] = src_addr[offset] + bias_addr[c]; | |||||
| } | |||||
| c_offset += c_size; | |||||
| } | |||||
| n_offset += n_size; | |||||
| } | |||||
| } else { | |||||
| size_t n_offset = 0; | |||||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||||
| output_addr[n_offset + c] = src_addr[n_offset + c] + bias_addr[c]; | |||||
| } | |||||
| n_offset += input_shape_[1]; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,46 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class BiasAddCPUKernel : public CPUKernel { | |||||
| public: | |||||
| BiasAddCPUKernel() = default; | |||||
| ~BiasAddCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| uint8_t data_shape_{0}; | |||||
| std::vector<size_t> input_shape_; | |||||
| std::vector<size_t> bias_shape_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| BiasAdd, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| BiasAddCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,68 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void BiasAddGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| if (input_shape_.size() != 4 && input_shape_.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "input data format not support"; | |||||
| } | |||||
| } | |||||
| bool BiasAddGradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, | |||||
| const std::vector<AddressPtr> &outputs) { | |||||
| if (inputs.size() != 1 || outputs.size() != 1) { | |||||
| MS_LOG(EXCEPTION) << "input output size not support"; | |||||
| } | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||||
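| // NCHW layout: the gradient for channel c is the sum of the incoming gradients over all (n, h, w) positions. | |||||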
| if (input_shape_.size() == 4) { | |||||
| size_t h_size = input_shape_[3]; | |||||
| size_t c_size = h_size * input_shape_[2]; | |||||
| size_t n_size = c_size * input_shape_[1]; | |||||
| size_t hw_size = input_shape_[2] * input_shape_[3]; | |||||
| size_t c_offset = 0; | |||||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||||
| output_addr[c] = 0; | |||||
| size_t n_offset = 0; | |||||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||||
| for (size_t hw = 0; hw < hw_size; ++hw) { | |||||
| size_t offset = c_offset + n_offset + hw; | |||||
| output_addr[c] += input_addr[offset]; | |||||
| } | |||||
| n_offset += n_size; | |||||
| } | |||||
| c_offset += c_size; | |||||
| } | |||||
| } else if (input_shape_.size() == 2) { | |||||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||||
| output_addr[c] = 0; | |||||
| size_t n_offset = 0; | |||||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||||
| output_addr[c] += input_addr[c + n_offset]; | |||||
| n_offset += input_shape_[1]; | |||||
| } | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class BiasAddGradCPUKernel : public CPUKernel { | |||||
| public: | |||||
| BiasAddGradCPUKernel() = default; | |||||
| ~BiasAddGradCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| std::vector<size_t> input_shape_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(BiasAddGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| BiasAddGradCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_ | |||||
| @@ -0,0 +1,106 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/concat_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void ConcatCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| CheckParam(kernel_node); | |||||
| axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS); | |||||
| auto input_1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
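| // Normalize a negative axis, then shift it so it addresses the shape after padding to 4-D. | |||||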
| if (axis_ < 0) { | |||||
| axis_ = axis_ + SizeToInt(input_1_shape.size()); | |||||
| } | |||||
| axis_ += 4 - input_1_shape.size(); | |||||
| auto input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||||
| for (size_t i = 0; i < input_num; i++) { | |||||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); | |||||
| CPUKernelUtils::ExpandDimsTo4(&input_shape); | |||||
| input_shape_list_.push_back(input_shape); | |||||
| } | |||||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||||
| } | |||||
| bool ConcatCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| auto buff_size = outputs[0]->size; | |||||
| size_t dim0 = output_shape_[0]; | |||||
| size_t dim1 = output_shape_[1]; | |||||
| size_t dim2 = output_shape_[2]; | |||||
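| // Iterate over the dims in front of the concat axis; each step copies one slice from every input. | |||||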
| if (axis_ == 3) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| for (size_t j = 0; j < dim1; ++j) { | |||||
| for (size_t k = 0; k < dim2; ++k) { | |||||
| CopyDataToOutput(inputs, i, j, k, &output_addr, &buff_size); | |||||
| } | |||||
| } | |||||
| } | |||||
| } else if (axis_ == 2) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| for (size_t j = 0; j < dim1; ++j) { | |||||
| CopyDataToOutput(inputs, i, j, 0, &output_addr, &buff_size); | |||||
| } | |||||
| } | |||||
| } else if (axis_ == 1) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| CopyDataToOutput(inputs, i, 0, 0, &output_addr, &buff_size); | |||||
| } | |||||
| } else if (axis_ == 0) { | |||||
| CopyDataToOutput(inputs, 0, 0, 0, &output_addr, &buff_size); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| void ConcatCPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, | |||||
| size_t dim2, float **output_addr, size_t *buff_size) { | |||||
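| // Append, from each input in turn, the contiguous block that belongs at (dim0, dim1, dim2) along the concat axis. | |||||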
| for (size_t i = 0; i < input_shape_list_.size(); ++i) { | |||||
| auto input_i_shape = input_shape_list_[i]; | |||||
| auto input_i_addr = reinterpret_cast<float *>(inputs[i]->addr); | |||||
| size_t num = CPUKernelUtils::GetElementNumOnAxis(input_i_shape, axis_); | |||||
| num *= input_i_shape[axis_]; | |||||
| auto pos = CPUKernelUtils::CalcOffset(input_i_shape, dim0, dim1, dim2, 0); | |||||
| auto ret = memcpy_s(*output_addr, *buff_size, input_i_addr + pos, num * sizeof(float)); | |||||
| if (ret != EOK) { | |||||
| MS_LOG(EXCEPTION) << "memcpy failed."; | |||||
| } | |||||
| *output_addr += num; | |||||
| *buff_size -= num * sizeof(float); | |||||
| } | |||||
| } | |||||
| void ConcatCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| if (input_shape.size() > 4) { | |||||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but ConcatCPUKernel olny support 4d or lower."; | |||||
| } | |||||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||||
| if (output_num != 1) { | |||||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ConcatCPUKernel needs 1 output."; | |||||
| } | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,50 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class ConcatCPUKernel : public CPUKernel { | |||||
| public: | |||||
| ConcatCPUKernel() : axis_(0) {} | |||||
| ~ConcatCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| void CheckParam(const CNodePtr &kernel_node); | |||||
| void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2, | |||||
| float **output_addr, size_t *buff_size); | |||||
| int axis_; | |||||
| std::vector<std::vector<size_t>> input_shape_list_; | |||||
| std::vector<size_t> output_shape_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(Concat, | |||||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| ConcatCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,80 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void CPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||||
| size_t type_size = sizeof(float); | |||||
| for (size_t input_index = 0; input_index < input_num; ++input_index) { | |||||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, input_index); | |||||
| size_t tensor_size = | |||||
| shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>()); | |||||
| input_size_list_.emplace_back(tensor_size); | |||||
| } | |||||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||||
| for (size_t output_index = 0; output_index < output_num; ++output_index) { | |||||
| std::vector<size_t> shape = AnfAlgo::GetOutputDeviceShape(kernel_node, output_index); | |||||
| size_t tensor_size = | |||||
| shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>()); | |||||
| output_size_list_.emplace_back(tensor_size); | |||||
| } | |||||
| } | |||||
| void CPUKernel::Init(const CNodePtr &kernel_node) { | |||||
| InitKernel(kernel_node); | |||||
| InitInputOutputSize(kernel_node); | |||||
| } | |||||
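| // Pad the shape with leading 1s so it always has rank 4. | |||||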
| void CPUKernelUtils::ExpandDimsTo4(std::vector<size_t> *shape) { | |||||
| auto len = shape->size(); | |||||
| if (len < 4) { | |||||
| for (size_t i = 0; i < 4 - len; ++i) { | |||||
| shape->insert(shape->begin(), 1); | |||||
| } | |||||
| } | |||||
| } | |||||
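| // Flat offset of element (dim0, dim1, dim2, dim3) in a row-major 4-D shape. | |||||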
| size_t CPUKernelUtils::CalcOffset(const std::vector<size_t> &shape, size_t dim0, size_t dim1, size_t dim2, | |||||
| size_t dim3) { | |||||
| size_t offset = dim0 * shape[1] * shape[2] * shape[3] + dim1 * shape[2] * shape[3] + dim2 * shape[3] + dim3; | |||||
| return offset; | |||||
| } | |||||
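| // Number of elements in one step along axis (its stride): the product of all dims after it, for a 4-D shape. | |||||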
| size_t CPUKernelUtils::GetElementNumOnAxis(const std::vector<size_t> &shape, int axis) { | |||||
| if (axis < 0) { | |||||
| axis = axis + SizeToInt(shape.size()); | |||||
| } | |||||
| size_t result = 1; | |||||
| for (int j = 3; j > axis; --j) { | |||||
| result *= shape[j]; | |||||
| } | |||||
| return result; | |||||
| } | |||||
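| // element_num[i] becomes the stride of dim i: the product of all dims after i. | |||||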
| void CPUKernelUtils::GetElementNumEveryDim(const std::vector<size_t> &shape, std::vector<size_t> *element_num) { | |||||
| size_t accumulation = 1; | |||||
| element_num->emplace_back(1); | |||||
| for (size_t i = shape.size() - 1; i > 0; --i) { | |||||
| accumulation *= shape[i]; | |||||
| element_num->emplace_back(accumulation); | |||||
| } | |||||
| std::reverse(element_num->begin(), element_num->end()); | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,87 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <numeric> | |||||
| #include <functional> | |||||
| #include "backend/kernel_compiler/kernel.h" | |||||
| #include "ir/anf.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| using mindspore::kernel::Address; | |||||
| using mindspore::kernel::AddressPtr; | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| const char KSIZE[] = "ksize"; | |||||
| const char STRIDE[] = "stride"; | |||||
| const char STRIDES[] = "strides"; | |||||
| const char DILATION[] = "dilation"; | |||||
| const char PAD[] = "pad"; | |||||
| const char PAD_MODE[] = "pad_mode"; | |||||
| const char PADDING[] = "padding"; | |||||
| const char PAD_MODE_LOWER_SAME[] = "same"; | |||||
| const char PAD_MODE_LOWER_VALID[] = "valid"; | |||||
| const char PAD_MODE_UPPER_SAME[] = "SAME"; | |||||
| const char PAD_MODE_UPPER_VALID[] = "VALID"; | |||||
| const char TRANSPOSE_A[] = "transpose_a"; | |||||
| const char TRANSPOSE_B[] = "transpose_b"; | |||||
| const char IS_GRAD[] = "is_grad"; | |||||
| const char TRANSPOSE_NO = 'N'; | |||||
| const char TRANSPOSE_YES = 'T'; | |||||
| const char AXIS[] = "axis"; | |||||
| const char BEGIN[] = "begin"; | |||||
| const char END[] = "end"; | |||||
| const char SIZE[] = "size"; | |||||
| const char USE_NESTEROV[] = "use_nesterov"; | |||||
| class CPUKernel : public kernel::KernelMod { | |||||
| public: | |||||
| CPUKernel() = default; | |||||
| ~CPUKernel() override = default; | |||||
| virtual void Init(const CNodePtr &kernel_node); | |||||
| virtual void InitKernel(const CNodePtr &kernel_node) = 0; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs, void * /*stream_ptr*/) override { | |||||
| return Launch(inputs, workspace, outputs); | |||||
| } | |||||
| virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) = 0; | |||||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||||
| protected: | |||||
| virtual void InitInputOutputSize(const CNodePtr &kernel_node); | |||||
| std::vector<size_t> input_size_list_; | |||||
| std::vector<size_t> output_size_list_; | |||||
| std::vector<size_t> workspace_size_list_; | |||||
| }; | |||||
| class CPUKernelUtils { | |||||
| public: | |||||
| static void ExpandDimsTo4(std::vector<size_t> *shape); | |||||
| static size_t CalcOffset(const std::vector<size_t> &shape, size_t dim0, size_t dim1, size_t dim2, size_t dim3); | |||||
| static size_t GetElementNumOnAxis(const std::vector<size_t> &shape, int axis); | |||||
| static void GetElementNumEveryDim(const std::vector<size_t> &shape, std::vector<size_t> *element_num); | |||||
| }; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,104 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| #include <memory> | |||||
| #include <iostream> | |||||
| #include <string> | |||||
| #include "runtime/device/kernel_info.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| CPUKernelFactory &CPUKernelFactory::GetInstance() { | |||||
| static CPUKernelFactory instance; | |||||
| return instance; | |||||
| } | |||||
| void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr &kernel_attr, | |||||
| CPUKernelCreator &&kernel_creator) { | |||||
| (void)name_to_attr_creator_[kernel_name].emplace_back(kernel_attr, kernel_creator); | |||||
| #if !defined(_WIN32) && !defined(_WIN64) | |||||
| MS_LOG(DEBUG) << "CPUKernelFactory register operator: " << kernel_name; | |||||
| #endif | |||||
| } | |||||
| std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) { | |||||
| auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info()); | |||||
| MS_EXCEPTION_IF_NULL(kernel_info); | |||||
| const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info(); | |||||
| MS_EXCEPTION_IF_NULL(kernel_build_Info); | |||||
| std::pair<bool, size_t> ret_pair = CPUKernelAttrCheck(kernel_name, *kernel_build_Info); | |||||
| if (ret_pair.first) { | |||||
| return (name_to_attr_creator_.find(kernel_name)->second)[ret_pair.second].second(); | |||||
| } | |||||
| return nullptr; | |||||
| } | |||||
| std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &kernel_name, | |||||
| const KernelBuildInfo &kernel_info) { | |||||
| auto iter = name_to_attr_creator_.find(kernel_name); | |||||
| if (iter == name_to_attr_creator_.end()) { | |||||
| MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!"; | |||||
| return std::make_pair(false, 0); | |||||
| } | |||||
| auto creators = iter->second; | |||||
| for (size_t index = 0; index < creators.size(); ++index) { | |||||
| auto attr_creator = creators[index]; | |||||
| if (CPUKernelSingleAttrCheck(attr_creator.first, kernel_info)) { | |||||
| return std::make_pair(true, index); | |||||
| } | |||||
| } | |||||
| return std::make_pair(false, 0); | |||||
| } | |||||
| bool CPUKernelFactory::CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) { | |||||
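| // With SetAllSameAttr, the single registered dtype applies to every input and output. | |||||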
| for (size_t i = 0; i < kernel_info.GetInputNum(); ++i) { | |||||
| auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetInputAttr(0).first : kernel_attr.GetInputAttr(i).first; | |||||
| if (kernel_info.GetInputDeviceType(i) != dtype) { | |||||
| MS_LOG(DEBUG) << "input index:" << i << ", kernel info type:" << kernel_info.GetInputDeviceType(i) | |||||
| << ", register type:" << dtype; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| for (size_t i = 0; i < kernel_info.GetOutputNum(); ++i) { | |||||
| auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetOutputAttr(0).first : kernel_attr.GetOutputAttr(i).first; | |||||
| if (kernel_info.GetOutputDeviceType(i) != dtype) { | |||||
| MS_LOG(DEBUG) << "output index:" << i << ", kernel info type:" << kernel_info.GetOutputDeviceType(i) | |||||
| << ", register type:" << dtype; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::vector<KernelAttr> CPUKernelFactory::GetSupportedKernelAttrList(const std::string &kernel_name) { | |||||
| std::vector<KernelAttr> result; | |||||
| auto iter = name_to_attr_creator_.find(kernel_name); | |||||
| if (iter == name_to_attr_creator_.end()) { | |||||
| MS_LOG(WARNING) << "Not registered CPU kernel: op[" << kernel_name << "]!"; | |||||
| return result; | |||||
| } | |||||
| auto creators = iter->second; | |||||
| for (size_t index = 0; index < creators.size(); ++index) { | |||||
| auto attr_creator = creators[index]; | |||||
| result.push_back(attr_creator.first); | |||||
| } | |||||
| return result; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,79 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ | |||||
| #include <functional> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "runtime/device/cpu/kernel_select_cpu.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| using mindspore::device::cpu::KernelAttr; | |||||
| using CPUKernelCreator = std::function<std::shared_ptr<CPUKernel>()>; | |||||
| class CPUKernelFactory { | |||||
| public: | |||||
| static CPUKernelFactory &GetInstance(); | |||||
| void Register(const std::string &kernel_name, const KernelAttr &kernel_attr, CPUKernelCreator &&kernel_creator); | |||||
| std::shared_ptr<CPUKernel> Create(const std::string &kernel_name, const CNodePtr &apply_kernel); | |||||
| std::vector<KernelAttr> GetSupportedKernelAttrList(const std::string &kernel_name); | |||||
| private: | |||||
| CPUKernelFactory() = default; | |||||
| ~CPUKernelFactory() = default; | |||||
| DISABLE_COPY_AND_ASSIGN(CPUKernelFactory) | |||||
| std::pair<bool, size_t> CPUKernelAttrCheck(const std::string &kernel_name, const KernelBuildInfo &kernel_info); | |||||
| bool CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info); | |||||
| std::map<std::string, std::vector<std::pair<KernelAttr, CPUKernelCreator>>> name_to_attr_creator_; | |||||
| }; | |||||
| class CPUKernelRegistrar { | |||||
| public: | |||||
| CPUKernelRegistrar(const std::string &kernel_name, const KernelAttr &kernel_attr, CPUKernelCreator &&kernel_creator) { | |||||
| CPUKernelFactory::GetInstance().Register(kernel_name, kernel_attr, std::move(kernel_creator)); | |||||
| } | |||||
| ~CPUKernelRegistrar() = default; | |||||
| }; | |||||
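| // Registration macros: __COUNTER__ (or the op and type names) gives every static registrar a unique name. | |||||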
| #define MS_REG_CPU_KERNEL(OPNAME, ATTR, OPCLASS) MS_REG_CPU_KERNEL_(__COUNTER__, OPNAME, ATTR, OPCLASS) | |||||
| #define MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) _MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) | |||||
| #define _MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) \ | |||||
| static_assert(std::is_base_of<CPUKernel, OPCLASS>::value, " must be base of CPUKernel"); \ | |||||
| static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_reg(#OPNAME, ATTR, \ | |||||
| []() { return std::make_shared<OPCLASS>(); }); | |||||
| #define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) MS_REG_CPU_KERNEL_T_(__COUNTER__, OPNAME, ATTR, OPCLASS, T) | |||||
| #define MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) | |||||
| #define _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) \ | |||||
| static_assert(std::is_base_of<CPUKernel, OPCLASS<T>>::value, " must be base of CPUKernel"); \ | |||||
| static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_##OPNAME##_##T##_reg( \ | |||||
| #OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T>>(); }); | |||||
| #define MS_REG_CPU_KERNEL_T_S(OPNAME, ATTR, OPCLASS, T, S) \ | |||||
| static_assert(std::is_base_of<CPUKernel, OPCLASS<T, S>>::value, " must be base of CPUKernel"); \ | |||||
| static const CPUKernelRegistrar g_cpu_kernel_##OPNAME##_##T##_##S##_reg( \ | |||||
| #OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T, S>>(); }); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ | |||||
| @@ -0,0 +1,50 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/debug_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "common/utils.h" | |||||
| #ifdef ENABLE_DEBUGGER | |||||
| #include "debug/debugger/debugger.h" | |||||
| #endif | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void DebugCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); } | |||||
| bool DebugCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.empty() || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "Input or output is empty!"; | |||||
| } | |||||
| auto val = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| MS_LOG(DEBUG) << " launch DebugCountCPUKernel val " << *val; | |||||
| auto output = reinterpret_cast<int *>(outputs[0]->addr); | |||||
| size_t elem_num = inputs[0]->size / sizeof(int); | |||||
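| // Copy the float input into the int32 output (values are truncated by the implicit conversion). | |||||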
| for (size_t i = 0; i < elem_num; i++) { | |||||
| output[i] = val[i]; | |||||
| } | |||||
| #ifdef ENABLE_DEBUGGER | |||||
| // debugger will suspend execution if necessary | |||||
| Debugger::GetInstance()->PostDebugOp(); | |||||
| #endif | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,41 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class DebugCPUKernel : public CPUKernel { | |||||
| public: | |||||
| DebugCPUKernel() = default; | |||||
| ~DebugCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(Debug, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), DebugCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,78 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <thread> | |||||
| #include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "runtime/device/cpu/mpi/mpi_adapter.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| CheckParam(kernel_node); | |||||
| split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num"); | |||||
| MS_LOG(INFO) << "split_num: " << split_num_; | |||||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| if (input_shape[0] % split_num_ != 0) { | |||||
| MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num."; | |||||
| } | |||||
| } | |||||
| bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| auto start_time = std::chrono::steady_clock::now(); | |||||
| #else | |||||
| struct timeval start_time, end_time; | |||||
| (void)gettimeofday(&start_time, nullptr); | |||||
| #endif | |||||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| size_t input_size = inputs[0]->size; | |||||
| size_t output_size = outputs[0]->size; | |||||
| MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size; | |||||
| MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size; | |||||
| memset_s(output_addr, output_size, 0, output_size); | |||||
| const std::vector<int> &rank_group = {0, 1, 2, 3, 4, 5, 6, 7}; | |||||
| size_t input_split_lens = input_size / split_num_ / sizeof(float_t); | |||||
| size_t output_split_lens = output_size / split_num_ / sizeof(float_t); | |||||
| auto mpi_instance = device::cpu::MPIAdapter::Instance(); | |||||
| MS_EXCEPTION_IF_NULL(mpi_instance); | |||||
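| // AllGather each of the split_num_ segments across the fixed 8-rank group. | |||||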
| for (int i = 0; i < split_num_; i++) { | |||||
| mpi_instance->AllGather(input_addr + i * input_split_lens, output_addr + i * output_split_lens, rank_group, | |||||
| input_split_lens); | |||||
| } | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| auto end_time = std::chrono::steady_clock::now(); | |||||
| std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time; | |||||
| MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << cost.count() << " us"; | |||||
| #else | |||||
| (void)gettimeofday(&end_time, nullptr); | |||||
| uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec); | |||||
| time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec); | |||||
| MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << time << " us"; | |||||
| #endif | |||||
| return true; | |||||
| } | |||||
| void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||||
| if (input_num != 1) { | |||||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1."; | |||||
| } | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,46 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class EmbeddingLookUpCommGradCPUKernel : public CPUKernel { | |||||
| public: | |||||
| EmbeddingLookUpCommGradCPUKernel() : split_num_(1) {} | |||||
| ~EmbeddingLookUpCommGradCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| void CheckParam(const CNodePtr &kernel_node); | |||||
| int split_num_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(EmbeddingLookupCommGrad, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| EmbeddingLookUpCommGradCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,212 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <thread> | |||||
| #include <string> | |||||
| #include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "runtime/device/cpu/mpi/mpi_adapter.h" | |||||
| #include "ir/primitive.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| CheckParam(kernel_node); | |||||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| input_lens_ = 1; | |||||
| for (auto shape : input_shape_) { | |||||
| input_lens_ = input_lens_ * shape; | |||||
| } | |||||
| indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||||
| indices_lens_ = 1; | |||||
| for (auto shape : indices_shape_) { | |||||
| indices_lens_ = indices_lens_ * shape; | |||||
| } | |||||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||||
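| // ExpandDimsTo4 below left-pads shapes with 1s, so the original axis 0 becomes axis (4 - rank). | |||||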
| axis_ = 4 - input_shape_.size(); | |||||
| if (AnfAlgo::HasNodeAttr(kAttrReduceScatterFlag, kernel_node)) { | |||||
| reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrReduceScatterFlag); | |||||
| } | |||||
| #ifdef ENABLE_MPI | |||||
| if (reduce_scatter_flag_) { | |||||
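| // Stage the gather result locally: its dim 0 is the total number of indices, the rest follow the input. | |||||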
| size_t gatherv2_out_lens = 1; | |||||
| for (int i = 0; i < SizeToInt(input_shape_.size()); i++) { | |||||
| if (i == 0) { | |||||
| for (int j = 0; j < SizeToInt(indices_shape_.size()); j++) { | |||||
| gatherv2_out_lens = gatherv2_out_lens * indices_shape_[j]; | |||||
| } | |||||
| } else { | |||||
| gatherv2_out_lens = gatherv2_out_lens * input_shape_[i]; | |||||
| } | |||||
| } | |||||
| gatherv2_out_lens_ = gatherv2_out_lens * sizeof(float); | |||||
| gather_v2_out_ = malloc(gatherv2_out_lens_); | |||||
| if (gather_v2_out_ == nullptr) { | |||||
| MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel malloc failed, malloc lens: " << gatherv2_out_lens_; | |||||
| } | |||||
| auto ret = memset_s(gather_v2_out_, gatherv2_out_lens_, 0, gatherv2_out_lens_); | |||||
| if (ret != 0) { | |||||
| MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel memset gatherv2 out buff failed"; | |||||
| } | |||||
| split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num"); | |||||
| } | |||||
| #else | |||||
| if (reduce_scatter_flag_) { | |||||
| MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true"; | |||||
| } | |||||
| #endif | |||||
| if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) { | |||||
| offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, kAttrOffset); | |||||
| } | |||||
| CPUKernelUtils::ExpandDimsTo4(&input_shape_); | |||||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||||
| } | |||||
| bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| float *gather_out_addr = reduce_scatter_flag_ ? reinterpret_cast<float *>(gather_v2_out_) : output_addr; | |||||
| size_t dim0 = input_shape_[0]; | |||||
| size_t dim1 = input_shape_[1]; | |||||
| size_t dim2 = input_shape_[2]; | |||||
| if (axis_ == 3) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| for (size_t j = 0; j < dim1; ++j) { | |||||
| for (size_t k = 0; k < dim2; ++k) { | |||||
| LookUpTable(inputs, i, j, k, &gather_out_addr); | |||||
| } | |||||
| } | |||||
| } | |||||
| } else if (axis_ == 2) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| for (size_t j = 0; j < dim1; ++j) { | |||||
| LookUpTable(inputs, i, j, 0, &gather_out_addr); | |||||
| } | |||||
| } | |||||
| } else if (axis_ == 1) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| LookUpTable(inputs, i, 0, 0, &gather_out_addr); | |||||
| } | |||||
| } else if (axis_ == 0) { | |||||
| LookUpTable(inputs, 0, 0, 0, &gather_out_addr); | |||||
| } | |||||
| #ifdef ENABLE_MPI | |||||
| if (reduce_scatter_flag_) { | |||||
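| // The hard-coded group below has 8 ranks, so each rank keeps 1/8 of every split after ReduceScatter. | |||||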
| size_t one_split_lens = gatherv2_out_lens_ / split_num_ / sizeof(float); | |||||
| size_t reduce_scatter_out_lens = one_split_lens / 8; | |||||
| const std::vector<int> &group = {0, 1, 2, 3, 4, 5, 6, 7}; | |||||
| auto mpi_instance = device::cpu::MPIAdapter::Instance(); | |||||
| MS_EXCEPTION_IF_NULL(mpi_instance); | |||||
| for (int i = 0; i < split_num_; i++) { | |||||
| mpi_instance->ReduceScatter(reinterpret_cast<float *>(gather_v2_out_) + i * one_split_lens, | |||||
| output_addr + i * reduce_scatter_out_lens, group, one_split_lens / 8, "sum"); | |||||
| } | |||||
| } | |||||
| #endif | |||||
| return true; | |||||
| } | |||||
| void LookUpTable_task(const float *input_addr, float *output_addr, const int *indices_addr, size_t indices_lens, | |||||
| size_t num, size_t dim0, size_t dim1, size_t dim2, int offset, size_t axis, | |||||
| std::vector<size_t> input_shape, size_t input_lens) { | |||||
| size_t lens = num * sizeof(float); | |||||
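| // Indices outside [offset, offset + input_shape[axis]) zero-fill their output slice instead of copying. | |||||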
| for (size_t i = 0; i < indices_lens; ++i) { | |||||
| int indices = indices_addr[i] - offset; | |||||
| if (indices >= 0) { | |||||
| size_t index = IntToSize(indices); | |||||
| if (index < input_shape[axis]) { | |||||
| size_t pos = 0; | |||||
| if (axis == 3) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, dim2, index); | |||||
| } else if (axis == 2) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, index, 0); | |||||
| } else if (axis == 1) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape, dim0, index, 0, 0); | |||||
| } else if (axis == 0) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape, index, 0, 0, 0); | |||||
| } | |||||
| if (pos + num <= input_lens) { | |||||
| auto ret = memcpy_s(output_addr, lens, input_addr + pos, lens); | |||||
| if (ret != EOK) { | |||||
| MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed."; | |||||
| } | |||||
| } else { | |||||
| auto ret = memset_s(output_addr, lens, 0, lens); | |||||
| if (ret != EOK) { | |||||
| MS_LOG(EXCEPTION) << "LookUpTable task memset failed."; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| auto ret = memset_s(output_addr, lens, 0, lens); | |||||
| if (ret != EOK) { | |||||
| MS_LOG(EXCEPTION) << "LookUpTable task memset failed."; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| auto ret = memset_s(output_addr, lens, 0, lens); | |||||
| if (ret != EOK) { | |||||
| MS_LOG(EXCEPTION) << "LookUpTable task memset failed."; | |||||
| } | |||||
| } | |||||
| output_addr += num; | |||||
| } | |||||
| } | |||||
| void EmbeddingLookUpCPUKernel::LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, | |||||
| size_t dim2, float **output_addr) { | |||||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr); | |||||
| size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_); | |||||
| float *task_out_addr = *output_addr; | |||||
| const size_t thread_num = 8; | |||||
| std::thread threads[8]; | |||||
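| // Distribute the indices over up to 8 threads via ceiling division; the last task is trimmed to the remainder. | |||||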
| size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num; | |||||
| size_t i; | |||||
| size_t task_offset = 0; | |||||
| MS_LOG(DEBUG) << "indices_lens_: " << indices_lens_ << " one task proc lens:" << task_proc_lens; | |||||
| for (i = 0; i < thread_num; i++) { | |||||
| if (task_offset >= indices_lens_) { | |||||
| break; | |||||
| } | |||||
| MS_LOG(DEBUG) << "task_offset: " << task_offset << " task_proc_lenss:" << task_proc_lens; | |||||
| threads[i] = | |||||
| std::thread(LookUpTable_task, input_addr, task_out_addr + task_offset * num, indices_addr + task_offset, | |||||
| task_proc_lens, num, dim0, dim1, dim2, offset_, axis_, input_shape_, input_lens_); | |||||
| task_offset += task_proc_lens; | |||||
| if (task_offset + task_proc_lens > indices_lens_) { | |||||
| task_proc_lens = indices_lens_ - task_offset; | |||||
| } | |||||
| } | |||||
| for (size_t j = 0; j < i; j++) { | |||||
| threads[j].join(); | |||||
| } | |||||
| *output_addr += num * indices_lens_; | |||||
| } | |||||
| void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| if (input_shape.size() > 4) { | |||||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() | |||||
| << ", but EmbeddingLookUpCPUKernel olny support 4d or lower."; | |||||
| } | |||||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||||
| if (input_num != 2) { | |||||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2."; | |||||
| } | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,74 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class EmbeddingLookUpCPUKernel : public CPUKernel { | |||||
| public: | |||||
| EmbeddingLookUpCPUKernel() { | |||||
| axis_ = 0; | |||||
| offset_ = 0; | |||||
| split_num_ = 0; | |||||
| input_lens_ = 0; | |||||
| indices_lens_ = 0; | |||||
| gatherv2_out_lens_ = 0; | |||||
| reduce_scatter_flag_ = false; | |||||
| gather_v2_out_ = nullptr; | |||||
| } | |||||
| ~EmbeddingLookUpCPUKernel() override { | |||||
| if (gather_v2_out_ != nullptr) { | |||||
| free(gather_v2_out_); | |||||
| gather_v2_out_ = nullptr; | |||||
| } | |||||
| } | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| void LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2, | |||||
| float **output_addr); | |||||
| void CheckParam(const CNodePtr &kernel_node); | |||||
| std::vector<size_t> input_shape_; | |||||
| std::vector<size_t> indices_shape_; | |||||
| std::vector<size_t> output_shape_; | |||||
| int axis_; | |||||
| int offset_; | |||||
| int split_num_; | |||||
| size_t input_lens_; | |||||
| size_t indices_lens_; | |||||
| size_t gatherv2_out_lens_; | |||||
| bool reduce_scatter_flag_; | |||||
| void *gather_v2_out_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| EmbeddingLookup, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | |||||
| EmbeddingLookUpCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,46 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void EqualCountCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {} | |||||
| bool EqualCountCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 2 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "input or output empty!"; | |||||
| } | |||||
| if (inputs[0]->size != inputs[1]->size) { | |||||
| MS_LOG(EXCEPTION) << "input or output size!"; | |||||
| } | |||||
| int count = 0; | |||||
| auto left = reinterpret_cast<int *>(inputs[0]->addr); | |||||
| auto right = reinterpret_cast<int *>(inputs[1]->addr); | |||||
| size_t elem_num = inputs[0]->size / sizeof(int); | |||||
| for (size_t i = 0; i < elem_num; i++) { | |||||
| if (left[i] == right[i]) { | |||||
| count++; | |||||
| } | |||||
| } | |||||
| auto output = reinterpret_cast<int *>(outputs[0]->addr); | |||||
| output[0] = count; | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class EqualCountCPUKernel : public CPUKernel { | |||||
| public: | |||||
| EqualCountCPUKernel() = default; | |||||
| ~EqualCountCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| EqualCount, | |||||
| KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||||
| EqualCountCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,115 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/gather_cpu_kernel.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void GatherV2CPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| CheckParam(kernel_node); | |||||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||||
| axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS); | |||||
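| // A negative axis counts from the back; after normalization, shift it to match the 4D-expanded shapes. | |||||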
| if (axis_ < 0) { | |||||
| axis_ = axis_ + SizeToInt(input_shape_.size()); | |||||
| } | |||||
| axis_ += 4 - input_shape_.size(); | |||||
| CPUKernelUtils::ExpandDimsTo4(&input_shape_); | |||||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||||
| } | |||||
| bool GatherV2CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| auto buff_size = outputs[0]->size; | |||||
| size_t dim0 = input_shape_[0]; | |||||
| size_t dim1 = input_shape_[1]; | |||||
| size_t dim2 = input_shape_[2]; | |||||
| if (axis_ == 3) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| for (size_t j = 0; j < dim1; ++j) { | |||||
| for (size_t k = 0; k < dim2; ++k) { | |||||
| CopyDataToOutput(inputs, i, j, k, &output_addr, &buff_size); | |||||
| } | |||||
| } | |||||
| } | |||||
| } else if (axis_ == 2) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| for (size_t j = 0; j < dim1; ++j) { | |||||
| CopyDataToOutput(inputs, i, j, 0, &output_addr, &buff_size); | |||||
| } | |||||
| } | |||||
| } else if (axis_ == 1) { | |||||
| for (size_t i = 0; i < dim0; ++i) { | |||||
| CopyDataToOutput(inputs, i, 0, 0, &output_addr, &buff_size); | |||||
| } | |||||
| } else if (axis_ == 0) { | |||||
| CopyDataToOutput(inputs, 0, 0, 0, &output_addr, &buff_size); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| void GatherV2CPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, | |||||
| size_t dim2, float **output_addr, size_t *buff_size) { | |||||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr); | |||||
| size_t elem_num = inputs[1]->size / sizeof(int); | |||||
| size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_); | |||||
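| // Copy one num-element slice per index; an out-of-range index zero-fills its slice instead. | |||||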
| for (size_t i = 0; i < elem_num; ++i) { | |||||
| if (indices_addr[i] < 0) { | |||||
| MS_LOG(EXCEPTION) << "The indices value is less than 0."; | |||||
| } | |||||
| size_t index = IntToSize(indices_addr[i]); | |||||
| if (index >= input_shape_[IntToSize(axis_)]) { | |||||
| auto ret = memset_s(*output_addr, *buff_size, 0, num * sizeof(float)); | |||||
| if (ret != EOK) { | |||||
| MS_LOG(EXCEPTION) << "memset failed."; | |||||
| } | |||||
| } else { | |||||
| size_t pos = 0; | |||||
| if (axis_ == 3) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, dim2, index); | |||||
| } else if (axis_ == 2) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, index, 0); | |||||
| } else if (axis_ == 1) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, index, 0, 0); | |||||
| } else if (axis_ == 0) { | |||||
| pos = CPUKernelUtils::CalcOffset(input_shape_, index, 0, 0, 0); | |||||
| } | |||||
| auto ret = memcpy_s(*output_addr, *buff_size, input_addr + pos, num * sizeof(float)); | |||||
| if (ret != EOK) { | |||||
| MS_LOG(EXCEPTION) << "memcpy failed."; | |||||
| } | |||||
| } | |||||
| *output_addr += num; | |||||
| *buff_size -= num * sizeof(float); | |||||
| } | |||||
| } | |||||
| void GatherV2CPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| if (input_shape.size() > 4) { | |||||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but GatherV2CPUKernel olny support 4d or lower."; | |||||
| } | |||||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||||
| if (input_num != 2) { | |||||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherV2CPUKernel needs 2."; | |||||
| } | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,52 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class GatherV2CPUKernel : public CPUKernel { | |||||
| public: | |||||
| GatherV2CPUKernel() : axis_(0) {} | |||||
| ~GatherV2CPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2, | |||||
| float **output_addr, size_t *buff_size); | |||||
| void CheckParam(const CNodePtr &kernel_node); | |||||
| std::vector<size_t> input_shape_; | |||||
| std::vector<size_t> indices_shape_; | |||||
| std::vector<size_t> output_shape_; | |||||
| int axis_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| GatherV2, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | |||||
| GatherV2CPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,91 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h" | |||||
| #include <string> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||||
| MS_LOG(EXCEPTION) << "conv2d only support nchw input!"; | |||||
| } | |||||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||||
| int kernel_size = SizeToInt(weight_shape[3]); | |||||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||||
| if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) { | |||||
| MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!"; | |||||
| } | |||||
| if (stride_ori[0] != 1 || stride_ori[1] != 1) { | |||||
| MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!"; | |||||
| } | |||||
| if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) { | |||||
| MS_LOG(EXCEPTION) << "conv2d dilation only support 1, and dilation must be 4d!"; | |||||
| } | |||||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||||
| MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!"; | |||||
| } | |||||
| int stride = stride_ori[2]; | |||||
| int dilation = dilation_ori[2]; | |||||
| dnnl::memory::dims strides{stride, stride}; | |||||
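| // oneDNN expresses dilation as the number of inserted gaps, hence the (dilation - 1) convention. | |||||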
| dnnl::memory::dims dilates{dilation - 1, dilation - 1}; | |||||
| std::vector<int> int_padding_l; | |||||
| std::vector<int> int_padding_r; | |||||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "get padding failed"; | |||||
| } | |||||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||||
| dnnl::convolution_forward::desc desc = | |||||
| dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc, | |||||
| weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||||
| auto prim_desc = dnnl::convolution_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||||
| primitive_ = std::make_shared<dnnl::convolution_forward>(prim_desc); | |||||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_WEIGHTS, weights_desc); | |||||
| AddArgument(DNNL_ARG_DST, dst_desc); | |||||
| } | |||||
| bool Conv2dCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 2 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class Conv2dCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| Conv2dCPUKernel() = default; | |||||
| ~Conv2dCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| Conv2D, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| Conv2dCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,93 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" | |||||
| #include <string> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<size_t> weight_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||||
| MS_LOG(EXCEPTION) << ("conv2d grad filter only support nchw input!"); | |||||
| } | |||||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||||
| int kernel_size = SizeToInt(weight_shape[3]); | |||||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel only support equal stride, and stride must be 2d!"; | |||||
| } | |||||
| if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) { | |||||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation only support 1, and dilation must be 4d!"; | |||||
| } | |||||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation only support 1 in N axis and C axis!"; | |||||
| } | |||||
| int stride = stride_ori[0]; | |||||
| int dilation = dilation_ori[2]; | |||||
| dnnl::memory::dims strides{stride, stride}; | |||||
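| // As in the forward kernel, oneDNN stores dilation as the number of inserted gaps, hence dilation - 1. | |||||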
| dnnl::memory::dims dilates{dilation - 1, dilation - 1}; | |||||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||||
| std::vector<int> int_padding_l; | |||||
| std::vector<int> int_padding_r; | |||||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "get padding failed"; | |||||
| } | |||||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||||
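| // oneDNN derives the backward-weights primitive from a forward primitive descriptor used as a hint. | |||||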
| dnnl::convolution_forward::desc forward_desc = | |||||
| dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc, | |||||
| weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||||
| auto forward_prim_desc = dnnl::convolution_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); | |||||
| dnnl::convolution_backward_weights::desc backward_desc = dnnl::convolution_backward_weights::desc( | |||||
| dnnl::algorithm::convolution_auto, src_desc, weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||||
| auto backward_prim_desc = dnnl::convolution_backward_weights::primitive_desc( | |||||
| backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||||
| primitive_ = std::make_shared<dnnl::convolution_backward_weights>(backward_prim_desc); | |||||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_DST, dst_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_WEIGHTS, weights_desc); | |||||
| } | |||||
| bool Conv2dGradFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 2 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS, outputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class Conv2dGradFilterCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| Conv2dGradFilterCPUKernel() = default; | |||||
| ~Conv2dGradFilterCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| Conv2DBackpropFilter, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| Conv2dGradFilterCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,92 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" | |||||
| #include <string> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||||
| MS_LOG(EXCEPTION) << "conv2d grad filter only support nchw input!"; | |||||
| } | |||||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||||
| int kernel_size = SizeToInt(weight_shape[3]); | |||||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel only support equal stride, and stride must be 2d!"; | |||||
| } | |||||
| if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) { | |||||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation only support 1, and dilation must be 4d!"; | |||||
| } | |||||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation only support 1 in N axis and C axis!"; | |||||
| } | |||||
| int stride = stride_ori[0]; | |||||
| int dilation = dilation_ori[2]; | |||||
| dnnl::memory::dims strides{stride, stride}; | |||||
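| // Same oneDNN convention as the forward kernel: dilation is stored as inserted gaps (dilation - 1). | |||||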
| dnnl::memory::dims dilates{dilation - 1, dilation - 1}; | |||||
| std::vector<int> int_padding_l; | |||||
| std::vector<int> int_padding_r; | |||||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "conv2d grad get padding failed"; | |||||
| } | |||||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||||
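| // The backward-data primitive likewise takes the forward primitive descriptor as a hint. | |||||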
| dnnl::convolution_forward::desc forward_desc = | |||||
| dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc, | |||||
| weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||||
| auto forward_prim_desc = dnnl::convolution_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); | |||||
| dnnl::convolution_backward_data::desc backward_desc = dnnl::convolution_backward_data::desc( | |||||
| dnnl::algorithm::convolution_auto, src_desc, weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||||
| auto backward_prim_desc = | |||||
| dnnl::convolution_backward_data::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||||
| primitive_ = std::make_shared<dnnl::convolution_backward_data>(backward_prim_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_DST, dst_desc); | |||||
| AddArgument(DNNL_ARG_WEIGHTS, weights_desc); | |||||
| } | |||||
| bool Conv2dGradInputCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 2 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class Conv2dGradInputCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| Conv2dGradInputCPUKernel() = default; | |||||
| ~Conv2dGradInputCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| Conv2DBackpropInput, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| Conv2dGradInputCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,141 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h" | |||||
| #include <string> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| #ifdef PLATFORM_86 | |||||
| _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); | |||||
| _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); | |||||
| #endif | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| using tag = dnnl::memory::format_tag; | |||||
| using dim = dnnl::memory::dims; | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); | |||||
| bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional"); | |||||
| input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size"); | |||||
| hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size"); | |||||
| num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers"); | |||||
| has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias"); | |||||
| batch_size_ = SizeToInt(src_shape[1]); | |||||
| seq_len_ = SizeToInt(src_shape[0]); | |||||
| num_directions_ = 1; | |||||
| if (bidirectional_) { | |||||
| num_directions_ = 2; | |||||
| } | |||||
| if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) { | |||||
| MS_LOG(EXCEPTION) << "error iteration shape!"; | |||||
| } | |||||
| if (num_layers_ <= 0) { | |||||
| MS_LOG(EXCEPTION) << "layers must be greater than zero!"; | |||||
| } | |||||
| if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) { | |||||
| MS_LOG(EXCEPTION) << "conv2d only support 3-D input!"; | |||||
| } | |||||
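| // An LSTM cell has four gates, each of width hidden_size, hence the factor of 4 below. | |||||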
| const int gate_size = 4 * hidden_size_; | |||||
| for (int i = 0; i < num_layers_; ++i) { | |||||
| weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_); | |||||
| weight_h_size_ += gate_size * hidden_size_; | |||||
| } | |||||
| weight_size_ = weight_size_ * num_directions_; | |||||
| weight_h_size_ = weight_h_size_ * num_directions_; | |||||
| auto eng = MKLKernelEngine::Get().engine(); | |||||
| dnnl::stream s(eng); | |||||
| dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; | |||||
| if (bidirectional_) { | |||||
| direction = dnnl::rnn_direction::bidirectional_concat; | |||||
| } | |||||
| dim src_dims = {seq_len_, batch_size_, input_size_}; | |||||
| dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; | |||||
| weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; | |||||
| bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_}; | |||||
| dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; | |||||
| dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); | |||||
| dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); | |||||
| dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc); | |||||
| dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo); | |||||
| dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); | |||||
| dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc); | |||||
| dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); | |||||
| auto desc = std::make_shared<dnnl::lstm_forward::desc>(dnnl::prop_kind::forward_training, direction, src_desc, | |||||
| src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any), | |||||
| formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, | |||||
| dst_h_desc, dst_c_desc); | |||||
| prim_desc_ = dnnl::lstm_forward::primitive_desc(*desc, eng); | |||||
| primitive_ = std::make_shared<dnnl::lstm_forward>(prim_desc_); | |||||
| AddArgument(DNNL_ARG_SRC_LAYER, src_desc); | |||||
| AddArgument(DNNL_ARG_SRC_ITER, src_h_desc); | |||||
| AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc); | |||||
| AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_desc_.weights_layer_desc()); | |||||
| AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_desc_.weights_iter_desc()); | |||||
| AddArgument(DNNL_ARG_BIAS, bias_desc); | |||||
| AddArgument(DNNL_ARG_DST_LAYER, dst_desc); | |||||
| AddArgument(DNNL_ARG_DST_ITER, dst_h_desc); | |||||
| AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc); | |||||
| AddArgument(DNNL_ARG_WORKSPACE, prim_desc_.workspace_desc()); | |||||
| } | |||||
| bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| using dt = dnnl::memory::data_type; | |||||
| using tag = dnnl::memory::format_tag; | |||||
| auto eng = MKLKernelEngine::Get().engine(); | |||||
| auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); | |||||
| auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); | |||||
| auto weights_memory = dnnl::memory(prim_desc_.weights_layer_desc(), eng); | |||||
| auto weights_h_memory = dnnl::memory(prim_desc_.weights_iter_desc(), eng); | |||||
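| // inputs[3] packs the layer weights, then the recurrent (iter) weights, then the optional bias. | |||||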
| user_weights_memory.set_data_handle(inputs[3]->addr); | |||||
| user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_); | |||||
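| // Reorder from the user ldgoi layout into whatever blocked layout the primitive chose via tag::any. | |||||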
| Reorder(&user_weights_memory, &weights_memory); | |||||
| Reorder(&user_weights_h_memory, &weights_h_memory); | |||||
| auto bias_memory = dnnl::memory(prim_desc_.bias_desc(), eng); | |||||
| if (has_bias_) { | |||||
| bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_); | |||||
| } else { | |||||
| auto ret = | |||||
| memset_s(bias_memory.get_data_handle(), prim_desc_.bias_desc().get_size(), 0, prim_desc_.bias_desc().get_size()); | |||||
| if (ret != 0) { | |||||
| MS_LOG(EXCEPTION) << "bias memset error"; | |||||
| } | |||||
| } | |||||
| // set handle | |||||
| SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_DST_LAYER, outputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST_ITER, outputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST_ITER_C, outputs[2]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[3]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,70 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ | |||||
| #if defined(__x86_64__) || defined(__amd64__) || defined(_M_IX86) || defined(_M_X64) | |||||
| #define PLATFORM_86 | |||||
| #endif | |||||
| #ifdef PLATFORM_86 | |||||
| #include <pmmintrin.h> | |||||
| #endif | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class LstmCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| LstmCPUKernel() = default; | |||||
| ~LstmCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| int weight_size_ = 0; | |||||
| int weight_h_size_ = 0; | |||||
| int input_size_; | |||||
| int hidden_size_; | |||||
| int num_layers_; | |||||
| int batch_size_; | |||||
| int seq_len_; | |||||
| int num_directions_; | |||||
| bool bidirectional_; | |||||
| bool has_bias_; | |||||
| dnnl::memory::dims weights_dims_; | |||||
| dnnl::memory::dims weights_h_dims_; | |||||
| dnnl::memory::dims bias_dims_; | |||||
| dnnl::lstm_forward::primitive_desc prim_desc_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(LSTM, | |||||
| KernelAttr() | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32), | |||||
| LstmCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif  // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,196 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h" | |||||
| #include <cstring> | |||||
| #include <cmath> | |||||
| #include <numeric> | |||||
| #include <string> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| using tag = dnnl::memory::format_tag; | |||||
| using dim = dnnl::memory::dims; | |||||
| auto eng = MKLKernelEngine::Get().engine(); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); | |||||
| bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional"); | |||||
| input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size"); | |||||
| hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size"); | |||||
| num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers"); | |||||
| has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias"); | |||||
| batch_size_ = SizeToInt(src_shape[1]); | |||||
| seq_len_ = SizeToInt(src_shape[0]); | |||||
| num_directions_ = 1; | |||||
| if (bidirectional_) { | |||||
| num_directions_ = 2; | |||||
| } | |||||
| if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) { | |||||
| MS_LOG(EXCEPTION) << "error iteration shape!"; | |||||
| } | |||||
| if (num_layers_ <= 0) { | |||||
| MS_LOG(EXCEPTION) << "layers must be greater than zero!"; | |||||
| } | |||||
| if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) { | |||||
| MS_LOG(EXCEPTION) << "conv2d only support 3-D input!"; | |||||
| } | |||||
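| // An LSTM cell carries four gates, hence 4 * hidden_size weight rows per layer and direction; | |||||
| // layer 0 consumes input_size columns, deeper layers hidden_size * num_directions. | |||||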
| const int gate_size = 4 * hidden_size_; | |||||
| for (int i = 0; i < num_layers_; ++i) { | |||||
| weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_); | |||||
| weight_h_size_ += gate_size * hidden_size_; | |||||
| } | |||||
| weight_size_ = weight_size_ * num_directions_; | |||||
| weight_h_size_ = weight_h_size_ * num_directions_; | |||||
| dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; | |||||
| if (bidirectional_) { | |||||
| direction = dnnl::rnn_direction::bidirectional_concat; | |||||
| } | |||||
| dim src_dims = {seq_len_, batch_size_, input_size_}; | |||||
| dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; | |||||
| weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; | |||||
| bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_}; | |||||
| dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; | |||||
| dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||||
| dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); | |||||
| dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); | |||||
| dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc); | |||||
| dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo); | |||||
| dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); | |||||
| dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc); | |||||
| dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); | |||||
| auto forward_desc = std::make_shared<dnnl::lstm_forward::desc>( | |||||
| dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc, | |||||
| formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, | |||||
| dst_c_desc); | |||||
| auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(*forward_desc, eng); | |||||
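| // oneDNN requires the forward primitive descriptor as a hint when building the backward one; | |||||
| // the same memory descriptors are reused for the corresponding diff (gradient) tensors. | |||||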
| auto backward_desc = std::make_shared<dnnl::lstm_backward::desc>( | |||||
| dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any), | |||||
| formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc, | |||||
| src_c_desc, formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, | |||||
| dst_h_desc, dst_c_desc); | |||||
| prim_backward_desc_ = dnnl::lstm_backward::primitive_desc(*backward_desc, eng, prim_forward_desc); | |||||
| primitive_ = std::make_shared<dnnl::lstm_backward>(prim_backward_desc_); | |||||
| AddArgument(DNNL_ARG_SRC_LAYER, src_desc); | |||||
| AddArgument(DNNL_ARG_SRC_ITER, src_h_desc); | |||||
| AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc); | |||||
| AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_backward_desc_.weights_layer_desc()); | |||||
| AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_backward_desc_.weights_iter_desc()); | |||||
| AddArgument(DNNL_ARG_BIAS, bias_desc); | |||||
| AddArgument(DNNL_ARG_DST_LAYER, dst_desc); | |||||
| AddArgument(DNNL_ARG_DST_ITER, dst_h_desc); | |||||
| AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc); | |||||
| AddArgument(DNNL_ARG_WORKSPACE, prim_forward_desc.workspace_desc()); | |||||
| AddArgument(DNNL_ARG_DIFF_SRC_LAYER, src_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_SRC_ITER, src_h_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_SRC_ITER_C, src_c_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_WEIGHTS_LAYER, prim_backward_desc_.diff_weights_layer_desc()); | |||||
| AddArgument(DNNL_ARG_DIFF_WEIGHTS_ITER, prim_backward_desc_.diff_weights_iter_desc()); | |||||
| AddArgument(DNNL_ARG_DIFF_BIAS, bias_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_DST_LAYER, dst_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_DST_ITER, dst_h_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_DST_ITER_C, dst_c_desc); | |||||
| } | |||||
| bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| using dt = dnnl::memory::data_type; | |||||
| using tag = dnnl::memory::format_tag; | |||||
| auto eng = MKLKernelEngine::Get().engine(); | |||||
| // construct fw memory | |||||
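| // inputs[3] packs the flattened parameters contiguously: weights, then weights_h, then bias. | |||||
| // User views are created in ldgoi layout and reordered into the layout the primitive chose. | |||||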
| auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); | |||||
| auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); | |||||
| auto weights_memory = dnnl::memory(prim_backward_desc_.weights_layer_desc(), eng); | |||||
| auto weights_h_memory = dnnl::memory(prim_backward_desc_.weights_iter_desc(), eng); | |||||
| auto bias_memory = dnnl::memory(prim_backward_desc_.bias_desc(), eng); | |||||
| user_weights_memory.set_data_handle(inputs[3]->addr); | |||||
| user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_); | |||||
| Reorder(&user_weights_memory, &weights_memory); | |||||
| Reorder(&user_weights_h_memory, &weights_h_memory); | |||||
| if (has_bias_) { | |||||
| bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_); | |||||
| } else { | |||||
| if (memset_s(bias_memory.get_data_handle(), prim_backward_desc_.bias_desc().get_size(), 0, | |||||
| prim_backward_desc_.bias_desc().get_size())) { | |||||
| MS_LOG(EXCEPTION) << "bias memset error"; | |||||
| } | |||||
| } | |||||
| // construct bw memory | |||||
| auto diff_weights_memory = dnnl::memory(prim_backward_desc_.diff_weights_layer_desc(), eng); | |||||
| auto diff_weights_h_memory = dnnl::memory(prim_backward_desc_.diff_weights_iter_desc(), eng); | |||||
| auto diff_bias_memory = dnnl::memory(prim_backward_desc_.diff_bias_desc(), eng); | |||||
| auto user_diff_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); | |||||
| auto user_diff_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); | |||||
| user_diff_weights_memory.set_data_handle(outputs[3]->addr); | |||||
| user_diff_weights_h_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_); | |||||
| if (memset_s(user_diff_weights_memory.get_data_handle(), user_diff_weights_memory.get_desc().get_size(), 0, | |||||
| user_diff_weights_memory.get_desc().get_size())) { | |||||
| MS_LOG(EXCEPTION) << "user weights grad memset error"; | |||||
| } | |||||
| if (memset_s(user_diff_weights_h_memory.get_data_handle(), user_diff_weights_h_memory.get_desc().get_size(), 0, | |||||
| user_diff_weights_h_memory.get_desc().get_size())) { | |||||
| MS_LOG(EXCEPTION) << "user weights iter grad memset error"; | |||||
| } | |||||
| if (has_bias_) { | |||||
| diff_bias_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_ + weight_h_size_); | |||||
| } | |||||
| if (memset_s(diff_bias_memory.get_data_handle(), prim_backward_desc_.diff_bias_desc().get_size(), 0, | |||||
| prim_backward_desc_.diff_bias_desc().get_size())) { | |||||
| MS_LOG(EXCEPTION) << "bias grad memset error"; | |||||
| } | |||||
| if (memset_s(diff_weights_memory.get_data_handle(), diff_weights_memory.get_desc().get_size(), 0, | |||||
| diff_weights_memory.get_desc().get_size())) { | |||||
| MS_LOG(EXCEPTION) << "weights grad memset error"; | |||||
| } | |||||
| if (memset_s(diff_weights_h_memory.get_data_handle(), diff_weights_h_memory.get_desc().get_size(), 0, | |||||
| diff_weights_h_memory.get_desc().get_size())) { | |||||
| MS_LOG(EXCEPTION) << "weights iter grad memset error"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_DST_LAYER, inputs[4]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST_ITER, inputs[5]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST_ITER_C, inputs[6]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_WORKSPACE, inputs[10]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC_LAYER, outputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER, outputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER_C, outputs[2]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_BIAS, diff_bias_memory.get_data_handle()); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_DST_LAYER, inputs[7]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER, inputs[8]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER_C, inputs[9]->addr); | |||||
| ExecutePrimitive(); | |||||
| Reorder(&diff_weights_memory, &user_diff_weights_memory); | |||||
| Reorder(&diff_weights_h_memory, &user_diff_weights_h_memory); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,71 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class LSTMGradCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| LSTMGradCPUKernel() = default; | |||||
| ~LSTMGradCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| int weight_size_ = 0; | |||||
| int weight_h_size_ = 0; | |||||
| int input_size_; | |||||
| int hidden_size_; | |||||
| int num_layers_; | |||||
| int batch_size_; | |||||
| int seq_len_; | |||||
| int num_directions_; | |||||
| bool bidirectional_; | |||||
| bool has_bias_; | |||||
| dnnl::memory::dims weights_dims_; | |||||
| dnnl::memory::dims weights_h_dims_; | |||||
| dnnl::memory::dims bias_dims_; | |||||
| dnnl::lstm_backward::primitive_desc prim_backward_desc_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(LSTMGrad, | |||||
| KernelAttr() | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32), | |||||
| LSTMGradCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,71 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h" | |||||
| #include <algorithm> | |||||
| #include <utility> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "common/utils.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void MatMulCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||||
| if (src_shape.size() != 2 || weight_shape.size() != 2 || dst_shape.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "matmul invalid input size"; | |||||
| } | |||||
| bool trans_a = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_A); | |||||
| bool trans_b = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_B); | |||||
| if (trans_a) { | |||||
| trans_a_ = TRANSPOSE_YES; | |||||
| dim_m_ = static_cast<dnnl_dim_t>(src_shape[1]); | |||||
| dim_k_ = static_cast<dnnl_dim_t>(src_shape[0]); | |||||
| } else { | |||||
| dim_m_ = static_cast<dnnl_dim_t>(src_shape[0]); | |||||
| dim_k_ = static_cast<dnnl_dim_t>(src_shape[1]); | |||||
| } | |||||
| if (trans_b) { | |||||
| trans_b_ = TRANSPOSE_YES; | |||||
| } | |||||
| dim_n_ = static_cast<dnnl_dim_t>(dst_shape[1]); | |||||
| } | |||||
| bool MatMulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 2 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "matmul error input output size!"; | |||||
| } | |||||
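| // dnnl_sgemm is row-major: each leading dimension is the stride between rows in memory, so it | |||||
| // equals the operand's column count and flips when that operand is transposed. | |||||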
| dnnl_dim_t lda = dim_m_; | |||||
| if (trans_a_ == TRANSPOSE_NO) { | |||||
| lda = dim_k_; | |||||
| } | |||||
| dnnl_dim_t ldb = dim_k_; | |||||
| if (trans_b_ == TRANSPOSE_NO) { | |||||
| ldb = dim_n_; | |||||
| } | |||||
| auto input_a = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto input_b = reinterpret_cast<float *>(inputs[1]->addr); | |||||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| (void)dnnl_sgemm(trans_a_, trans_b_, dim_m_, dim_n_, dim_k_, 1.f, input_a, lda, input_b, ldb, 0.f, output, dim_n_); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,50 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class MatMulCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| MatMulCPUKernel() = default; | |||||
| ~MatMulCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| char trans_a_{TRANSPOSE_NO}; | |||||
| char trans_b_{TRANSPOSE_NO}; | |||||
| dnnl_dim_t dim_m_{0}; | |||||
| dnnl_dim_t dim_n_{0}; | |||||
| dnnl_dim_t dim_k_{0}; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| MatMul, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| MatMulCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,106 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include <algorithm> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, | |||||
| const std::vector<size_t> &src_shape, int kernel_size, int stride, | |||||
| std::vector<int> *padding_l, std::vector<int> *padding_r) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| if (src_shape.size() < 2) { | |||||
| MS_LOG(EXCEPTION) << "set pad only support src dim >= 2!"; | |||||
| } | |||||
| std::vector<int> weight_height; | |||||
| weight_height.emplace_back(src_shape[src_shape.size() - 2]); | |||||
| weight_height.emplace_back(src_shape[src_shape.size() - 1]); | |||||
| int rad = kernel_size / 2; | |||||
| int need_pad = kernel_size - 1; | |||||
| MS_LOG(INFO) << "pad mode " << pad_mode; | |||||
| if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) { | |||||
| for (auto wh : weight_height) { | |||||
| int re = (wh - 1) % stride; | |||||
| int pad = std::max(rad - (re / 2), 0); | |||||
| padding_r->emplace_back(pad); | |||||
| pad = std::max(need_pad - pad - re, 0); | |||||
| padding_l->emplace_back(pad); | |||||
| } | |||||
| } else if (pad_mode == PAD_MODE_LOWER_VALID || pad_mode == PAD_MODE_UPPER_VALID) { | |||||
| MS_LOG(INFO) << "pad valid"; | |||||
| padding_l->emplace_back(0); | |||||
| padding_l->emplace_back(0); | |||||
| padding_r->emplace_back(0); | |||||
| padding_r->emplace_back(0); | |||||
| } else { | |||||
| std::vector<int> pad = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, PAD); | |||||
| if (pad.size() != 4) { | |||||
| MS_LOG(EXCEPTION) << "wrong pad size in max pooling " << pad.size(); | |||||
| } | |||||
| padding_l->emplace_back(pad[0]); | |||||
| padding_l->emplace_back(pad[1]); | |||||
| padding_r->emplace_back(pad[2]); | |||||
| padding_r->emplace_back(pad[3]); | |||||
| } | |||||
| } | |||||
| dnnl::memory::format_tag MKLCPUKernel::GetDefaultFormatTag(const dnnl::memory::dims &dims) const { | |||||
| dnnl::memory::format_tag mem_tag; | |||||
| auto dim_size = dims.size(); | |||||
| if (dim_size == 4) { | |||||
| mem_tag = dnnl::memory::format_tag::abcd; | |||||
| } else if (dim_size == 3) { | |||||
| mem_tag = dnnl::memory::format_tag::abc; | |||||
| } else if (dim_size == 2) { | |||||
| mem_tag = dnnl::memory::format_tag::ab; | |||||
| } else if (dim_size == 1) { | |||||
| mem_tag = dnnl::memory::format_tag::a; | |||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "kernel dims invalid " << dim_size; | |||||
| } | |||||
| return mem_tag; | |||||
| } | |||||
| dnnl::memory::desc MKLCPUKernel::GetDefaultMemDesc(const std::vector<size_t> &shape) { | |||||
| dnnl::memory::dims dims; | |||||
| dims.insert(dims.end(), shape.begin(), shape.end()); | |||||
| dnnl::memory::format_tag mem_tag = GetDefaultFormatTag(dims); | |||||
| dnnl::memory::desc mem_desc(dims, dnnl::memory::data_type::f32, mem_tag); | |||||
| return mem_desc; | |||||
| } | |||||
| void MKLCPUKernel::AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc) { | |||||
| arguments_[arg_key] = MKLKernelEngine::Get().CreateMemory(mem_desc, alloc); | |||||
| } | |||||
| void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) { | |||||
| auto arg_iter = arguments_.find(arg_key); | |||||
| if (arg_iter != arguments_.end()) { | |||||
| arg_iter->second.set_data_handle(ptr); | |||||
| } | |||||
| } | |||||
| void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); } | |||||
| void MKLCPUKernel::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) { | |||||
| MKLKernelEngine::Get().Reorder(src_mem, dst_mem); | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,52 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include "dnnl.hpp" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class MKLCPUKernel : public CPUKernel { | |||||
| public: | |||||
| MKLCPUKernel() = default; | |||||
| ~MKLCPUKernel() override = default; | |||||
| protected: | |||||
| void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape, | |||||
| int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r); | |||||
| void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false); | |||||
| void SetArgumentHandle(int arg_key, void *ptr); | |||||
| dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; | |||||
| dnnl::memory::desc GetDefaultMemDesc(const std::vector<size_t> &shape); | |||||
| void ExecutePrimitive(); | |||||
| std::unordered_map<int, dnnl::memory> arguments_; | |||||
| std::shared_ptr<dnnl::primitive> primitive_{nullptr}; | |||||
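| // Convenience helper: build an f32 memory descriptor for the given dims and format tag. | |||||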
| inline dnnl::memory::desc formatted_md(const dnnl::memory::dims &dimensions, dnnl::memory::format_tag layout) { | |||||
| return dnnl::memory::desc{{dimensions}, dnnl::memory::data_type::f32, layout}; | |||||
| } | |||||
| void Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem); | |||||
| }; | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,40 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "utils/log_adapter.h" | |||||
| #include "dnnl.hpp" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void MKLKernelEngine::Execute(const std::shared_ptr<dnnl::primitive> &primitive, | |||||
| const std::unordered_map<int, dnnl::memory> &arguments) { | |||||
| MS_EXCEPTION_IF_NULL(primitive); | |||||
| primitive->execute(stream_, arguments); | |||||
| (void)stream_.wait(); | |||||
| } | |||||
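| // With alloc == false the dnnl::memory is created without a backing buffer; the caller binds | |||||
| // one later through set_data_handle. | |||||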
| dnnl::memory MKLKernelEngine::CreateMemory(const dnnl::memory::desc &mem_desc, bool alloc) { | |||||
| if (alloc) { | |||||
| return dnnl::memory(mem_desc, engine_); | |||||
| } else { | |||||
| return dnnl::memory(mem_desc, engine_, nullptr); | |||||
| } | |||||
| } | |||||
| void MKLKernelEngine::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) { | |||||
| dnnl::reorder(*src_mem, *dst_mem).execute(stream_, *src_mem, *dst_mem); | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,61 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||||
| if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) { | |||||
| MS_LOG(EXCEPTION) << "mul only support same dim input or tensor * scalar " << src0_shape.size() << " vs " | |||||
| << src1_shape.size(); | |||||
| } | |||||
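| // Broadcast a lower-rank second input by right-padding its shape with 1s (tensor * scalar case). | |||||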
| if (src1_shape.size() < src0_shape.size()) { | |||||
| for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) { | |||||
| src1_shape.emplace_back(1); | |||||
| } | |||||
| } | |||||
| dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape); | |||||
| dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape); | |||||
| dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape); | |||||
| dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_mem_desc, src1_mem_desc, dst_mem_desc); | |||||
| auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||||
| primitive_ = std::make_shared<dnnl::binary>(prim_desc); | |||||
| AddArgument(DNNL_ARG_SRC_0, src0_mem_desc); | |||||
| AddArgument(DNNL_ARG_SRC_1, src1_mem_desc); | |||||
| AddArgument(DNNL_ARG_DST, dst_mem_desc); | |||||
| } | |||||
| bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 2 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "mul error input output size!"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,42 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class MulCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| MulCPUKernel() = default; | |||||
| ~MulCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| Mul, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| MulCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,69 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h" | |||||
| #include <string> | |||||
| #include <algorithm> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||||
| std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE); | |||||
| std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES); | |||||
| if (kernel_sizes.size() != 4 || strides.size() != 4) { | |||||
| MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size(); | |||||
| } | |||||
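| // ksize and strides are given in NCHW order; only the spatial entries (indices 2 and 3) are passed to oneDNN. | |||||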
| dnnl::memory::dims strides_dims{strides[2], strides[3]}; | |||||
| dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]}; | |||||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING); | |||||
| std::vector<int> int_padding_l; | |||||
| std::vector<int> int_padding_r; | |||||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r); | |||||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "pooling get padding failed"; | |||||
| } | |||||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||||
| dnnl::pooling_forward::desc desc = | |||||
| dnnl::pooling_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::pooling_max, src_desc, dst_desc, | |||||
| strides_dims, kernels_dims, padding_l, padding_r); | |||||
| auto prim_desc = dnnl::pooling_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||||
| primitive_ = std::make_shared<dnnl::pooling_forward>(prim_desc); | |||||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_DST, dst_desc); | |||||
| AddArgument(DNNL_ARG_WORKSPACE, prim_desc.workspace_desc()); | |||||
| } | |||||
| bool PoolingCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.empty() || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,41 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class PoolingCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| PoolingCPUKernel() = default; | |||||
| ~PoolingCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(MaxPool, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| PoolingCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,124 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h" | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <algorithm> | |||||
| #include "common/utils.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| dst_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||||
| std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE); | |||||
| std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES); | |||||
| if (kernel_sizes.size() != 4 || strides.size() != 4 || src_shape_.size() != 4 || dst_shape_.size() != 4) { | |||||
| MS_LOG(EXCEPTION) << "pooling grad invalid input size"; | |||||
| } | |||||
| std::vector<int> padding_r; | |||||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING); | |||||
| kernel_size_ = kernel_sizes[3]; | |||||
| stride_ = strides[3]; | |||||
| GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r); | |||||
| } | |||||
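| // MaxPoolGrad re-runs the forward argmax: each output cell's gradient is routed to the | |||||
| // position of its window maximum. row_max_pair lazily caches, per source column, the index | |||||
| // and value of the column maximum within the current window band, so neighbouring windows | |||||
| // in the same band reuse column maxima instead of rescanning them. | |||||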
| void PoolingGradCPUKernel::RowPoolingGrad(const float *input, float *output, float diff, | |||||
| const std::vector<std::pair<size_t, size_t>> &box, | |||||
| std::vector<std::pair<size_t, float>> *row_max_pair) { | |||||
| float max_value = 0; | |||||
| size_t max_index = box[1].second; | |||||
| size_t src_width = src_shape_[3]; | |||||
| size_t index_start; | |||||
| size_t index; | |||||
| for (size_t i = box[1].first; i < box[1].second; ++i) { | |||||
| if ((*row_max_pair)[i].first == 0) { | |||||
| index_start = box[0].first * src_width; | |||||
| for (size_t j = box[0].first; j < box[0].second; ++j) { | |||||
| index = index_start + i; | |||||
| if (input[index] > (*row_max_pair)[i].second || j == box[0].first) { | |||||
| (*row_max_pair)[i].second = input[index]; | |||||
| (*row_max_pair)[i].first = index; | |||||
| } | |||||
| index_start += src_width; | |||||
| } | |||||
| } | |||||
| if ((*row_max_pair)[i].second > max_value || max_index == box[1].second) { | |||||
| max_value = (*row_max_pair)[i].second; | |||||
| max_index = i; | |||||
| } | |||||
| } | |||||
| output[(*row_max_pair)[max_index].first] += diff; | |||||
| } | |||||
| void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *diff, float *output) { | |||||
| int src_width = SizeToInt(src_shape_[3]); | |||||
| int src_height = SizeToInt(src_shape_[2]); | |||||
| std::vector<std::pair<size_t, float>> row_max_pair(src_shape_[3]); | |||||
| std::vector<std::pair<size_t, size_t>> box(2); | |||||
| int h_start = -padding_l_[0]; | |||||
| size_t diff_index = 0; | |||||
| for (size_t h = 0; h < dst_shape_[2]; ++h) { | |||||
| box[0].first = IntToSize(std::max(h_start, 0)); | |||||
| box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height)); | |||||
| for (size_t w = 0; w < src_shape_[3]; ++w) { | |||||
| row_max_pair[w].first = 0; | |||||
| row_max_pair[w].second = 0; | |||||
| } | |||||
| int w_start = -padding_l_[1]; | |||||
| for (size_t w = 0; w < dst_shape_[3]; ++w) { | |||||
| box[1].first = IntToSize(std::max(w_start, 0)); | |||||
| box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width)); | |||||
| RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair); | |||||
| diff_index += 1; | |||||
| w_start += stride_; | |||||
| } | |||||
| h_start += stride_; | |||||
| } | |||||
| } | |||||
| bool PoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 3 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "pooling grad error input output size!"; | |||||
| } | |||||
| auto input = reinterpret_cast<float *>(inputs[0]->addr); | |||||
| auto diff = reinterpret_cast<float *>(inputs[2]->addr); | |||||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||||
| auto ret = memset_s(output, outputs[0]->size, 0, outputs[0]->size); | |||||
| if (ret != 0) { | |||||
| MS_LOG(EXCEPTION) << "pooling grad memset error"; | |||||
| } | |||||
| size_t src_wh = src_shape_[2] * src_shape_[3]; | |||||
| size_t dst_wh = dst_shape_[2] * dst_shape_[3]; | |||||
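| // Walk each (batch, channel) plane independently, advancing the raw pointers plane by plane. | |||||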
| for (size_t n = 0; n < src_shape_[0]; ++n) { | |||||
| for (size_t c = 0; c < src_shape_[1]; ++c) { | |||||
| ChannelPoolingGrad(input, diff, output); | |||||
| input = input + src_wh; | |||||
| output = output + src_wh; | |||||
| diff = diff + dst_wh; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,56 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <utility> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class PoolingGradCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| PoolingGradCPUKernel() = default; | |||||
| ~PoolingGradCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| private: | |||||
| void RowPoolingGrad(const float *input, float *output, float diff, const std::vector<std::pair<size_t, size_t>> &box, | |||||
| std::vector<std::pair<size_t, float>> *row_max_pair); | |||||
| void ChannelPoolingGrad(const float *input, const float *diff, float *output); | |||||
| int stride_{0}, kernel_size_{0}; | |||||
| std::vector<int> padding_l_; | |||||
| std::vector<size_t> src_shape_; | |||||
| std::vector<size_t> dst_shape_; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(MaxPoolGrad, | |||||
| KernelAttr() | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddInputAttr(kNumberTypeFloat32) | |||||
| .AddOutputAttr(kNumberTypeFloat32), | |||||
| PoolingGradCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,52 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| if (src_shape.size() != 4 && src_shape.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "relu kernel dims invalid " << src_shape.size(); | |||||
| } | |||||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||||
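| // alpha = 0.0 is the negative-slope parameter of eltwise_relu, i.e. the standard ReLU. | |||||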
| dnnl::eltwise_forward::desc desc = | |||||
| dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); | |||||
| auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||||
| primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc); | |||||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_DST, src_desc); | |||||
| } | |||||
| bool ReluCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.empty() || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,40 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class ReluCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| ReluCPUKernel() = default; | |||||
| ~ReluCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReluCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,69 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| if (src_shape.size() != 4 && src_shape.size() != 2) { | |||||
| MS_LOG(EXCEPTION) << "relu grad kernel dims invalid " << src_shape.size(); | |||||
| } | |||||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||||
| dnnl::eltwise_forward::desc forward_desc = | |||||
| dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); | |||||
| auto forward_prim_desc = dnnl::eltwise_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); | |||||
| dnnl::eltwise_backward::desc backward_desc = | |||||
| dnnl::eltwise_backward::desc(dnnl::algorithm::eltwise_relu, src_desc, src_desc, 0.0, 0.0); | |||||
| auto backward_prim_desc = | |||||
| dnnl::eltwise_backward::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||||
| primitive_ = std::make_shared<dnnl::eltwise_backward>(backward_prim_desc); | |||||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_DIFF_DST, src_desc); | |||||
| } | |||||
| bool ReluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.size() < 2 || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "relu grad error input output size!"; | |||||
| } | |||||
| if (inputs[0]->size != outputs[0]->size) { | |||||
| MS_LOG(EXCEPTION) << "relu grad error input output data size!"; | |||||
| } | |||||
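| // The backward primitive runs in place on the incoming gradient (inputs[0]), with inputs[1] | |||||
| // bound as the forward source; the result is then copied into the kernel's output buffer. | |||||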
| SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| size_t mem_bits = outputs[0]->size; | |||||
| auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits); | |||||
| if (ret != 0) { | |||||
| MS_LOG(EXCEPTION) << "memcpy_s error, errorno " << ret; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,43 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||||
| #define MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| class ReluGradCPUKernel : public MKLCPUKernel { | |||||
| public: | |||||
| ReluGradCPUKernel() = default; | |||||
| ~ReluGradCPUKernel() override = default; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| }; | |||||
| MS_REG_CPU_KERNEL( | |||||
| ReluGrad, | |||||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||||
| ReluGradCPUKernel); | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||||
| @@ -0,0 +1,54 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||||
| #include "runtime/device/cpu/cpu_device_address.h" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||||
| std::vector<int> axis_list = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, AXIS); | |||||
| if (axis_list.size() != 1) { | |||||
| MS_LOG(EXCEPTION) << "cpu softmax only support input axis size 1"; | |||||
| } | |||||
| int axis = axis_list[0]; | |||||
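| // oneDNN softmax needs a non-negative axis: -1 (or any out-of-range value) maps to the last dimension. | |||||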
| if (axis == -1 || axis >= SizeToInt(src_shape.size())) { | |||||
| axis = SizeToInt(src_shape.size()) - 1; | |||||
| } | |||||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||||
| dnnl::softmax_forward::desc desc = dnnl::softmax_forward::desc(dnnl::prop_kind::forward_training, src_desc, axis); | |||||
| auto prim_desc = dnnl::softmax_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||||
| primitive_ = std::make_shared<dnnl::softmax_forward>(prim_desc); | |||||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||||
| AddArgument(DNNL_ARG_DST, src_desc); | |||||
| } | |||||
| bool SoftmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||||
| const std::vector<kernel::AddressPtr> &outputs) { | |||||
| if (inputs.empty() || outputs.empty()) { | |||||
| MS_LOG(EXCEPTION) << "softmax error input output size!"; | |||||
| } | |||||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||||
| ExecutePrimitive(); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||