@@ -17,6 +17,10 @@ else()
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
endif()
if (ENABLE_PYTHON)
    add_compile_definitions(ENABLE_PYTHON)
endif()
set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")
@@ -70,6 +70,22 @@ Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wo
Contributions of any kind are welcome!

# Release 0.3.1-alpha

## Major Features and Improvements

### Ascend 910 Training and Inference Framework
* Frontend and User Interface
    * Independent model init interface.
* Data processing, augmentation, and save format
    * Support sample padding for minddataset.

## Bugfixes
* Python API
    * Fix bugs in the lars optimizer ([!1894](https://gitee.com/mindspore/mindspore/pulls/1894))
* Data processing
    * Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340))

# Release 0.3.0-alpha

## Major Features and Improvements
@@ -24,8 +24,8 @@ usage()
{
    echo "Usage:"
    echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
    echo "              [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
    echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
    echo "              [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
    echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
    echo ""
    echo "Options:"
    echo "    -d Debug mode"
@@ -48,6 +48,7 @@ usage()
    echo "    -P Enable dump anf graph to file in ProtoBuffer format, default on"
    echo "    -Q Enable dump memory, default off"
    echo "    -D Enable dumping of function graph ir, default on"
    echo "    -S Enable async data dump, default off"
    echo "    -z Compile dataset & mindrecord, default on"
    echo "    -M Enable MPI and NCCL for GPU training, gpu default on"
    echo "    -V Specify the minimum required cuda version, default CUDA 10.1"
@@ -56,6 +57,7 @@ usage()
    echo "    -s Enable serving module, default off"
    echo "    -B Enable debugger, default off"
    echo "    -E Enable IBVERBS for parameter server, default off"
    echo "    -l Compile with python dependency, default on"
}
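# For example, an Ascend backend build with the new async data dump enabled
# and the python dependency disabled could be invoked as follows (an
# illustrative combination; every other option keeps the defaults
# documented above):
#
#   bash build.sh -e d -S on -l off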
# check value of input is 'on' or 'off'
@@ -87,6 +89,7 @@ checkopts()
    ENABLE_TIMELINE="off"
    ENABLE_DUMP2PROTO="on"
    ENABLE_DUMPE2E="off"
    ENABLE_DATA_DUMP="off"
    ENABLE_DUMP_IR="on"
    COMPILE_MINDDATA="on"
    ENABLE_MPI="off"
@@ -98,9 +101,10 @@ checkopts()
    ENABLE_SERVING="off"
    ENABLE_DEBUGGER="off"
    ENABLE_IBVERBS="off"
    ENABLE_PYTHON="on"
    # Process the options
    while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
    while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
    do
        OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
        case "${opt}" in
@@ -151,6 +155,10 @@ checkopts()
            check_on_off $OPTARG p
            ENABLE_PROFILE="$OPTARG"
            ;;
        l)
            check_on_off $OPTARG l
            ENABLE_PYTHON="$OPTARG"
            ;;
        i)
            INC_BUILD="on"
            ;;
@@ -212,6 +220,11 @@ checkopts()
            ENABLE_DUMPE2E="$OPTARG"
            echo "enable dump end to end"
            ;;
        S)
            check_on_off $OPTARG S
            ENABLE_DATA_DUMP="$OPTARG"
            echo "enable data dump"
            ;;
        D)
            check_on_off $OPTARG D
            ENABLE_DUMP_IR="$OPTARG"
@@ -315,7 +328,11 @@ build_mindspore()
    if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
    fi
    if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
    fi
    CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
    CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
    if [[ "X$ENABLE_MPI" = "Xon" ]]; then
        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
    fi
@@ -9,11 +9,11 @@ else()
        LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
        URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
        MD5 0c2662a2b0bc80b0eb56495205247c8f
        CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json
        CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
    )
    include_directories(${icu4c_INC})
    add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON})
    add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA})
    add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N})
    add_definitions(-D ENABLE_ICU4C)
    endif()
endif()
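The configure logic now lives in scripts/build_icu4c.sh, whose contents are not part of this hunk. A minimal sketch of such a wrapper, assuming it only re-packages the command that was previously inlined above (the filter-file path is illustrative):

#!/bin/bash
# Hypothetical wrapper — the real scripts/build_icu4c.sh is not shown in this diff.
set -e
./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples \
    --disable-icuio --disable-extras \
    ICU_DATA_FILTER_FILE="${ICU_DATA_FILTER_FILE:-third_party/icu4c/filter.json}"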
@@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
if (ENABLE_DEBUGGER)
if (ENABLE_DEBUGGER OR ENABLE_SERVING)
    # build dependencies of gRPC
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
@@ -30,7 +30,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
if(USE_GLOG)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
endif()
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT ENABLE_GE)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
endif()
@@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
option(ENABLE_AKG "enable akg" OFF)
option(ENABLE_DEBUGGER "enable debugger" OFF)
option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
option(ENABLE_PYTHON "Enable python" ON)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    if (WIN32)
@@ -115,6 +116,10 @@ if(ENABLE_DUMP_E2E)
    add_compile_definitions(ENABLE_DUMP_E2E)
endif()
if(ENABLE_DATA_DUMP)
    add_compile_definitions(ENABLE_DATA_DUMP)
endif()
if(ENABLE_DEBUGGER)
    add_compile_definitions(ENABLE_DEBUGGER)
endif()
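With the new compile definition in place, source files can conditionally compile the data-dump path. A minimal hypothetical guard (DumpKernelOutput and its arguments are illustrative placeholders, not an API from this diff):

#ifdef ENABLE_DATA_DUMP
// Only compiled when cmake is configured with -DENABLE_DATA_DUMP=ON
// (i.e. build.sh -S on). DumpKernelOutput is a placeholder name.
DumpKernelOutput(kernel_name, output_addr);
#endif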
@@ -213,7 +213,6 @@ install(
        ${CMAKE_SOURCE_DIR}/mindspore/parallel
        ${CMAKE_SOURCE_DIR}/mindspore/mindrecord
        ${CMAKE_SOURCE_DIR}/mindspore/train
        ${CMAKE_SOURCE_DIR}/mindspore/model_zoo
        ${CMAKE_SOURCE_DIR}/mindspore/common
        ${CMAKE_SOURCE_DIR}/mindspore/ops
        ${CMAKE_SOURCE_DIR}/mindspore/communication
@@ -261,3 +260,17 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
        COMPONENT mindspore
    )
endif ()
if (ENABLE_SERVING)
    install(
        TARGETS ms_serving
        DESTINATION ${INSTALL_BASE_DIR}
        COMPONENT mindspore
    )
    install(
        TARGETS inference
        DESTINATION ${INSTALL_LIB_DIR}
        COMPONENT mindspore
    )
endif ()
@@ -0,0 +1,15 @@
{
    "DumpSettings": {
        "net_name": "ResNet50",
        "mode": 1,
        "iteration": 0,
        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
    },
    "DumpSettingsSpec": {
        "net_name": "net name, e.g. ResNet50",
        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
        "iteration": "specified iteration",
        "kernels": "op's full scope name which needs to be dumped"
    }
}
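Per the DumpSettingsSpec above, setting "mode" to 0 dumps every kernel and the "kernels" list is then ignored; for example:

{
    "DumpSettings": {
        "net_name": "ResNet50",
        "mode": 0,
        "iteration": 0,
        "kernels": []
    }
}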
@@ -1 +1 @@
Subproject commit 4084909d62c159da6ba316f61ad3d02a4857b34b
Subproject commit 31aa96ef41067a0ecdc4113ef245f8ede48f3457
@@ -20,7 +20,7 @@
#include <utility>
#include <vector>
#include <memory>
#include "ir/dtype/type_id.h"
#include "mindspore/core/ir/dtype/type_id.h"
namespace mindspore {
#define MS_API __attribute__((visibility("default")))
@@ -334,7 +334,7 @@ class Parser:
    def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None:
        self.fn = fn
        self.parse_method = parse_method
        _, self.line_offset = inspect.getsourcelines(self.fn)
        self.line_offset = 0
        self.filename: str = inspect.getfile(self.fn)
        # Used to resolve the function's globals Namespace.
@@ -350,7 +350,8 @@ class Parser:
        logger.debug("fn = %r", self.fn)
        tree = None
        if isinstance(self.fn, (types.FunctionType, types.MethodType)):
            original_src = inspect.getsource(self.fn)
            lines, self.line_offset = inspect.getsourcelines(self.fn)
            original_src = ''.join(lines)
            hexstr = hashlib.sha256(original_src.encode()).hexdigest()
            tree = Parser.ast_cache.get(hexstr)
            if not tree:
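The key point of this fix is that inspect.getsourcelines() returns both the source lines and the 1-based starting line number, so reading them together in parse keeps line_offset consistent with the exact source that is hashed and parsed. A standalone illustration of the stdlib behaviour relied on:

import inspect

def f():
    return 1

lines, offset = inspect.getsourcelines(f)  # (list of source lines, starting line number)
assert ''.join(lines) == inspect.getsource(f)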
@@ -108,7 +108,8 @@ def enumerate_(x, start=0):
    """Enumerate list or tuple."""
    x_type = F.typeof(x)
    ret = ()
    if check_is_tuple_or_list(x_type, "enumerate"):
    op_name = "enumerate"
    if check_is_tuple_or_list(x_type, op_name, "first input") and check_is_const_int(start, op_name, "start"):
        ret = zip(range(start, start + len(x)), x)
    return ret
@@ -123,11 +124,22 @@ def while_cond(x):
@constexpr
def check_is_tuple_or_list(x, op_name):
def check_is_tuple_or_list(x, op_name, arg_name):
    """Check whether x is a list or a tuple."""
    if isinstance(x, (mstype.list_type, mstype.tuple_type)):
        return True
    raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.")
    raise TypeError(f"For '{op_name}', the '{arg_name}' should be tuple or list, but got {x}.")
@constexpr
def check_is_const_int(x, op_name, arg_name):
    """Check whether x is a const int."""
    if x is None:
        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got a non-constant value.")
    if not isinstance(x, int):
        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.")
    return True
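# With a constant integer `start`, enumerate_ reduces to zipping an index range
# with the input; a plain-Python equivalent of the behaviour above:
#
#   x = ("a", "b", "c")
#   start = 1
#   tuple(zip(range(start, start + len(x)), x))  # ((1, 'a'), (2, 'b'), (3, 'c'))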
@constexpr
def check_is_tensor_bool_cond(shp):
@@ -1,4 +1,5 @@
## common setting
include_directories(${CMAKE_SOURCE_DIR}/mindspore/core)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_BINARY_DIR})
link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
@@ -35,20 +36,20 @@ if(ENABLE_GPU)
    include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS})
    file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "device/gpu/*.cc"
        "device/gpu/*.cu"
        "kernel/gpu/*.cu"
        "kernel/akg/gpu/*.cc"
        "kernel/akg/akg_kernel_build.cc"
        "kernel/akg/akg_kernel_attrs_process.cc"
        "runtime/device/gpu/*.cc"
        "runtime/device/gpu/*.cu"
        "backend/kernel_compiler/gpu/*.cu"
        "backend/kernel_compiler/akg/gpu/*.cc"
        "backend/kernel_compiler/akg/akg_kernel_build.cc"
        "backend/kernel_compiler/akg/akg_kernel_attrs_process.cc"
    )
    list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
    list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
    list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
        "device/gpu/distribution/collective_wrapper.cc"
        "device/gpu/distribution/mpi_wrapper.cc"
        "device/gpu/distribution/nccl_wrapper.cc"
    list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc")
    list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc"
        "runtime/device/gpu/distribution/collective_wrapper.cc"
        "runtime/device/gpu/distribution/mpi_wrapper.cc"
        "runtime/device/gpu/distribution/nccl_wrapper.cc"
    )
    set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
@@ -56,6 +57,7 @@ if(ENABLE_GPU)
    set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
    cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
    set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
    add_compile_definitions(ENABLE_GPU)
endif ()
## make flatbuffer files
@@ -101,16 +103,20 @@ if (ENABLE_DUMP_PROTO)
endif ()
if (ENABLE_D)
    include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
    include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
    include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto")
    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
    ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
    file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
    ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
    file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
    ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
    list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
    list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
    list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
    add_compile_definitions(ENABLE_D)
endif ()
@@ -121,18 +127,36 @@ if (MINDSPORE_PROTO_LIST)
endif()
## make sub objects
set(SUB_COMP
    transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict
    pybind_api pynative session utils vm
set(SUB_COMP
    transform/graph_ir
    transform/onnx
    backend/optimizer
    backend/kernel_compiler
    backend/session
    runtime/device
    frontend/optimizer
    frontend/parallel
    frontend/operator
    pipeline/jit
    pipeline/pynative
    common debug gvar predict pybind_api utils vm
)
foreach (_comp ${SUB_COMP})
    add_subdirectory(${_comp})
    if (TARGET _mindspore_${_comp}_obj)
        list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>)
        add_dependencies(_mindspore_${_comp}_obj proto_input flat_input)
    string(REPLACE "/" "_" sub ${_comp})
    if (TARGET _mindspore_${sub}_obj)
        list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
        add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
    endif ()
endforeach ()
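# The string(REPLACE "/" "_" ...) step above maps each component path to its
# object-library target name, for example:
#   transform/graph_ir      -> _mindspore_transform_graph_ir_obj
#   backend/kernel_compiler -> _mindspore_backend_kernel_compiler_obj
#   common                  -> _mindspore_common_obj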
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_base_obj>)
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/abstract abstract)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_abstract_obj>)
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
add_dependencies(_mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input)
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
@@ -204,8 +228,8 @@ endif()
# set c_expression building
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
pybind11_add_module(_c_expression "pipeline/init.cc")
set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
pybind11_add_module(_c_expression "pipeline/jit/init.cc")
MESSAGE(STATUS "operating system is ${CMAKE_SYSTEM}")
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -231,9 +255,11 @@ else ()
    target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
    target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
    target_link_libraries(_c_expression PRIVATE mindspore_gvar)
    target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
    if (${ENABLE_IBVERBS} STREQUAL "ON")
        target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
    if (NOT ENABLE_GE)
        target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
        if (${ENABLE_IBVERBS} STREQUAL "ON")
            target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
        endif()
    endif()
endif ()
@@ -260,8 +286,8 @@ if (ENABLE_CPU)
endif ()
if (ENABLE_MINDDATA)
    add_subdirectory(mindrecord)
    add_subdirectory(dataset)
    add_subdirectory(minddata/mindrecord)
    add_subdirectory(minddata/dataset)
endif ()
# build inference
@@ -270,7 +296,7 @@ set(LOAD_ONNX_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc
)
add_library(inference SHARED
    ${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc
    ${LOAD_ONNX_SRC}
)
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
@@ -0,0 +1,66 @@
file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
    "kernel_build_info.cc"
    "kash/*.cc"
    "common_utils.cc"
    "oplib/*.cc"
)
if (ENABLE_D)
    file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "kernel_query.cc"
        "kernel_fusion.cc"
        "akg/ascend/*.cc"
        "akg/akg_kernel_build.cc"
        "akg/akg_kernel_attrs_process.cc"
        "akg/akg_kernel_metadata.cc"
        "tbe/*.cc"
        "aicpu/*.cc"
        "rts/*.cc"
        "hccl/*.cc"
    )
    add_compile_definitions(ENABLE_D)
endif ()
if (ENABLE_CPU)
    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "cpu/*.cc"
    )
    list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc"
        "cpu/ps/pull_kernel.cc"
        "cpu/ps/embedding_look_up_ps_kernel.cc"
        "cpu/ps/embedding_look_up_proxy_kernel.cc"
        "cpu/ps/apply_momentum_ps_kernel.cc"
        "cpu/ps/sparse_apply_adam_ps_kernel.cc"
        "cpu/ps/sparse_apply_ftrl_ps_kernel.cc")
    if (NOT ENABLE_MPI)
        list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
        list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
        list(REMOVE_ITEM CPU_SRC_LIST "cpu/embedding_look_up_comm_grad_cpu_kernel.cc")
    endif ()
endif ()
if (ENABLE_GPU)
    file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "gpu/*.cu"
        "akg/gpu/*.cc"
        "akg/akg_kernel_build.cc"
        "akg/akg_kernel_attrs_process.cc"
    )
    file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc")
    list(REMOVE_ITEM GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc")
    if (ENABLE_MPI)
        include(ExternalProject)
        file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/nccl/*.cc")
        list(APPEND GPU_SRC_LIST ${GPU_NCCL_LIST})
    endif ()
    # add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST})
endif()
set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}
    PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
@@ -0,0 +1,312 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include <google/protobuf/text_format.h>
#include <fstream>
#include <utility>
#include <string>
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "common/utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;
bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
                   std::vector<size_t> *input_size_list) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(input_size_list);
  for (size_t i = 0; i < input_num; i++) {
    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
    if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {
      if (!anf_node->isa<CNode>()) {
        MS_LOG(EXCEPTION) << "anf_node is not a CNode.";
      }
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      if (cnode->inputs().size() < (i + 1)) {
        MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1;
        return false;
      }
      auto input_node = cnode->inputs()[i + 1];
      MS_EXCEPTION_IF_NULL(input_node);
      if (input_node->isa<ValueNode>()) {
        auto value_ptr = GetValueNode(input_node);
        auto value = GetValue<std::string>(value_ptr);
        input_size_list->push_back(value.size());
      }
    } else {
      auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
      MS_EXCEPTION_IF_NULL(type_ptr);
      int64_t size_i = 1;
      for (size_t j = 0; j < shape_i.size(); j++) {
        size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
      }
      size_t type_byte = GetTypeByte(type_ptr);
      if (type_byte == 0) {
        return false;
      }
      size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
      input_size_list->push_back(LongToSize(size_i));
    }
  }
  return true;
}
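// Worked example for the non-string branch above: a float32 input with device
// shape {2, 3} yields size_i = 2 * 3 = 6 elements and type_byte = 4 bytes, so
// 6 * 4 = 24 bytes is pushed onto input_size_list.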
bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  std::vector<size_t> input_size_list;
  std::vector<size_t> output_size_list;
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
    return false;
  }
  kernel_mod_ptr->SetInputSizeList(input_size_list);
  for (size_t i = 0; i < output_num; i++) {
    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
    MS_EXCEPTION_IF_NULL(type_ptr);
    int64_t size_i = 1;
    for (size_t j = 0; j < shape_i.size(); j++) {
      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
    }
    size_t type_byte = GetTypeByte(type_ptr);
    if (type_byte == 0) {
      return false;
    }
    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
    output_size_list.push_back(LongToSize(size_i));
  }
  kernel_mod_ptr->SetOutputSizeList(output_size_list);
  return true;
}
void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
                    ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
  MS_EXCEPTION_IF_NULL(node_attr);
  MS_EXCEPTION_IF_NULL(value);
  if (type == "int") {
    auto attr_value = GetValue<int>(value);
    (*node_attr)[attr_name].set_i(attr_value);
  } else if (type == "str") {
    auto attr_value = GetValue<std::string>(value);
    (*node_attr)[attr_name].set_s(attr_value);
  } else if (type == "bool") {
    auto attr_value = GetValue<bool>(value);
    (*node_attr)[attr_name].set_b(attr_value);
  } else if (type == "float") {
    auto attr_value = GetValue<float>(value);
    (*node_attr)[attr_name].set_f(attr_value);
  } else if (type == "listInt") {
    std::vector<int> attr_value;
    auto value_type = value->type();
    MS_EXCEPTION_IF_NULL(value_type);
    auto value_type_str = value_type->ToString();
    if (value_type_str == "Int32") {
      int data = GetValue<int>(value);
      attr_value.push_back(data);
    } else {
      attr_value = GetValue<std::vector<int>>(value);
    }
    mindspore::AttrValue input_shape_attr;
    mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
    MS_EXCEPTION_IF_NULL(input_shape_attr_list);
    for (const auto shape : attr_value) {
      input_shape_attr_list->add_i(shape);
    }
    (*node_attr)[attr_name] = input_shape_attr;
  } else {
    MS_LOG(EXCEPTION) << "type: " << type << " is not supported";
  }
}
void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  if (op_name == kPrint) {
    return;
  }
  auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
  MS_EXCEPTION_IF_NULL(op_info_ptr);
  auto attrs_ptr = op_info_ptr->attrs_ptr();
  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
  MS_EXCEPTION_IF_NULL(primitive);
  ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
  for (const auto &attr_ptr : attrs_ptr) {
    MS_EXCEPTION_IF_NULL(attr_ptr);
    std::string attr_name = attr_ptr->name();
    auto value = primitive->GetAttr(attr_name);
    if (value != nullptr) {
      if (attr_name == kQueueName || attr_name == kSharedName) {
        attr_name = kChannelName;
      } else if (attr_name == kSeed0) {
        attr_name = kSeed;
      } else if (attr_name == kSeed1) {
        attr_name = kSeed2;
      }
      std::string type = attr_ptr->type();
      ParseAttrValue(type, attr_name, value, node_attr);
    }
  }
  MS_LOG(INFO) << "Set node attr end!";
}
void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(proto);
  MS_EXCEPTION_IF_NULL(anf_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  if (input_num == 0) {
    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
    return;
  }
  for (size_t input_index = 0; input_index < input_num; input_index++) {
    ::mindspore::Tensor *node_inputs = proto->add_inputs();
    MS_EXCEPTION_IF_NULL(node_inputs);
    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
    std::vector<size_t> input_shape;
    int32_t input_data_type;
    if (input_type == kObjectTypeString) {
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      auto input_node = cnode->inputs()[input_index + 1];
      auto value_ptr = GetValueNode(input_node);
      auto value = GetValue<std::string>(value_ptr);
      input_shape.push_back(1);
      input_shape.push_back(value.size());
      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
    } else {
      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
    }
    mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
    for (auto item : input_shape) {
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      dim->set_size((::google::protobuf::int64)item);
    }
    node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
    node_inputs->set_mem_device("HBM");
  }
}
void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(proto);
  MS_EXCEPTION_IF_NULL(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  if (output_num == 0) {
    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output.";
    return;
  }
  for (size_t output_index = 0; output_index < output_num; output_index++) {
    ::mindspore::Tensor *node_outputs = proto->add_outputs();
    MS_EXCEPTION_IF_NULL(node_outputs);
    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
    mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
    MS_EXCEPTION_IF_NULL(tensorShape);
    for (auto item : output_shape) {
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      MS_EXCEPTION_IF_NULL(dim);
      dim->set_size((::google::protobuf::int64)item);
    }
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
    node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
    node_outputs->set_mem_device("HBM");
  }
}
void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  MS_LOG(INFO) << "SetNodedefProto entry";
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  // set op name
  proto->set_op(op_name);
  // set inputs tensor
  SetNodeInputs(anf_node, proto);
  // set outputs tensor
  SetNodeOutputs(anf_node, proto);
  // set node attr
  SetNodeAttr(anf_node, proto);
  MS_LOG(INFO) << "SetNodedefProto end!";
}
bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
                        const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_LOG(INFO) << "CreateNodeDefBytes entry";
  mindspore::NodeDef proto;
  SetNodedefProto(anf_node, &proto);
  std::string nodeDefStr;
  if (!proto.SerializeToString(&nodeDefStr)) {
    MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
    return false;
  }
  kernel_mod_ptr->SetNodeDef(nodeDefStr);
  MS_LOG(INFO) << "CreateNodeDefBytes end!";
  return true;
}
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  kernel_mod_ptr->SetAnfNode(anf_node);
  kernel_mod_ptr->SetNodeName(op_name);
  if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Create nodeDefBytes failed!";
  }
  if (!SetIOSize(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Set input output size list failed.";
  }
  return kernel_mod_ptr;
}
} // namespace kernel
} // namespace mindspore
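A hypothetical call-site sketch for the factory above, assuming anf_node is a node already assigned to the AICPU backend (both failure paths throw via MS_LOG(EXCEPTION), so a non-null return can be assumed):

// Sketch only: anf_node comes from the kernel-selection pass, not this diff.
auto kernel_mod = mindspore::kernel::AicpuOpBuild(anf_node);
// kernel_mod now carries the serialized NodeDef plus the input/output size lists.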
@@ -0,0 +1,27 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#include <memory>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
@@ -0,0 +1,73 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
#include <memory>
#include <string>
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
  MS_LOG(INFO) << "AicpuMetadataInfo.";
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(kernel_info_list);
  std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
  if (op_info_ptr == nullptr) {
    MS_LOG(DEBUG) << "Aicpu does not have op [" << op_name << "]";
    return;
  }
  // For compatibility with the current framework
  if (op_name == kPrint || op_name == kGetNext || op_name == kPack) {
    std::vector<std::string> inputs_format{};
    std::vector<TypeId> inputs_type{};
    if (op_name == kPrint || op_name == kPack) {
      for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
        inputs_format.emplace_back(kOpFormat_DEFAULT);
        inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
      }
    }
    std::vector<std::string> outputs_format;
    std::vector<TypeId> outputs_type;
    for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
      outputs_format.emplace_back(kOpFormat_DEFAULT);
      outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
    }
    auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
    builder.SetInputsFormat(inputs_format);
    builder.SetInputsDeviceType(inputs_type);
    builder.SetOutputsFormat(outputs_format);
    builder.SetOutputsDeviceType(outputs_type);
    builder.SetProcessor(AICPU);
    builder.SetKernelType(AICPU_KERNEL);
    builder.SetFusionType(OPAQUE);
    kernel_info_list->push_back(builder.Build());
    return;
  }
  if (!ParseMetadata(kernel_node, op_info_ptr, AICPU, kernel_info_list)) {
    MS_LOG(WARNING) << "Aicpu parse metadata of op [" << op_name << "] failed";
    return;
  }
}
} // namespace kernel
} // namespace mindspore
@@ -0,0 +1,30 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {
void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_META_DATA_H_
@@ -0,0 +1,156 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include <memory>
#include <vector>
#include <string>
#include <algorithm>
#include "runtime/mem.h"
#include "runtime/rt.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include "utils/convert_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "utils/context/ms_context.h"
using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
namespace mindspore {
namespace kernel {
constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";
AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
AicpuOpKernelMod::~AicpuOpKernelMod() {
  args_.clear();
  inputList_.clear();
  outputList_.clear();
  anf_node_ = nullptr;
  input_size_list_.clear();
  output_size_list_.clear();
  workspace_size_list_.clear();
}
void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetInputSizeList() const { return input_size_list_; }
void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  anf_node_ = anf_node;
}
void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs,
                                           const std::vector<AddressPtr> &outputs) {
  MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";
  node_so_ = AICPU_OPS_SO_NAME;
  // InputOutputAddr
  vector<void *> io_addrs;
  (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
                       [](const AddressPtr &input) -> void * { return input->addr; });
  (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(io_addrs),
                       [](const AddressPtr &output) -> void * { return output->addr; });
  auto io_addrs_num = io_addrs.size();
  // calculate paramLen: AicpuParamHead.len + ioAddrsSize + notifyId.len + customizedAttr.len
  auto param_len = sizeof(AicpuParamHead);
  // get input and output addrs size, no need to check overflow
  auto io_addrs_size = io_addrs_num * sizeof(uint64_t);
  // refresh paramLen, no need to check overflow
  param_len += io_addrs_size;
  auto node_def_len = node_def_str_.length();
  param_len += node_def_len;
  // Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
  AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
  args_.clear();
  (void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
  // TaskArgs append ioAddrs
  if (io_addrs_size != 0) {
    (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
  }
  // When it's an aicpu customized op, taskArgs should append the customized attr
  if (node_def_len != 0) {
    (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
  }
  MS_LOG(INFO) << "CreateCpuKernelInfoOffline end";
}
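// Layout of args_ as assembled above, e.g. for two inputs, one output and an
// n-byte serialized NodeDef:
//   [ AicpuParamHead { length = param_len, ioAddrNum = 3 } ]
//   [ 3 x 8-byte I/O addresses                             ]
//   [ n bytes of NodeDef                                   ]
// so param_len == sizeof(AicpuParamHead) + 3 * 8 + n.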
bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                              const std::vector<AddressPtr> &outputs, void *stream_ptr) {
  if (stream_ptr == nullptr) {
    MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
    return false;
  }
  CreateCpuKernelInfo(inputs, outputs);
  if (node_name_ == kTopK) {
    node_name_ = kTopKV2;
  }
  MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
               << ", args_size:" << args_.length();
  if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
                        reinterpret_cast<const void *>(node_name_.c_str()), 1,
                        reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
                        stream_ptr) != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Aicpu op launch failed!";
    return false;
  }
  return true;
}
std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
                                                   const std::vector<AddressPtr> &,
                                                   const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
  MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
  stream_id_ = stream_id;
  node_so_ = AICPU_OPS_SO_NAME;
  std::vector<void *> input_data_addrs;
  (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
                       [](const AddressPtr &input) -> void * { return input->addr; });
  std::vector<void *> output_data_addrs;
  (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
                       [](const AddressPtr &output) -> void * { return output->addr; });
  if (node_name_ == kTopK) {
    node_name_ = kTopKV2;
  }
  AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
    kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
  MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
  return {task_info_ptr};
}
} // namespace kernel
} // namespace mindspore
@@ -0,0 +1,75 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
#include <vector>
#include <memory>
#include <string>
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
namespace mindspore {
namespace kernel {
class AicpuOpKernelMod : public AscendKernelMod {
 public:
  AicpuOpKernelMod();
  ~AicpuOpKernelMod() override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
  std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                                   const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
  void SetInputList(const std::vector<int64_t> &inputList);
  void SetOutputList(const std::vector<int64_t> &outputList);
  void SetAnfNode(const AnfNodePtr &anf_node);
  void SetNodeDef(const std::string &nodeDef);
  void SetNodeName(const std::string &node_name);
  /**
   * @brief Build AICPU Engine kernel structure, and allocate device memory for offline task generation
   * @return SUCCESS
   * @return FAIL
   */
  void CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
  void SetInputSizeList(const std::vector<size_t> &size_list);
  void SetOutputSizeList(const std::vector<size_t> &size_list);
  void SetWorkspaceSizeList(const std::vector<size_t> &size_list);
  const std::vector<size_t> &GetInputSizeList() const override;
  const std::vector<size_t> &GetOutputSizeList() const override;
  const std::vector<size_t> &GetWorkspaceSizeList() const override;
 private:
  std::string args_;
  std::string node_def_str_;
  std::string node_name_;
  std::string node_so_;
  std::vector<int64_t> inputList_;
  std::vector<int64_t> outputList_;
  AnfNodePtr anf_node_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
using AicpuOpKernelModPtr = std::shared_ptr<AicpuOpKernelMod>;
using AicputOpKernelModPtrList = std::vector<AicpuOpKernelModPtr>;
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_MOD_H_
@@ -0,0 +1,56 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include <vector>
#include <string>
#include "proto/types.pb.h"
#include "runtime/mem.h"
#include "runtime/rt.h"
#include "utils/convert_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = {
  {mindspore::TypeId::kTypeUnknown, mindspore::DataType::MS_UNKNOWN},
  {mindspore::TypeId::kNumberTypeBool, mindspore::DataType::MS_BOOL},
  {mindspore::TypeId::kNumberTypeInt, mindspore::DataType::MS_INT32},
  {mindspore::TypeId::kNumberTypeInt8, mindspore::DataType::MS_INT8},
  {mindspore::TypeId::kNumberTypeInt16, mindspore::DataType::MS_INT16},
  {mindspore::TypeId::kNumberTypeInt32, mindspore::DataType::MS_INT32},
  {mindspore::TypeId::kNumberTypeInt64, mindspore::DataType::MS_INT64},
  {mindspore::TypeId::kNumberTypeUInt, mindspore::DataType::MS_UINT32},
  {mindspore::TypeId::kNumberTypeUInt8, mindspore::DataType::MS_UINT8},
  {mindspore::TypeId::kNumberTypeUInt16, mindspore::DataType::MS_UINT16},
  {mindspore::TypeId::kNumberTypeUInt32, mindspore::DataType::MS_UINT32},
  {mindspore::TypeId::kNumberTypeUInt64, mindspore::DataType::MS_UINT64},
  {mindspore::TypeId::kNumberTypeFloat16, mindspore::DataType::MS_FLOAT16},
  {mindspore::TypeId::kNumberTypeFloat, mindspore::DataType::MS_FLOAT32},
  {mindspore::TypeId::kNumberTypeFloat32, mindspore::DataType::MS_FLOAT32},
  {mindspore::TypeId::kNumberTypeFloat64, mindspore::DataType::MS_FLOAT64},
};
int AicpuOpUtil::MsTypeToProtoType(TypeId ms_type) {
  auto iter = MS_PROTO_DATA_TYPE_MAP.find(ms_type);
  if (iter != MS_PROTO_DATA_TYPE_MAP.end()) {
    return iter->second;
  } else {
    MS_LOG(ERROR) << "Unsupported ms_type value: " << static_cast<int>(ms_type);
    return -1;
  }
}
} // namespace kernel
} // namespace mindspore
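For example, with the mapping above (values taken from MS_PROTO_DATA_TYPE_MAP; an unmapped TypeId falls through to -1):

// kNumberTypeFloat32 maps to MS_FLOAT32; unknown type ids return -1.
int proto_type = mindspore::kernel::AicpuOpUtil::MsTypeToProtoType(mindspore::TypeId::kNumberTypeFloat32);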
@@ -0,0 +1,64 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
#include <cstdint>
#include <vector>
#include <map>
#include <string>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
constexpr auto kInitData = "InitData";
constexpr auto kGetNext = "GetNext";
constexpr auto kPrint = "Print";
constexpr auto kPack = "Pack";
constexpr auto kOutputTypes = "output_types";
constexpr auto kOutputShapes = "output_shapes";
constexpr auto kChannelName = "channel_name";
constexpr auto kSharedName = "shared_name";
constexpr auto kShapes = "shapes";
constexpr auto kTypes = "types";
constexpr auto kQueueName = "queue_name";
constexpr auto kSeed = "seed";
constexpr auto kSeed0 = "Seed0";
constexpr auto kSeed1 = "Seed1";
constexpr auto kSeed2 = "seed2";
constexpr auto kTopK = "TopK";
constexpr auto kTopKV2 = "TopKV2";
struct AicpuParamHead {
  uint32_t length;         // Total length: includes custom message
  uint32_t ioAddrNum;      // Input and output address number
  uint32_t extInfoLength;  // extInfo struct length
  uint64_t extInfoAddr;    // extInfo address
} __attribute__((packed));
class AicpuOpUtil {
 public:
  static int MsTypeToProtoType(TypeId ms_type);
 private:
  // kernel id
  static uint64_t KernelId_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
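Because AicpuParamHead is packed, its size is the plain sum of its fields, 4 + 4 + 4 + 8 = 20 bytes, which the param_len arithmetic in aicpu_kernel_mod.cc relies on; a compile-time check would be:

static_assert(sizeof(mindspore::kernel::AicpuParamHead) == 20,
              "AicpuParamHead must remain packed: three uint32_t fields plus one uint64_t");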
| @@ -0,0 +1,180 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" | |||
| #include <algorithm> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| // The x and output are akg op input and output param. | |||
| std::vector<std::string> input_names = {"x"}; | |||
| std::vector<std::string> output_names = {"output"}; | |||
| AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node); | |||
| TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); | |||
| std::string dst_type; | |||
| if (dst_type_id == kFloat32->type_id()) { | |||
| dst_type = "float32"; | |||
| } else if (dst_type_id == kFloat16->type_id()) { | |||
| dst_type = "float16"; | |||
| } | |||
| AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node); | |||
| } | |||
| void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::vector<std::string> input_names = {"x"}; | |||
| std::vector<std::string> output_names = {"output"}; | |||
| AnfAlgo::SetNodeAttr("input_names", MakeValue(input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr("output_names", MakeValue(output_names), anf_node); | |||
| std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(anf_node, 0); | |||
| if (origin_shape.size() != kShape4dDims) { | |||
| MS_LOG(EXCEPTION) << "The dim of origin_shape is not equal to 4, but it's dim is " << origin_shape.size() << "."; | |||
| } | |||
| std::vector<int> shape_transform; | |||
| (void)std::transform(origin_shape.begin(), origin_shape.end(), std::back_inserter(shape_transform), | |||
| [](const size_t &dim) { return static_cast<int>(dim); }); | |||
| AnfAlgo::SetNodeAttr("shape4d", MakeValue(shape_transform), anf_node); | |||
| AnfAlgo::SetNodeAttr("output_format", MakeValue(kOpFormat_NCHW), anf_node); | |||
| TypeId dst_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); | |||
| std::string dst_type; | |||
| if (dst_type_id == kFloat32->type_id()) { | |||
| dst_type = "float32"; | |||
| } else if (dst_type_id == kFloat16->type_id()) { | |||
| dst_type = "float16"; | |||
| } | |||
| AnfAlgo::SetNodeAttr("dstType", MakeValue(dst_type), anf_node); | |||
| } | |||
| void SetAkgAttrsForCast(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| // The x and output are akg op input and output param. | |||
| std::vector<std::string> input_names = {"x", "dst_type"}; | |||
| std::vector<std::string> output_names = {"output"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||
| std::string dst_type; | |||
| TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); | |||
| if (output_type == kFloat32->type_id()) { | |||
| dst_type = "float32"; | |||
| } else if (output_type == kFloat16->type_id()) { | |||
| dst_type = "float16"; | |||
| } else if (output_type == kInt32->type_id()) { | |||
| dst_type = "int32"; | |||
| } else { | |||
| MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString(); | |||
| } | |||
| AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node); | |||
| } | |||
| void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::vector<std::string> input_names{"dy", "data", "mean"}; | |||
| std::vector<std::string> output_names{"dgamma_red_hw", "dbeta_red_hw", "data_minus_mean"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||
| } | |||
| void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node) { | |||
| const size_t kBNGrad2InputSize = 5; | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::vector<std::string> input_names{"dgamma_red_hw", "dbeta_red_hw", "variance", "gamma"}; | |||
| std::vector<std::string> output_names{"bn_scale", "bn_bias", "rs", "dgamma_dx", "dbeta_dx"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||
| auto cnode = anf_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (cnode->inputs().size() < kBNGrad2InputSize) { | |||
| MS_LOG(EXCEPTION) << "The inputs size of BNGrad2 is less then " << kBNGrad2InputSize; | |||
| } | |||
| auto input1 = cnode->input(1); | |||
| MS_EXCEPTION_IF_NULL(input1); | |||
| auto tuple_getitem = input1->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(tuple_getitem); | |||
| if (tuple_getitem->inputs().size() < kTupleGetItemInputSize) { | |||
| MS_LOG(EXCEPTION) << "The inputs size of tuple_getitem is less then " << kTupleGetItemInputSize; | |||
| } | |||
| auto bn_grad1 = tuple_getitem->input(kRealInputNodeIndexInTupleGetItem); | |||
| std::vector<size_t> data_shape = AnfAlgo::GetInputDeviceShape(bn_grad1, 0); | |||
| AnfAlgo::SetNodeAttr(kAttrDataShape, MakeValue(opt::Convert2Int(data_shape)), anf_node); | |||
| } | |||
| void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::vector<std::string> input_names{"dy", "rs", "dgamma_dx", "dbeta_dx", "data_minus_mean"}; | |||
| std::vector<std::string> output_names{"dx"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), anf_node); | |||
| } | |||
| void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| // Set attr for fused_bn1 | |||
| std::vector<std::string> fused_bn1_input_names{"data"}; | |||
| std::vector<std::string> fused_bn1_output_names{"mean", "var_part"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn1_input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn1_output_names), anf_node); | |||
| } | |||
| void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| // Set attr for fused_bn2 | |||
| std::vector<std::string> fused_bn2_input_names{"mean", "var_part", "running_mean", "running_var"}; | |||
| std::vector<std::string> fused_bn2_output_names{"variance", "running_mean", "running_variance"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn2_input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn2_output_names), anf_node); | |||
| } | |||
| void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| // Set attr for fused_bn3 | |||
| std::vector<std::string> fused_bn3_input_names{"data", "mean", "variance", "gamma", "beta"}; | |||
| std::vector<std::string> fused_bn3_output_names{"y"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(fused_bn3_input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(fused_bn3_output_names), anf_node); | |||
| } | |||
| void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::vector<std::string> conv_bn1_output_names{"data", "var_part", "mean"}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(conv_bn1_output_names), anf_node); | |||
| } | |||
| void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::vector<std::string> bn2_add_relu_input_names{"data", "var_part", "mean", "other_branch_data", | |||
| "gamma", "beta", "running_mean", "running_var"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_add_relu_input_names), anf_node); | |||
| std::vector<std::string> bn2_add_relu_output_names{"output", "running_mean", "running_variance", "save_inv_variance"}; | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_add_relu_output_names), anf_node); | |||
| } | |||
| void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::vector<std::string> bn2_input_names{"data", "var_part", "mean", "gamma", "beta", "running_mean", "running_var"}; | |||
| std::vector<std::string> bn2_output_names{"y", "running_mean", "running_variance", "save_inv_variance"}; | |||
| AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(bn2_input_names), anf_node); | |||
| AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_output_names), anf_node); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H | |||
| #define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include "ir/anf.h" | |||
| #include "utils/utils.h" | |||
| #include "frontend/operator/ops.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void SetAkgAttrsForFour2Five(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForFive2Four(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForCast(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForBNGrad1(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForBNGrad2(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForBNGrad3(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForFusedBN1(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForFusedBN2(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForFusedBN3(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForConvBN1(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForBN2AddRelu(const AnfNodePtr &anf_node); | |||
| void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node); | |||
| const std::unordered_map<std::string, std::function<void(const AnfNodePtr &anf_node)>> kAkgKernelAttrsProcessMap = { | |||
| {kFour2FiveOpName, SetAkgAttrsForFour2Five}, | |||
| {kFive2FourOpName, SetAkgAttrsForFive2Four}, | |||
| {"Cast", SetAkgAttrsForCast}, | |||
| {kBNGrad1OpName, SetAkgAttrsForBNGrad1}, | |||
| {kBNGrad2OpName, SetAkgAttrsForBNGrad2}, | |||
| {kBNGrad3OpName, SetAkgAttrsForBNGrad3}, | |||
| {kFusedBN1OpName, SetAkgAttrsForFusedBN1}, | |||
| {kFusedBN2OpName, SetAkgAttrsForFusedBN2}, | |||
| {kFusedBN3OpName, SetAkgAttrsForFusedBN3}, | |||
| {kConvBN1OpName, SetAkgAttrsForConvBN1}, | |||
| {kBN2AddReluOpName, SetAkgAttrsForBN2AddRelu}, | |||
| {kBN2ReLUOpName, SetAkgAttrsForBN2Relu}, | |||
| }; | |||
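| // Usage sketch for the dispatch table above (illustrative helper name; the | |||
| // real call sites perform this lookup inline before generating kernel json): | |||
| // | |||
| //   void ProcessAkgAttrsIfRegistered(const AnfNodePtr &anf_node, const std::string &op_name) { | |||
| //     auto it = kAkgKernelAttrsProcessMap.find(op_name); | |||
| //     if (it != kAkgKernelAttrsProcessMap.end()) { | |||
| //       it->second(anf_node);  // e.g. SetAkgAttrsForCast when op_name == "Cast" | |||
| //     } | |||
| //   } | |||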
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_ATTRS_PROCESS_H | |||
| @@ -0,0 +1,623 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/akg/akg_kernel_build.h" | |||
| #include <Python.h> | |||
| #include <sys/types.h> | |||
| #include <signal.h> | |||
| #include <unistd.h> | |||
| #include <dirent.h> | |||
| #include <cctype> | |||
| #include <cstdint> | |||
| #include <memory> | |||
| #include <map> | |||
| #include <utility> | |||
| #include <algorithm> | |||
| #include <functional> | |||
| #include <sstream> | |||
| #include <iterator> | |||
| #include <numeric> | |||
| #include <unordered_set> | |||
| #include "common/utils.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "utils/any.h" | |||
| #include "utils/utils.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr int ME_MAX_KERNEL_NAME_LENGTH = 200; | |||
| constexpr int32_t ARGS_SIZE = 1; | |||
| constexpr auto kCompileWithJsonFunc = "compilewithjson"; | |||
| // json key | |||
| constexpr auto kOpDesc = "op_desc"; | |||
| constexpr auto kInputDesc = "input_desc"; | |||
| constexpr auto kShape = "shape"; | |||
| constexpr auto kDataType = "data_type"; | |||
| constexpr auto kOutputDesc = "output_desc"; | |||
| constexpr auto kName = "name"; | |||
| constexpr auto kTensorName = "tensor_name"; | |||
| constexpr auto kValue = "value"; | |||
| constexpr auto KDynInputSizes = "dyn_input_sizes"; | |||
| constexpr auto KInputNames = "input_names"; | |||
| constexpr auto KInput = "input"; | |||
| constexpr auto KDtype = "dtype"; | |||
| namespace { | |||
| template <typename T> | |||
| std::string Vector2Str(const std::vector<T> &inputs) { | |||
| if (!inputs.empty()) { | |||
| std::ostringstream oss; | |||
| (void)std::copy(inputs.begin(), inputs.end() - 1, std::ostream_iterator<T>(oss, ", ")); | |||
| oss << inputs.back(); | |||
| return oss.str(); | |||
| } | |||
| return ""; | |||
| } | |||
| } // namespace | |||
| std::string AkgKernelBuild::PyObjectToStr(PyObject *const PyObj) { | |||
| char *pChar = nullptr; | |||
| std::string str_res; | |||
| if (PyObj == nullptr) { | |||
| MS_LOG(ERROR) << "Input parameter is nullptr."; | |||
| return str_res; | |||
| } | |||
| PyObject *strArgs = PyObject_Str(PyObj); | |||
| if (strArgs != nullptr) { | |||
| (void)PyArg_Parse(strArgs, "s", &pChar); | |||
| } | |||
| if (pChar == nullptr) { | |||
| MS_LOG(ERROR) << "pChar is nullptr."; | |||
| return str_res; | |||
| } | |||
| str_res = pChar; | |||
| return str_res; | |||
| } | |||
| std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, | |||
| const std::pair<size_t, size_t> &position) { | |||
| if (node_json.count(tag) == 0) { | |||
| MS_LOG(ERROR) << "Node [" << node_json.dump() << "] has no key [" << tag << "]."; | |||
| return ""; | |||
| } | |||
| auto const &tag_desc = node_json[tag]; | |||
| nlohmann::json first_index; | |||
| if (tag == kOutputDesc) { | |||
| first_index = tag_desc; | |||
| } else if (!tag_desc.is_array() || tag_desc.size() <= position.first) { | |||
| MS_LOG(ERROR) << "Node [" << tag_desc.dump() << "] has no enough value [" << position.first << "]."; | |||
| return ""; | |||
| } else { | |||
| first_index = tag_desc[position.first]; | |||
| } | |||
| if (!first_index.is_array() || first_index.size() <= position.second) { | |||
| MS_LOG(ERROR) << "Node [" << first_index.dump() << "] has no enough value [" << position.second << "]."; | |||
| return ""; | |||
| } | |||
| auto const &second_index = first_index[position.second]; | |||
| if (second_index.count(kTensorName) == 0) { | |||
| MS_LOG(ERROR) << "Node [" << second_index.dump() << "] has no key [" << kTensorName << "]."; | |||
| return ""; | |||
| } | |||
| return second_index[kTensorName]; | |||
| } | |||
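| // For orientation, a sketch of the node json shape these accessors index, | |||
| // reconstructed from the logic above rather than from a formal spec: | |||
| // input_desc is an array of arrays (one inner array per op input, so dynamic | |||
| // inputs can hold several tensors), while output_desc is a flat array, which | |||
| // is why the kOutputDesc branch skips the position.first lookup: | |||
| // | |||
| //   { | |||
| //     "input_desc":  [[{"tensor_name": "input_0", ...}], [{"tensor_name": "input_1", ...}]], | |||
| //     "output_desc": [{"tensor_name": "output_0_0", ...}] | |||
| //   } | |||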
| void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair<size_t, size_t> &position, | |||
| nlohmann::json *const node_json) { | |||
| MS_EXCEPTION_IF_NULL(node_json); | |||
| if (node_json->count(tag) == 0) { | |||
| MS_LOG(ERROR) << "Node [" << node_json->dump() << "] has no key [" << tag << "]."; | |||
| return; | |||
| } | |||
| nlohmann::json *tag_desc = &((*node_json)[tag]); | |||
| nlohmann::json *first_index; | |||
| if (tag == kOutputDesc) { | |||
| first_index = tag_desc; | |||
| } else if (!tag_desc->is_array() || tag_desc->size() <= position.first) { | |||
| MS_LOG(ERROR) << "Node [" << tag_desc->dump() << "] has no enough value [" << position.first << "]."; | |||
| return; | |||
| } else { | |||
| first_index = &((*tag_desc)[position.first]); | |||
| } | |||
| if (!first_index->is_array() || first_index->size() <= position.second) { | |||
| MS_LOG(ERROR) << "Node [" << first_index->dump() << "] has no enough value [" << position.second << "]."; | |||
| return; | |||
| } | |||
| nlohmann::json *second_index = &((*first_index)[position.second]); | |||
| if (second_index->count(kTensorName) == 0) { | |||
| MS_LOG(ERROR) << "Node [" << second_index->dump() << "] has no key [" << kTensorName << "]."; | |||
| return; | |||
| } | |||
| (*second_index)[kTensorName] = new_name; | |||
| return; | |||
| } | |||
| int AkgKernelBuild::op_cnt_ = 0; | |||
| std::mutex AkgKernelBuild::op_cnt_mtx_; | |||
| std::string AkgKernelBuild::GetProcessor(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::string device; | |||
| switch (AnfAlgo::GetProcessor(anf_node)) { | |||
| case Processor::AICORE: | |||
| device = kProcessorAiCore; | |||
| break; | |||
| case Processor::AICPU: | |||
| device = kProcessorAiCpu; | |||
| break; | |||
| case Processor::CUDA: | |||
| device = kProcessorCuda; | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "Unknown processor type."; | |||
| break; | |||
| } | |||
| return device; | |||
| } | |||
| bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *const input_size, | |||
| std::vector<size_t> *const output_size) { | |||
| if (input_size == nullptr || output_size == nullptr) { | |||
| MS_LOG(ERROR) << "input size or output size is nullptr"; | |||
| return false; | |||
| } | |||
| input_size->clear(); | |||
| output_size->clear(); | |||
| for (size_t i = 0; i < node_json[kInputDesc].size(); i++) { | |||
| for (size_t m = 0; m < node_json[kInputDesc][i].size(); m++) { | |||
| std::string dtype = node_json[kInputDesc][i][m][kDataType]; | |||
| size_t nbyte = GetDtypeNbyte(dtype); | |||
| size_t size_i = std::accumulate(node_json[kInputDesc][i][m][kShape].begin(), | |||
| node_json[kInputDesc][i][m][kShape].end(), nbyte, std::multiplies<size_t>()); | |||
| input_size->push_back(size_i); | |||
| } | |||
| } | |||
| for (size_t i = 0; i < node_json[kOutputDesc].size(); i++) { | |||
| std::string dtype = node_json[kOutputDesc][i][kDataType]; | |||
| size_t nbyte = GetDtypeNbyte(dtype); | |||
| size_t size_i = std::accumulate(node_json[kOutputDesc][i][kShape].begin(), node_json[kOutputDesc][i][kShape].end(), | |||
| nbyte, std::multiplies<size_t>()); | |||
| output_size->push_back(size_i); | |||
| } | |||
| return true; | |||
| } | |||
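| // Worked example (illustrative numbers): a "float32" input of shape [2, 3] | |||
| // gives nbyte == 4, and the accumulate above computes 4 * 2 * 3 = 24 bytes, | |||
| // since nbyte seeds the product over the shape dimensions. | |||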
| int AkgKernelBuild::GetOpCntInc() { | |||
| std::lock_guard<std::mutex> lock(op_cnt_mtx_);  // RAII: unlocked on every return path. | |||
| return op_cnt_++; | |||
| } | |||
| bool AkgKernelBuild::CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(inputs_json); | |||
| // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input. | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| auto op_info = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||
| if (op_info == nullptr) { | |||
| MS_LOG(ERROR) << "Apply kernel [" << op_name << "] op_info is nullptr"; | |||
| return false; | |||
| } | |||
| std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr = op_info->inputs_ptr(); | |||
| if (inputs_ptr.empty()) { | |||
| MS_LOG(INFO) << "Apply kernel [" << op_name << "] regist info has no input info"; | |||
| return true; | |||
| } | |||
| auto op_info_input_num = inputs_ptr.size(); | |||
| // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input. | |||
| std::vector<int> dyn_input_sizes; | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { | |||
| dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes)); | |||
| } | |||
| size_t real_input_index = 0; | |||
| std::vector<nlohmann::json> input_list; | |||
| for (size_t i = 0; i < op_info_input_num; i++) { | |||
| size_t input_tensor_num; | |||
| std::shared_ptr<OpIOInfo> input_ptr = inputs_ptr[i]; | |||
| std::string op_input_name; | |||
| if (input_ptr == nullptr) { | |||
| MS_LOG(ERROR) << "Apply kernel [" << op_name << "] regist input[" << i << "] is nullptr"; | |||
| return false; | |||
| } | |||
| op_input_name = input_ptr->name(); | |||
| if (dyn_input_sizes.empty()) { | |||
| input_tensor_num = 1; | |||
| } else { | |||
| input_tensor_num = IntToSize(dyn_input_sizes[i]); | |||
| } | |||
| input_list.clear(); | |||
| for (size_t input_i = 0; input_i < input_tensor_num; input_i++) { | |||
| // dtype : float16 | |||
| auto type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index); | |||
| std::string dtype = TypeId2String(type_id); | |||
| if (dtype.empty()) { | |||
| MS_LOG(ERROR) << "Op [" << op_name << "] input [" << input_i << "] data type is null. "; | |||
| return false; | |||
| } | |||
| nlohmann::json input_desc_json; | |||
| input_desc_json[kDataType] = dtype; | |||
| input_desc_json[kName] = op_input_name; | |||
| input_desc_json[kTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index)); | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index); | |||
| if (anf_node->func_graph() != nullptr && anf_node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) && | |||
| GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) { | |||
| MS_LOG(WARNING) << "we take input[" << real_input_index << "] of [" << anf_node->DebugString(2) | |||
| << "] as const tensor, shape: [" << Vector2Str(input_shape) | |||
| << "], value: " << input_desc_json[kValue]; | |||
| input_shape.clear(); | |||
| } | |||
| if (input_shape.empty()) { | |||
| input_shape.push_back(1); | |||
| } | |||
| input_desc_json[kShape] = input_shape; | |||
| input_list.emplace_back(input_desc_json); | |||
| real_input_index++; | |||
| } | |||
| inputs_json->emplace_back(input_list); | |||
| } | |||
| return true; | |||
| } | |||
| bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(outputs_json); | |||
| size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(anf_node); | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||
| auto outputs = op_info_ptr->outputs_ptr(); | |||
| for (size_t i = 0; i < output_tensor_num; i++) { | |||
| nlohmann::json output_json; | |||
| auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, i); | |||
| std::string dtype = TypeId2String(type_id); | |||
| if (dtype.empty()) { | |||
| MS_LOG(ERROR) << "Op [" << op_name << "] output [" << i << "] data type is null. "; | |||
| return false; | |||
| } | |||
| std::string output_name = outputs[i]->name(); | |||
| output_json[kDataType] = dtype; | |||
| output_json[kName] = output_name; | |||
| output_json[kTensorName] = "output_" + std::to_string(i) + "_" + std::to_string(GetOutputTensorIdxInc()); | |||
| output_json[kShape] = AnfAlgo::GetOutputDeviceShape(anf_node, i); | |||
| outputs_json->push_back(output_json); | |||
| } | |||
| return true; | |||
| } | |||
| void GetJson(const AnfNodePtr &anf_node, const std::vector<int> &dyn_input_sizes, | |||
| const std::shared_ptr<OpAttr> &op_attr, nlohmann::json *const attr_json, const ValuePtr &attr_value) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(op_attr); | |||
| MS_EXCEPTION_IF_NULL(attr_json); | |||
| std::string type = op_attr->type(); | |||
| if (type == "int") { | |||
| (*attr_json)[kValue] = GetValue<int>(attr_value); | |||
| } else if (type == "str") { | |||
| (*attr_json)[kValue] = GetValue<std::string>(attr_value); | |||
| } else if (type == "bool") { | |||
| (*attr_json)[kValue] = GetValue<bool>(attr_value); | |||
| } else if (type == "float") { | |||
| (*attr_json)[kValue] = GetValue<float>(attr_value); | |||
| } else if (type == "listInt") { | |||
| (*attr_json)[kValue] = GetValue<std::vector<int>>(attr_value); | |||
| } else if (type == "listStr") { | |||
| std::vector<std::string> data_format; | |||
| if (op_attr->name() == kArgDataformat) { | |||
| size_t tensor_args_num = !dyn_input_sizes.empty() ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node); | |||
| for (size_t format_i = 0; format_i < tensor_args_num; format_i++) { | |||
| auto input_format = AnfAlgo::GetInputFormat(anf_node, format_i); | |||
| data_format.push_back(input_format); | |||
| } | |||
| } else { | |||
| data_format = GetValue<std::vector<std::string>>(attr_value); | |||
| } | |||
| (*attr_json)[kValue] = data_format; | |||
| } else { | |||
| MS_LOG(WARNING) << "attr type:" << type; | |||
| } | |||
| } | |||
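| // For example (illustrative values): an "axis" attribute registered with type | |||
| // "listInt" ends up in the attrs json as {"name": "axis", "value": [0, 1]}; | |||
| // GetJson fills in kValue here and the caller adds kName afterwards. | |||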
| bool AkgKernelBuild::CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||
| const std::shared_ptr<OpInfo> &op_info, nlohmann::json *const attrs_json) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(attrs_json); | |||
| MS_EXCEPTION_IF_NULL(op_info); | |||
| std::vector<std::shared_ptr<OpAttr>> attrs = op_info->attrs_ptr(); | |||
| if (attrs.empty()) { | |||
| MS_LOG(INFO) << "Apply kernel [" << op_name << "] op info attrs is empty"; | |||
| return true; | |||
| } | |||
| std::vector<std::shared_ptr<OpIOInfo>> inputs = op_info->inputs_ptr(); | |||
| std::vector<int> dyn_input_sizes; | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { | |||
| dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes)); | |||
| } | |||
| if (inputs.empty()) { | |||
| MS_LOG(ERROR) << "Apply kernel [" << op_name << "] op info inputs is empty"; | |||
| return false; | |||
| } | |||
| // Create an input name list to match "x_shape" in attrs with "x" in the primitive. | |||
| std::map<size_t, std::string> op_info_shape_name; | |||
| for (size_t op_info_input_i = 0; op_info_input_i < inputs.size(); op_info_input_i++) { | |||
| std::string input_name = inputs[op_info_input_i]->name(); | |||
| std::string x_shape_name = input_name + "_shape"; | |||
| (void)op_info_shape_name.insert(make_pair(op_info_input_i, x_shape_name)); | |||
| } | |||
| for (const auto &op_attr : attrs) { | |||
| nlohmann::json attr_json; | |||
| ValuePtr attr_value = primitive->GetAttr(op_attr->name()); | |||
| if (attr_value == nullptr && op_attr->name() != kArgDataformat) { | |||
| if (op_attr->param_type() == "required") { | |||
| // match "x_shape" in att with "x" in primitive. | |||
| std::string attr_name = op_attr->name(); | |||
| auto find_item = std::find_if( | |||
| op_info_shape_name.begin(), op_info_shape_name.end(), | |||
| [attr_name](const std::map<size_t, std::string>::value_type item) { return item.second == attr_name; }); | |||
| if (find_item != op_info_shape_name.end()) { | |||
| if (!dyn_input_sizes.empty()) { | |||
| if (find_item->first >= dyn_input_sizes.size() - 1) { | |||
| MS_LOG(EXCEPTION) << "dyn_input_sizes list index:" << find_item->first | |||
| << " is out of range:" << dyn_input_sizes.size() - 1 << "."; | |||
| return false; | |||
| } | |||
| size_t tensor_idx = IntToSize(std::accumulate(&dyn_input_sizes[0], &dyn_input_sizes[find_item->first], 0)); | |||
| for (int input_i = 0; input_i < dyn_input_sizes[find_item->first]; input_i++) { | |||
| attr_json[kValue] = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, tensor_idx); | |||
| attr_json[kName] = op_attr->name(); | |||
| attrs_json->push_back(attr_json); | |||
| tensor_idx++; | |||
| } | |||
| } else { | |||
| attr_json[kValue] = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, find_item->first); | |||
| attr_json[kName] = op_attr->name(); | |||
| attrs_json->push_back(attr_json); | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "op [" << op_name << "] should have attr :" << op_attr->name(); | |||
| return false; | |||
| } | |||
| } | |||
| continue; | |||
| } | |||
| GetJson(anf_node, dyn_input_sizes, op_attr, &attr_json, attr_value); | |||
| attr_json[kName] = op_attr->name(); | |||
| attrs_json->push_back(attr_json); | |||
| } | |||
| return true; | |||
| } | |||
| bool AkgKernelBuild::GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||
| nlohmann::json *const node_json) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(node_json); | |||
| int op_cnt = GetOpCntInc(); | |||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||
| MS_EXCEPTION_IF_NULL(op_info_ptr); | |||
| // get basic params from currentNodeOpDesc | |||
| (*node_json)[kName] = op_name; | |||
| (*node_json)["impl_path"] = op_info_ptr->impl_path(); | |||
| (*node_json)["process"] = AkgKernelBuild::GetProcessor(anf_node); | |||
| (*node_json)["composite"] = false; | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| ValuePtr input_names_v = primitive->GetAttr(KInputNames); | |||
| if (input_names_v == nullptr) { | |||
| MS_LOG(ERROR) << "ApplyKernel has no input_names, op[" << op_name << "]."; | |||
| return false; | |||
| } | |||
| std::vector<std::string> prim_input_names = GetValue<const std::vector<std::string>>(input_names_v); | |||
| std::string inputs_name; | |||
| for (const auto &prim_input_name : prim_input_names) { | |||
| (void)inputs_name.append("_input_").append(prim_input_name).append("_"); | |||
| } | |||
| // input desc | |||
| nlohmann::json inputs_json; | |||
| if (!CreateInputDescJson(anf_node, &inputs_json)) { | |||
| MS_LOG(ERROR) << "Create input desc json failed, op[" << op_name << "]."; | |||
| return false; | |||
| } | |||
| (*node_json)[kInputDesc] = inputs_json; | |||
| MS_LOG(INFO) << "Akg create input desc json success."; | |||
| std::string inputs_shape = "inputs_shape_"; | |||
| for (auto &i : inputs_json) { | |||
| for (auto &m : i) { | |||
| std::string data_type = m[kDataType]; | |||
| (void)inputs_shape.append("_").append(data_type).append("_"); | |||
| for (auto &j : m[kShape]) { | |||
| size_t n = j; | |||
| (void)inputs_shape.append(std::to_string(n)).append("_"); | |||
| } | |||
| } | |||
| } | |||
| // output desc | |||
| nlohmann::json outputs_json; | |||
| if (!CreateOutputDescJson(anf_node, &outputs_json)) { | |||
| MS_LOG(ERROR) << "Create output desc json failed, op[" << op_name << "]."; | |||
| return false; | |||
| } | |||
| (*node_json)[kOutputDesc] = outputs_json; | |||
| MS_LOG(INFO) << "Akg create output desc json success."; | |||
| std::string outputs_shape = "outputs_shape_"; | |||
| for (auto &i : outputs_json) { | |||
| std::string data_type = i[kDataType]; | |||
| (void)outputs_shape.append("_").append(data_type).append("_"); | |||
| for (auto &j : i[kShape]) { | |||
| size_t m = j; | |||
| (void)outputs_shape.append(std::to_string(m)).append("_"); | |||
| } | |||
| } | |||
| // attribute desc | |||
| nlohmann::json attrs_json; | |||
| if (!CreateAttrDescJson(anf_node, op_name, op_info_ptr, &attrs_json)) { | |||
| MS_LOG(ERROR) << "Create attr desc json failed, op[" << op_name << "]."; | |||
| return false; | |||
| } | |||
| (*node_json)["attr"] = attrs_json; | |||
| std::string json_str = node_json->dump(); | |||
| size_t hash_id = std::hash<std::string>()(json_str); | |||
| json_name_ = op_name + "_"; | |||
| (void)json_name_.append(std::to_string(hash_id)); | |||
| MS_LOG(INFO) << "full scope name is : " << anf_node->fullname_with_scope() << ", json info name is : " << json_name_; | |||
| json_info_ = json_str; | |||
| (*node_json)["id"] = op_cnt; | |||
| (*node_json)["op"] = json_name_; | |||
| MS_LOG(INFO) << "Akg create node desc json success."; | |||
| return true; | |||
| } | |||
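| // The json_name_ above is content-addressed: hashing the dumped json makes | |||
| // identical kernels collapse to one name, which is what the kernel cache in | |||
| // OpBuild keys on. A minimal standalone sketch of the same scheme, with an | |||
| // illustrative function name that is not part of the original source: | |||
| // | |||
| //   std::string MakeKernelJsonName(const std::string &op_name, const std::string &json_str) { | |||
| //     size_t hash_id = std::hash<std::string>()(json_str); | |||
| //     return op_name + "_" + std::to_string(hash_id); | |||
| //   } | |||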
| KernelPackPtr AkgKernelBuild::OpBuild(const std::string &node_json, const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| auto processor = AkgKernelBuild::GetProcessor(anf_node); | |||
| auto cached_kernel_pack = SearchCache(json_name_, processor); | |||
| if (cached_kernel_pack != nullptr) { | |||
| MS_LOG(INFO) << "Use cached kernel, json_name_[" << json_name_ << "], fullname_with_scope[" | |||
| << anf_node->fullname_with_scope() << "]."; | |||
| return cached_kernel_pack; | |||
| } | |||
| PyObject *pModule = nullptr; | |||
| PyObject *pFunc = nullptr; | |||
| PyObject *pArg = nullptr; | |||
| PyObject *pRes = nullptr; | |||
| pModule = PyImport_ImportModule(kAkgModule); | |||
| if (pModule == nullptr) { | |||
| MS_LOG(ERROR) << "Failed to import [" << kAkgModule << "]."; | |||
| return nullptr; | |||
| } | |||
| pFunc = PyObject_GetAttrString(pModule, kCompileWithJsonFunc); | |||
| pArg = PyTuple_New(ARGS_SIZE); | |||
| (void)PyTuple_SetItem(pArg, 0, Py_BuildValue("s", node_json.c_str())); | |||
| (void)alarm(AUTODIFF_COMPILE_OVERTIME); | |||
| pRes = PyEval_CallObject(pFunc, pArg); | |||
| (void)alarm(0); | |||
| if (pRes == nullptr) { | |||
| MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileWithJsonFunc << "], args:\n(" | |||
| << AkgKernelBuild::PyObjectToStr(pArg) << ")."; | |||
| return nullptr; | |||
| } | |||
| if (PyObject_IsTrue(pRes) != 1) { | |||
| MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileWithJsonFunc << "], args:\n(" | |||
| << AkgKernelBuild::PyObjectToStr(pArg) << ")."; | |||
| return nullptr; | |||
| } | |||
| auto new_kernel_pack = InsertCache(json_name_, processor); | |||
| kernel::SaveJsonInfo(json_name_, json_info_); | |||
| if (new_kernel_pack == nullptr) { | |||
| MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name_ << "], fullname_with_scope[" | |||
| << anf_node->fullname_with_scope() << "]."; | |||
| return nullptr; | |||
| } | |||
| return new_kernel_pack; | |||
| } | |||
| KernelPackPtr AkgKernelBuild::BuildByJson(const AnfNodePtr &anf_node, std::vector<size_t> *const input_size, | |||
| std::vector<size_t> *const output_size) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| auto it = kAkgKernelAttrsProcessMap.find(op_name); | |||
| if (it != kAkgKernelAttrsProcessMap.end()) { | |||
| it->second(anf_node); | |||
| } | |||
| MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; | |||
| nlohmann::json node_json; | |||
| if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { | |||
| MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; | |||
| } | |||
| std::string json_str = node_json.dump(); | |||
| auto kernel_pack = OpBuild(json_str, anf_node); | |||
| if (kernel_pack == nullptr) { | |||
| MS_LOG(ERROR) << "Akg build failed op[" << op_name << "], json:" << json_str; | |||
| return nullptr; | |||
| } | |||
| if (!GetIOSize(node_json, input_size, output_size)) { | |||
| MS_LOG(ERROR) << "Cal mem size failed."; | |||
| return nullptr; | |||
| } | |||
| MS_LOG(INFO) << "Akg compile success, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) | |||
| << "]"; | |||
| return kernel_pack; | |||
| } | |||
| size_t AkgKernelBuild::GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| auto cnode = anf_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (input_idx + 1 >= cnode->inputs().size()) { | |||
| MS_EXCEPTION(ArgumentError) << "input_idx [" << input_idx << "] is out of index of inputs of [" | |||
| << cnode->inputs().size() - 1 << "][" << cnode->DebugString() << "]"; | |||
| } | |||
| auto input_node = cnode->input(input_idx + 1); | |||
| if (input_tensor_idx_.find(input_node) == input_tensor_idx_.end()) { | |||
| size_t index = input_tensor_idx_.size(); | |||
| input_tensor_idx_[input_node] = index; | |||
| } | |||
| return input_tensor_idx_[input_node]; | |||
| } | |||
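| // Note (added comment): the map above deduplicates inputs. If the same | |||
| // AnfNode feeds several input slots, every slot receives the same | |||
| // "input_<idx>" tensor name, so the generated kernel binds one buffer | |||
| // instead of several copies. | |||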
| size_t AkgKernelBuild::GetOutputTensorIdxInc() { | |||
| return output_tensor_idx_++; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,76 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_ | |||
| #include <unordered_map> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <map> | |||
| #include <utility> | |||
| #include <mutex> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "ir/dtype.h" | |||
| #include <nlohmann/json.hpp> | |||
| #include "backend/kernel_compiler/common_utils.h" | |||
| #include "backend/kernel_compiler/oplib/oplib.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class AkgKernelBuild { | |||
| public: | |||
| AkgKernelBuild() { | |||
| input_tensor_idx_ = {}; | |||
| output_tensor_idx_ = 0; | |||
| } | |||
| ~AkgKernelBuild() = default; | |||
| KernelPackPtr BuildByJson(const AnfNodePtr &anf_node, std::vector<size_t> *const input_size, | |||
| std::vector<size_t> *const output_size); | |||
| static std::string GetProcessor(const AnfNodePtr &anf_node); | |||
| static std::string PyObjectToStr(PyObject *const PyObj); | |||
| protected: | |||
| bool CreateInputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const inputs_json); | |||
| bool CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::json *const outputs_json); | |||
| bool CreateAttrDescJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||
| const std::shared_ptr<OpInfo> &op_info, nlohmann::json *const attrs_json); | |||
| KernelPackPtr OpBuild(const std::string &node_json, const AnfNodePtr &anf_node); | |||
| int GetOpCntInc(); | |||
| size_t GetInputTensorIdxInc(const AnfNodePtr &anf_node, size_t input_idx); | |||
| size_t GetOutputTensorIdxInc(); | |||
| bool GenerateSingleKernelJson(const AnfNodePtr &anf_node, const std::string &op_name, | |||
| nlohmann::json *const node_json); | |||
| static int op_cnt_; | |||
| // lock for variable fusionOpCnt in singleton mode | |||
| static std::mutex op_cnt_mtx_; | |||
| std::string json_name_; | |||
| std::string json_info_; | |||
| std::unordered_map<AnfNodePtr, size_t> input_tensor_idx_; | |||
| size_t output_tensor_idx_; | |||
| }; | |||
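| // A hedged caller-side sketch of the builder (the concrete call site in the | |||
| // framework may differ; anf_node stands for any real kernel node): | |||
| // | |||
| //   AkgKernelBuild builder; | |||
| //   std::vector<size_t> input_sizes; | |||
| //   std::vector<size_t> output_sizes; | |||
| //   auto kernel_pack = builder.BuildByJson(anf_node, &input_sizes, &output_sizes); | |||
| //   if (kernel_pack == nullptr) { | |||
| //     // Compilation failed; the caller is expected to handle this. | |||
| //   } | |||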
| bool GetIOSize(const nlohmann::json &node_json, std::vector<size_t> *const input_size, | |||
| std::vector<size_t> *const output_size); | |||
| void SetTensorName(const std::string &tag, const std::string &new_name, const std::pair<size_t, size_t> &position, | |||
| nlohmann::json *const node_json); | |||
| std::string GetTensorName(const nlohmann::json &node_json, const std::string &tag, | |||
| const std::pair<size_t, size_t> &position); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_AKGKERNELBUILD_H_ | |||
| @@ -0,0 +1,50 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/akg/akg_kernel_metadata.h" | |||
| #include <memory> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "backend/kernel_compiler/oplib/oplib.h" | |||
| #include "backend/kernel_compiler/common_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void AkgMetadataInfo(const CNodePtr &kernel_node, | |||
| std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_info_list); | |||
| std::string op_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| for (size_t i = 0; i < support_devices.size(); i++) { | |||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAKG); | |||
| if (op_info_ptr == nullptr) { | |||
| continue; | |||
| } | |||
| if (!ParseMetadata(kernel_node, op_info_ptr, Processor(i), kernel_info_list)) { | |||
| MS_LOG(WARNING) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "] failed."; | |||
| } else { | |||
| MS_LOG(DEBUG) << "Akg parsed metadata of op[" << op_name << "], device[" << support_devices[i] << "]."; | |||
| break; | |||
| } | |||
| } | |||
| if (kernel_info_list->empty()) { | |||
| MS_LOG(WARNING) << "Akg dose not has metadata of op[" << op_name << "]."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,31 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/kernel_build_info.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void AkgMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_AKG_KERNEL_METADATA_H_ | |||
| @@ -0,0 +1,422 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" | |||
| #include <algorithm> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <unordered_set> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <Python.h> | |||
| #include "ir/dtype.h" | |||
| #include "ir/func_graph.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "backend/kernel_compiler/common_utils.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||
| #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" | |||
| #include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr int32_t PARALLEL_ARGS_SIZE = 3; | |||
| constexpr int32_t PROCESS_NUM = 16; | |||
| constexpr int32_t TIME_OUT = 300; | |||
| constexpr auto kOpDesc = "op_desc"; | |||
| constexpr auto kShape = "shape"; | |||
| constexpr auto kDataType = "data_type"; | |||
| constexpr auto kInputDesc = "input_desc"; | |||
| constexpr auto kOutputDesc = "output_desc"; | |||
| constexpr auto kTensorName = "tensor_name"; | |||
| constexpr auto kCompileAkgKernelParallelFunc = "compile_akg_kernel_parallel"; | |||
| constexpr auto kMultiProcModule = "mindspore._extends.parallel_compile.akg_compiler.multi_process_compiler"; | |||
| namespace { | |||
| void UpdateTensorNameInJson(const std::vector<AnfNodePtr> &anf_nodes, | |||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||
| for (auto const &anf_node : anf_nodes) { | |||
| std::vector<int> dyn_input_sizes; | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) { | |||
| dyn_input_sizes = GetValue<const std::vector<int>>(primitive->GetAttr(kAttrDynInputSizes)); | |||
| } | |||
| bool is_dynamic_input = !dyn_input_sizes.empty(); | |||
| size_t input_num = is_dynamic_input ? dyn_input_sizes.size() : AnfAlgo::GetInputTensorNum(anf_node); | |||
| size_t real_input_index = 0; | |||
| for (size_t i = 0; i < input_num; ++i) { | |||
| size_t input_tensor_num = is_dynamic_input ? IntToSize(dyn_input_sizes[i]) : 1; | |||
| for (size_t j = 0; j < input_tensor_num; ++j) { | |||
| auto tmp_input = GetKernelInput(anf_node, real_input_index); | |||
| std::string tensor_name = GetTensorName((*node_json_map)[anf_node], kInputDesc, std::make_pair(i, j)); | |||
| if (node_json_map->find(tmp_input.first) != node_json_map->end()) { | |||
| std::string new_tensor_name = | |||
| GetTensorName((*node_json_map)[tmp_input.first], kOutputDesc, std::make_pair(0, tmp_input.second)); | |||
| SetTensorName(kInputDesc, new_tensor_name, std::make_pair(i, j), &((*node_json_map)[anf_node])); | |||
| MS_LOG(DEBUG) << "Update [" << real_input_index << "] input [" << tensor_name << "] of [" | |||
| << anf_node->fullname_with_scope() << "] to [" << tmp_input.second << "] output [" | |||
| << new_tensor_name << "] of [" << tmp_input.first->fullname_with_scope() << "]."; | |||
| } else { | |||
| MS_LOG(DEBUG) << "[" << real_input_index << "] input " << tensor_name << "] of [" | |||
| << anf_node->fullname_with_scope() << "] is out input."; | |||
| } | |||
| real_input_index++; | |||
| } | |||
| } | |||
| } | |||
| } | |||
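| // In effect, this pass rewires the fused graph: a consumer's "input_k" tensor | |||
| // name is replaced by the producer's "output_..." name whenever the producer | |||
| // lives inside the same fusion scope, so the composite json describes one | |||
| // connected dataflow instead of isolated nodes. | |||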
| nlohmann::json GetInputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list, | |||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||
| nlohmann::json inputs_json; | |||
| auto input_index = GetInputIndex(anf_nodes, input_list); | |||
| for (size_t i = 0; i < input_index.size(); ++i) { | |||
| auto tmp_input = input_index[i]; | |||
| auto type_id = AnfAlgo::GetInputDeviceDataType(tmp_input.first, tmp_input.second.first); | |||
| std::string dtype = TypeId2String(type_id); | |||
| nlohmann::json input_desc_json; | |||
| input_desc_json[kTensorName] = GetTensorName((*node_json_map)[tmp_input.first], kInputDesc, tmp_input.second); | |||
| input_desc_json[kDataType] = dtype; | |||
| input_desc_json[kShape] = AnfAlgo::GetInputDeviceShape(tmp_input.first, tmp_input.second.first); | |||
| inputs_json.emplace_back(std::vector<nlohmann::json>{input_desc_json}); | |||
| } | |||
| return inputs_json; | |||
| } | |||
| nlohmann::json GetOutputsJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list, | |||
| const std::vector<AnfNodePtr> &output_list, const nlohmann::json &inputs_json, | |||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||
| nlohmann::json outputs_json; | |||
| auto output_index = GetOutputIndex(anf_nodes, input_list, output_list); | |||
| for (size_t i = 0; i < output_index.size(); ++i) { | |||
| auto tmp_output = output_index[i]; | |||
| bool found = false; | |||
| nlohmann::json output_desc_json; | |||
| for (size_t input_i = 0; input_i < input_list.size(); ++input_i) { | |||
| if (tmp_output.first == input_list[input_i]) { | |||
| output_desc_json = inputs_json[input_i][0]; | |||
| found = true; | |||
| break; | |||
| } | |||
| } | |||
| if (!found) { | |||
| auto type_id = AnfAlgo::GetOutputDeviceDataType(tmp_output.first, tmp_output.second); | |||
| std::string dtype = TypeId2String(type_id); | |||
| output_desc_json[kTensorName] = | |||
| GetTensorName((*node_json_map)[tmp_output.first], kOutputDesc, std::make_pair(0, tmp_output.second)); | |||
| output_desc_json[kDataType] = dtype; | |||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(tmp_output.first, tmp_output.second); | |||
| if (output_shape.empty()) { | |||
| output_shape.push_back(1); | |||
| } | |||
| output_desc_json[kShape] = output_shape; | |||
| } | |||
| outputs_json.emplace_back(output_desc_json); | |||
| } | |||
| return outputs_json; | |||
| } | |||
| std::pair<std::vector<std::string>, std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>>> PreProcessJsonForBuild( | |||
| const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) { | |||
| // Remove cached nodes, gather unique nodes, and collect repeated nodes which need postprocess. | |||
| std::vector<std::string> jsons; | |||
| std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> repeat_nodes; | |||
| std::unordered_set<std::string> json_name_set; | |||
| for (const auto &[builder, anf_node] : build_args) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| auto json_name = builder.json_name(); | |||
| MS_LOG(DEBUG) << "Akg start compile op: " << json_name; | |||
| auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); | |||
| if (cached_kernel_pack != nullptr) { | |||
| MS_LOG(DEBUG) << "Use cached kernel, json_name_[" << json_name << "], fullname_with_scope[" | |||
| << anf_node->fullname_with_scope() << "]."; | |||
| auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack); | |||
| kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); | |||
| kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); | |||
| AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); | |||
| continue; | |||
| } | |||
| if (json_name_set.count(json_name) != 0) { | |||
| repeat_nodes.push_back({builder, anf_node}); | |||
| continue; | |||
| } | |||
| json_name_set.insert(json_name); | |||
| auto node_json = builder.kernel_json(); | |||
| kernel::SaveJsonInfo(json_name, node_json); | |||
| jsons.push_back(node_json); | |||
| } | |||
| return std::make_pair(jsons, repeat_nodes); | |||
| } | |||
| bool PostProcessAfterCompile(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args, | |||
| const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &repeat_nodes) { | |||
| for (const auto &[builder, anf_node] : build_args) { | |||
| auto json_name = builder.json_name(); | |||
| auto new_kernel_pack = tbe::TbeUtils::InsertCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); | |||
| if (new_kernel_pack == nullptr) { | |||
| MS_LOG(ERROR) << "Insert to cache failed, json_name_[" << json_name << "], fullname_with_scope[" | |||
| << anf_node->fullname_with_scope() << "]."; | |||
| return false; | |||
| } | |||
| auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(new_kernel_pack); | |||
| kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); | |||
| kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); | |||
| AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); | |||
| MS_LOG(DEBUG) << "Akg compile " << json_name << " kernel and insert cache successfully!"; | |||
| } | |||
| for (const auto &[builder, anf_node] : repeat_nodes) { | |||
| auto node_json = builder.kernel_json(); | |||
| auto json_name = builder.json_name(); | |||
| auto cached_kernel_pack = tbe::TbeUtils::SearchCache(json_name, AkgKernelBuild::GetProcessor(anf_node)); | |||
| if (cached_kernel_pack == nullptr) { | |||
| return false; | |||
| } | |||
| MS_LOG(INFO) << "Use just compiled kernel, json_name_[" << json_name << "], fullname_with_scope[" | |||
| << anf_node->fullname_with_scope() << "]."; | |||
| auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(cached_kernel_pack); | |||
| kernel_mod_ptr->SetInputSizeList(builder.input_size_list()); | |||
| kernel_mod_ptr->SetOutputSizeList(builder.output_size_list()); | |||
| AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace | |||
| bool AkgAscendKernelBuilder::CollectJson(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| MS_LOG(INFO) << "AKG start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; | |||
| auto it = kAkgKernelAttrsProcessMap.find(op_name); | |||
| if (it != kAkgKernelAttrsProcessMap.end()) { | |||
| it->second(anf_node); | |||
| } | |||
| MS_LOG(INFO) << "Akg start compile, op[" << op_name << "], device[" << AkgKernelBuild::GetProcessor(anf_node) << "]"; | |||
| nlohmann::json node_json; | |||
| if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { | |||
| MS_LOG(ERROR) << "Op[" << op_name << "] create single kernel json failed."; | |||
| } | |||
| kernel_json_ = node_json.dump(); | |||
| if (!GetIOSize(node_json, &input_size_list_, &output_size_list_)) { | |||
| MS_LOG(ERROR) << "Cal mem size failed."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool AkgAscendKernelBuilder::GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes, | |||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map) { | |||
| for (auto const &anf_node : anf_nodes) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| if (!AnfAlgo::IsRealKernel(anf_node)) { | |||
| MS_LOG(ERROR) << "Invalid anf node to build [" << anf_node->fullname_with_scope() << "]."; | |||
| return false; | |||
| } | |||
| auto it = kAkgKernelAttrsProcessMap.find(op_name); | |||
| if (it != kAkgKernelAttrsProcessMap.end()) { | |||
| it->second(anf_node); | |||
| } | |||
| nlohmann::json node_json; | |||
| if (!GenerateSingleKernelJson(anf_node, op_name, &node_json)) { | |||
| MS_LOG(ERROR) << "Op [" << op_name << "] create single kernel json failed."; | |||
| return false; | |||
| } | |||
| // No need for composite op. | |||
| node_json.erase("id"); | |||
| node_json.erase("op"); | |||
| node_json.erase("composite"); | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| if (primitive->GetAttr("fusion") != nullptr) { | |||
| node_json["fusion"] = primitive->GetAttr("fusion")->ToString(); | |||
| } | |||
| (*node_json_map)[anf_node] = node_json; | |||
| } | |||
| return true; | |||
| } | |||
| bool AkgAscendKernelBuilder::CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes, | |||
| const std::vector<AnfNodePtr> &input_list, | |||
| const std::vector<AnfNodePtr> &output_list) { | |||
| if (anf_nodes.empty() || input_list.empty()) { | |||
| MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size() | |||
| << "]."; | |||
| return false; | |||
| } | |||
| MS_LOG(INFO) << "anf_nodes [" << output_list.size() << "], input_list [" << anf_nodes.size() << "], output_list [" | |||
| << input_list.size() << "]."; | |||
| std::map<AnfNodePtr, nlohmann::json> node_json_map; | |||
| if (!GenJsonAndPreprocess4Fused(anf_nodes, &node_json_map)) { | |||
| return false; | |||
| } | |||
| UpdateTensorNameInJson(anf_nodes, &node_json_map); | |||
| nlohmann::json fused_node_json; | |||
| std::vector<nlohmann::json> node_json_desc; | |||
| std::transform(anf_nodes.begin(), anf_nodes.end(), std::back_inserter(node_json_desc), | |||
| [&node_json_map](const AnfNodePtr &anf_node) { return node_json_map[anf_node]; }); | |||
| fused_node_json[kOpDesc] = node_json_desc; | |||
| fused_node_json[kInputDesc] = GetInputsJson(anf_nodes, input_list, &node_json_map); | |||
| fused_node_json[kOutputDesc] = | |||
| GetOutputsJson(anf_nodes, input_list, output_list, fused_node_json[kInputDesc], &node_json_map); | |||
| size_t hash_id = std::hash<std::string>()(fused_node_json.dump()); | |||
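| // Name the fused kernel "Fused_<graph_kernel_attr>_<hash>" so identical fusions map to the same cache entry. | |||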
| json_name_ = "Fused_"; | |||
| auto fg = anf_nodes[0]->func_graph(); | |||
| MS_EXCEPTION_IF_NULL(fg); | |||
| auto attr_val = fg->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL); | |||
| if (attr_val != nullptr) { | |||
| auto fg_attr = GetValue<std::string>(attr_val); | |||
| (void)json_name_.append(fg_attr).append("_"); | |||
| } | |||
| (void)json_name_.append(std::to_string(hash_id)); | |||
| fused_node_json["composite_graph"] = fg->ToString(); | |||
| fused_node_json["op"] = json_name_; | |||
| fused_node_json["platform"] = "AKG"; | |||
| fused_node_json["process"] = "aicore"; | |||
| fused_node_json["composite"] = true; | |||
| kernel_json_ = fused_node_json.dump(); | |||
| if (!GetIOSize(fused_node_json, &input_size_list_, &output_size_list_)) { | |||
| MS_LOG(ERROR) << "Cal mem size failed."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| void GenParallelCompileFuncArgs(const std::vector<std::string> &kernel_jsons, PyObject **p_args) { | |||
| MS_EXCEPTION_IF_NULL(p_args); | |||
| *p_args = PyTuple_New(PARALLEL_ARGS_SIZE); | |||
| PyObject *arg1 = PyList_New(kernel_jsons.size()); | |||
| for (Py_ssize_t i = 0; i < PyList_Size(arg1); ++i) { | |||
| PyList_SetItem(arg1, i, Py_BuildValue("s", kernel_jsons[i].c_str())); | |||
| } | |||
| PyObject *arg2 = Py_BuildValue("i", PROCESS_NUM); | |||
| PyObject *arg3 = Py_BuildValue("i", TIME_OUT); | |||
| (void)PyTuple_SetItem(*p_args, 0, arg1); | |||
| (void)PyTuple_SetItem(*p_args, 1, arg2); | |||
| (void)PyTuple_SetItem(*p_args, 2, arg3); | |||
| } | |||
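| // The python entry point (kCompileAkgKernelParallelFunc in kMultiProcModule) is expected to take | |||
| // (json_list, process_num, timeout) and return a truthy value on success, matching the argument | |||
| // packing above and the PyObject_IsTrue check below. | |||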
| bool AkgOpParallelBuild(const std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> &build_args) { | |||
| auto [jsons, repeat_nodes] = PreProcessJsonForBuild(build_args); | |||
| if (jsons.empty()) { | |||
| return true; | |||
| } | |||
| // Try to call the python method to compile the nodes in parallel. | |||
| PyObject *p_module = nullptr; | |||
| PyObject *p_func = nullptr; | |||
| PyObject *p_arg = nullptr; | |||
| PyObject *p_res = nullptr; | |||
| p_module = PyImport_ImportModule(kMultiProcModule); | |||
| if (p_module == nullptr) { | |||
| MS_LOG(ERROR) << "Failed to import [" << kMultiProcModule << "]."; | |||
| return false; | |||
| } | |||
| p_func = PyObject_GetAttrString(p_module, kCompileAkgKernelParallelFunc); | |||
| GenParallelCompileFuncArgs(jsons, &p_arg); | |||
| MS_LOG(DEBUG) << "Call function [" << kCompileAkgKernelParallelFunc << "], try to compile " << jsons.size() | |||
| << " Akg kernels parallelly."; | |||
| p_res = PyEval_CallObject(p_func, p_arg); | |||
| if (p_res == nullptr) { | |||
| PyErr_Print(); | |||
| MS_LOG(ERROR) << "No ret got, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" | |||
| << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; | |||
| return false; | |||
| } | |||
| if (PyObject_IsTrue(p_res) != 1) { | |||
| PyErr_Print(); | |||
| MS_LOG(ERROR) << "Illegal ret, failed to call function [" << kCompileAkgKernelParallelFunc << "], args:\n(" | |||
| << AkgKernelBuild::PyObjectToStr(p_arg) << ")."; | |||
| return false; | |||
| } | |||
| if (!PostProcessAfterCompile(build_args, repeat_nodes)) { | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||
| std::vector<std::pair<AkgAscendKernelBuilder, AnfNodePtr>> json_and_node; | |||
| for (const auto &anf_node : anf_nodes) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| AkgAscendKernelBuilder akg_cce_kernel_builder; | |||
| KernelPackPtr kernel_pack = nullptr; | |||
| auto cnode = anf_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::IsGraphKernel(cnode)) { | |||
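| // Composite (graph kernel) node: collect one fused json over all real kernels in its sub-graph. | |||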
| auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(cnode); | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| auto mng = func_graph->manager(); | |||
| if (mng == nullptr) { | |||
| mng = Manage(func_graph, true); | |||
| func_graph->set_manager(mng); | |||
| } | |||
| std::vector<AnfNodePtr> node_list; | |||
| std::vector<AnfNodePtr> input_list; | |||
| std::vector<AnfNodePtr> output_list; | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| MS_LOG(INFO) << "Akg start compile composite op[" << op_name << "]"; | |||
| GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list); | |||
| if (!akg_cce_kernel_builder.CollectFusedJson(node_list, input_list, output_list)) { | |||
| MS_EXCEPTION(UnknownError) << "Akg build failed composite op[" << op_name << "]."; | |||
| } | |||
| } else { | |||
| if (!akg_cce_kernel_builder.CollectJson(anf_node)) { | |||
| MS_EXCEPTION(UnknownError) << "Akg build failed op[" << AnfAlgo::GetCNodeName(anf_node) << "]."; | |||
| } | |||
| } | |||
| json_and_node.push_back({akg_cce_kernel_builder, anf_node}); | |||
| } | |||
| if (json_and_node.empty()) { | |||
| MS_LOG(DEBUG) << "There is no kernel needed to be compiled."; | |||
| return true; | |||
| } | |||
| return AkgOpParallelBuild(json_and_node); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,56 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ | |||
| #include <string> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <map> | |||
| #include "ir/anf.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "backend/kernel_compiler/akg/akg_kernel_build.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class AkgAscendKernelBuilder : public AkgKernelBuild { | |||
| public: | |||
| AkgAscendKernelBuilder() = default; | |||
| ~AkgAscendKernelBuilder() = default; | |||
| bool CollectJson(const AnfNodePtr &anf_node); | |||
| bool CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes, const std::vector<AnfNodePtr> &input_list, | |||
| const std::vector<AnfNodePtr> &output_list); | |||
| std::string json_name() const { return json_name_; } | |||
| std::string kernel_json() const { return kernel_json_; } | |||
| const std::vector<size_t> &input_size_list() const { return input_size_list_; } | |||
| const std::vector<size_t> &output_size_list() const { return output_size_list_; } | |||
| private: | |||
| bool GenJsonAndPreprocess4Fused(const std::vector<AnfNodePtr> &anf_nodes, | |||
| std::map<AnfNodePtr, nlohmann::json> *node_json_map); | |||
| std::string kernel_json_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| }; | |||
| bool AkgAscendKernelParallelBuild(const std::vector<AnfNodePtr> &anf_nodes); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_BUILD_H_ | |||
| @@ -0,0 +1,132 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" | |||
| #include <algorithm> | |||
| #include <fstream> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <mutex> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include "nlohmann/json.hpp" | |||
| #include "runtime/rt.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| using std::fstream; | |||
| using std::map; | |||
| using std::mutex; | |||
| using std::string; | |||
| using TbeTaskInfoPtr = std::shared_ptr<ge::model_runner::TbeTaskInfo>; | |||
| using tbe::KernelManager; | |||
| constexpr uint32_t DEFAULT_BLOCK_DIM = 1; | |||
| /** | |||
| * @brief The info table holds the func_stub, block_dim and kernel file buffer for each compiled kernel. | |||
| */ | |||
| AkgKernelMod::AkgKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {} | |||
| void AkgKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; } | |||
| void AkgKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; } | |||
| void AkgKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; } | |||
| const std::vector<size_t> &AkgKernelMod::GetInputSizeList() const { return input_size_list_; } | |||
| const std::vector<size_t> &AkgKernelMod::GetOutputSizeList() const { return output_size_list_; } | |||
| const std::vector<size_t> &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } | |||
| bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) { | |||
| if (stream_ptr == nullptr) { | |||
| MS_LOG(ERROR) << "stream_ptr should not be nullptr."; | |||
| return false; | |||
| } | |||
| if (kernel_pack_ == nullptr) { | |||
| MS_LOG(ERROR) << "kernel pack should not be nullptr."; | |||
| return false; | |||
| } | |||
| uint32_t block_dim = DEFAULT_BLOCK_DIM;  // default block dim is 1. | |||
| auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); | |||
| if (func_stub == 0) { | |||
| MS_LOG(ERROR) << "GenFuncStub failed."; | |||
| return false; | |||
| } | |||
| // pack all addresses into a vector. | |||
| std::vector<void *> runtime_args; | |||
| (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args), | |||
| [](const AddressPtr &input) -> void * { return input->addr; }); | |||
| (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args), | |||
| [](const AddressPtr &output) -> void * { return output->addr; }); | |||
| rtL2Ctrl_t *l2ctrl = nullptr; | |||
| auto stream = reinterpret_cast<rtStream_t *>(stream_ptr); | |||
| if (RT_ERROR_NONE != rtKernelLaunch(reinterpret_cast<void *>(func_stub), block_dim, runtime_args.data(), | |||
| SizeToUint(sizeof(void *) * runtime_args.size()), l2ctrl, stream)) { | |||
| MS_LOG(ERROR) << "Call runtime rtKernelLaunch error."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, uint32_t stream_id) { | |||
| if (kernel_pack_ == nullptr) { | |||
| MS_LOG(EXCEPTION) << "kernel pack should not be nullptr."; | |||
| } | |||
| std::vector<uint8_t> args; | |||
| const uint32_t args_size = 0; | |||
| std::vector<uint8_t> sm_desc; | |||
| void *binary = nullptr; | |||
| const uint32_t binary_size = 0; | |||
| std::vector<uint8_t> meta_data; | |||
| std::vector<void *> input_data_addrs; | |||
| std::vector<void *> output_data_addrs; | |||
| std::vector<void *> workspace_addrs; | |||
| // pack all addresses into a vector. | |||
| (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs), | |||
| [](const AddressPtr &input) -> void * { return input->addr; }); | |||
| (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs), | |||
| [](const AddressPtr &output) -> void * { return output->addr; }); | |||
| uint32_t block_dim = DEFAULT_BLOCK_DIM;  // default block dim is 1. | |||
| auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); | |||
| if (func_stub == 0) { | |||
| MS_LOG(EXCEPTION) << "GenFuncStub failed."; | |||
| } | |||
| std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_); | |||
| MS_LOG(DEBUG) << "The block_dim is:" << block_dim; | |||
| TbeTaskInfoPtr task_info_ptr = std::make_shared<ge::model_runner::TbeTaskInfo>( | |||
| kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, | |||
| input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); | |||
| return {task_info_ptr}; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/ascend_kernel_mod.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class AkgKernelMod : public AscendKernelMod { | |||
| public: | |||
| explicit AkgKernelMod(const KernelPackPtr &kernel_pack); | |||
| ~AkgKernelMod() final {} | |||
| void SetInputSizeList(const std::vector<size_t> &size_list); | |||
| void SetOutputSizeList(const std::vector<size_t> &size_list); | |||
| void SetWorkspaceSizeList(const std::vector<size_t> &size_list); | |||
| const std::vector<size_t> &GetInputSizeList() const override; | |||
| const std::vector<size_t> &GetOutputSizeList() const override; | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | |||
| std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; | |||
| private: | |||
| KernelPackPtr kernel_pack_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| }; | |||
| using AkgKernelModPtr = std::shared_ptr<AkgKernelMod>; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_ASCEND_AKG_ASCEND_KERNEL_MOD_H_ | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h" | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "backend/kernel_compiler/akg/akg_kernel_build.h" | |||
| #include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| AkgKernelBuild akg_kernel_build; | |||
| std::vector<size_t> input_size_list; | |||
| std::vector<size_t> output_size_list; | |||
| KernelPackPtr kernel_pack = akg_kernel_build.BuildByJson(anf_node, &input_size_list, &output_size_list); | |||
| MS_EXCEPTION_IF_NULL(kernel_pack); | |||
| auto kernel_mod_ptr = std::make_shared<GpuKernelMod>(kernel_pack); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod_ptr); | |||
| kernel_mod_ptr->SetInputSizeList(input_size_list); | |||
| kernel_mod_ptr->SetOutputSizeList(output_size_list); | |||
| return kernel_mod_ptr; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,28 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "base/base.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| KernelModPtr AkgGpuKernelBuild(const AnfNodePtr &anf_node); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_ | |||
| @@ -0,0 +1,116 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" | |||
| #include <fstream> | |||
| #include <algorithm> | |||
| #include "nlohmann/json.hpp" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| using std::fstream; | |||
| using std::string; | |||
| using std::vector; | |||
| GpuKernelManagerPtr GpuKernelMod::kernelmanager_ = std::make_shared<GpuKernelManager>(); | |||
| GpuKernelManager::GpuKernelManager() {} | |||
| CUresult GpuKernelManager::GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, | |||
| vector<uint32_t> *thread_info, CUfunction *func) { | |||
| if (kernel_pack->GetJson() == nullptr || kernel_pack->GetJson()->contents == nullptr || | |||
| kernel_pack->GetKernel() == nullptr || kernel_pack->GetKernel()->contents == nullptr) { | |||
| MS_LOG(ERROR) << "GPU:Invalid kernel pack, json or kernel is nullptr."; | |||
| return CUDA_ERROR_INVALID_IMAGE; | |||
| } | |||
| auto js = nlohmann::json::parse(kernel_pack->GetJson()->contents, | |||
| kernel_pack->GetJson()->contents + kernel_pack->GetJson()->len); | |||
| string fn = js["kernelName"]; | |||
| if (!force_reload) { | |||
| auto iter = infotable_.find(fn); | |||
| if (iter != infotable_.end()) { | |||
| auto kernelmeta = iter->second; | |||
| *thread_info = kernelmeta->thread_info_; | |||
| *func = kernelmeta->func_addr_; | |||
| return CUDA_SUCCESS; | |||
| } | |||
| } | |||
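| // Grid dims first (blockIdx.x/y/z), then block dims (threadIdx.x/y/z), in the order cuLaunchKernel consumes them. | |||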
| thread_info->emplace_back(js["blockIdx.x"]); | |||
| thread_info->emplace_back(js["blockIdx.y"]); | |||
| thread_info->emplace_back(js["blockIdx.z"]); | |||
| thread_info->emplace_back(js["threadIdx.x"]); | |||
| thread_info->emplace_back(js["threadIdx.y"]); | |||
| thread_info->emplace_back(js["threadIdx.z"]); | |||
| CUmodule module; | |||
| CUresult result = cuModuleLoadData(&module, kernel_pack->GetKernel()->contents); | |||
| if (result != CUDA_SUCCESS) { | |||
| MS_LOG(ERROR) << "cuModuleLoadData failed."; | |||
| return result; | |||
| } | |||
| result = cuModuleGetFunction(func, module, fn.c_str()); | |||
| if (result != CUDA_SUCCESS) { | |||
| MS_LOG(ERROR) << "cuModuleGetFunction failed."; | |||
| return result; | |||
| } | |||
| infotable_[fn] = std::make_shared<GpuKernelMeta>(*func, module, *thread_info); | |||
| return result; | |||
| } | |||
| GpuKernelMod::GpuKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {} | |||
| void GpuKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; } | |||
| void GpuKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; } | |||
| const std::vector<size_t> &GpuKernelMod::GetInputSizeList() const { return input_size_list_; } | |||
| const std::vector<size_t> &GpuKernelMod::GetOutputSizeList() const { return output_size_list_; } | |||
| const std::vector<size_t> &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } | |||
| bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) { | |||
| if (stream_ptr == nullptr) { | |||
| MS_LOG(ERROR) << "stream_ptr should not be nullptr."; | |||
| return false; | |||
| } | |||
| if (kernel_pack_ == nullptr) { | |||
| MS_LOG(ERROR) << "kernel pack should not be nullptr."; | |||
| return false; | |||
| } | |||
| vector<uint32_t> thread_info; | |||
| CUfunction kernel_addr; | |||
| CUresult result = kernelmanager_->GetFunction(kernel_pack_, false, &thread_info, &kernel_addr); | |||
| if (result != CUDA_SUCCESS) { | |||
| MS_LOG(ERROR) << "GetFunction failed."; | |||
| return false; | |||
| } | |||
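| // cuLaunchKernel takes an array of pointers to the kernel parameters, so pass the address of each addr field. | |||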
| std::vector<void *> runtimeargs; | |||
| (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs), | |||
| [](const AddressPtr &input) -> void * { return reinterpret_cast<void *>(&(input->addr)); }); | |||
| (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs), | |||
| [](const AddressPtr &output) -> void * { return reinterpret_cast<void *>(&(output->addr)); }); | |||
| result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4], | |||
| thread_info[5], 0, reinterpret_cast<CUstream>(stream_ptr), | |||
| reinterpret_cast<void **>(&runtimeargs[0]), 0); | |||
| if (result != CUDA_SUCCESS) { | |||
| MS_LOG(ERROR) << "Launch Kernel failed."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,82 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_ | |||
| #include <cuda.h> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| struct GpuKernelMeta { | |||
| CUfunction func_addr_; | |||
| CUmodule module_; | |||
| std::vector<uint32_t> thread_info_; | |||
| GpuKernelMeta(CUfunction funcAddr, CUmodule module, const std::vector<uint32_t> &thread_info) | |||
| : func_addr_(funcAddr), module_(module), thread_info_(thread_info) {} | |||
| }; | |||
| using GpuKernelMetaPtr = std::shared_ptr<GpuKernelMeta>; | |||
| class GpuKernelManager { | |||
| public: | |||
| GpuKernelManager(); | |||
| virtual ~GpuKernelManager() { | |||
| for (auto iter = infotable_.begin(); iter != infotable_.end(); ++iter) { | |||
| CUresult ret = cuModuleUnload(iter->second->module_); | |||
| if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_DEINITIALIZED) { | |||
| MS_LOG(ERROR) << "Unload GPU Module failed."; | |||
| } | |||
| } | |||
| } | |||
| CUresult GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, std::vector<uint32_t> *thread_info, | |||
| CUfunction *func); | |||
| private: | |||
| std::unordered_map<std::string, GpuKernelMetaPtr> infotable_; | |||
| }; | |||
| using GpuKernelManagerPtr = std::shared_ptr<GpuKernelManager>; | |||
| class GpuKernelMod : public KernelMod { | |||
| public: | |||
| explicit GpuKernelMod(const KernelPackPtr &kernel_pack); | |||
| virtual ~GpuKernelMod() {} | |||
| void SetInputSizeList(const std::vector<size_t> &size_list); | |||
| void SetOutputSizeList(const std::vector<size_t> &size_list); | |||
| const std::vector<size_t> &GetInputSizeList() const override; | |||
| const std::vector<size_t> &GetOutputSizeList() const override; | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | |||
| static GpuKernelManagerPtr kernelmanager_; | |||
| private: | |||
| KernelPackPtr kernel_pack_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| }; | |||
| using GpuKernelModPtr = std::shared_ptr<GpuKernelMod>; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_MOD_H_ | |||
| @@ -0,0 +1,52 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "framework/ge_runtime/task_info.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include "debug/data_dump_parser.h" | |||
| #endif | |||
| using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class AscendKernelMod : public KernelMod { | |||
| public: | |||
| virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &, uint32_t) = 0; | |||
| uint32_t block_dim() { return block_dim_; } | |||
| uint32_t stream_id() { return stream_id_; } | |||
| virtual bool NeedDump() { | |||
| #ifdef ENABLE_DATA_DUMP | |||
| return DataDumpParser::GetInstance().NeedDump(kernel_name_); | |||
| #else | |||
| return false; | |||
| #endif | |||
| } | |||
| protected: | |||
| uint32_t block_dim_{1}; | |||
| uint32_t stream_id_{0}; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_ | |||
| @@ -0,0 +1,145 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_ | |||
| #include <dirent.h> | |||
| #include <memory> | |||
| #include <unordered_map> | |||
| #include <unordered_set> | |||
| #include <map> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <utility> | |||
| #include <nlohmann/json.hpp> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "backend/kernel_compiler/oplib/opinfo.h" | |||
| #include "backend/kernel_compiler/kernel_build_info.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr auto kCceKernelMeta = "./kernel_meta/"; | |||
| constexpr auto kGpuKernelMeta = "./cuda_meta"; | |||
| constexpr auto kProcessorAiCore = "aicore"; | |||
| constexpr auto kProcessorAiCpu = "aicpu"; | |||
| constexpr auto kProcessorCuda = "cuda"; | |||
| constexpr auto kJsonSuffix = ".json"; | |||
| constexpr auto kInfoSuffix = ".info"; | |||
| constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600; | |||
| constexpr auto kAkgModule = "_akg"; | |||
| constexpr auto kArgDataformat = "data_format"; | |||
| const std::vector<std::string> support_devices = {"aicore", "aicpu", "cuda"}; | |||
| struct KernelMetaInfo { | |||
| uintptr_t func_stub_; | |||
| uint32_t block_dim_; | |||
| }; | |||
| using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>; | |||
| class KernelMeta { | |||
| public: | |||
| KernelMeta() = default; | |||
| void Initialize(); | |||
| void RemoveKernelCache(); | |||
| std::string Search(const std::string &kernel_name) const; | |||
| bool Insert(const std::string &kernel_name, const std::string &kernel_json); | |||
| std::string GetKernelMetaPath() { return kernel_meta_path_; } | |||
| static KernelMeta *GetInstance() { | |||
| static KernelMeta kernel_meta; | |||
| return &kernel_meta; | |||
| } | |||
| ~KernelMeta() = default; | |||
| private: | |||
| bool initialized_ = false; | |||
| std::string kernel_meta_path_; | |||
| std::unordered_map<std::string, std::string> kernel_meta_map_; | |||
| }; | |||
| struct SparseGradient { | |||
| float *value_; | |||
| int *indices_; | |||
| size_t indices_size_; | |||
| }; | |||
| struct MultiThreadComputeParams { | |||
| float *var_; | |||
| float *accum_; | |||
| float *linear_; | |||
| float *m_; | |||
| float *m_t_; | |||
| float *v_; | |||
| float lr_; | |||
| float l1_; | |||
| float l2_; | |||
| float lr_power_; | |||
| float beta1_; | |||
| float beta2_; | |||
| float epsilon_; | |||
| SparseGradient sparse_grad_; | |||
| size_t var_first_dim_size_; | |||
| size_t var_outer_dim_size_; | |||
| bool use_nesterov_; | |||
| }; | |||
| using MultiThreadComputeFunc = std::function<void(MultiThreadComputeParams *param, size_t start, size_t end)>; | |||
| bool CheckCache(const std::string &kernel_name); | |||
| KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); | |||
| KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); | |||
| TypeId DtypeToTypeId(const std::string &dtypes); | |||
| std::string Dtype2ShortType(const std::string &dtypes); | |||
| std::string TypeId2String(TypeId type_id); | |||
| size_t GetDtypeNbyte(const std::string &dtypes); | |||
| bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor, | |||
| std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list); | |||
| void SaveJsonInfo(const std::string &json_name, const std::string &info); | |||
| std::string GetProcessor(const AnfNodePtr &anf_node); | |||
| bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b); | |||
| int Sign(float x); | |||
| void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, | |||
| size_t outer_dim); | |||
| void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim, | |||
| size_t outer_dim, bool use_multi_threads = true); | |||
| std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index); | |||
| std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list, | |||
| const std::vector<AnfNodePtr> &input_list); | |||
| std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list, | |||
| const std::vector<AnfNodePtr> &input_list, | |||
| const std::vector<AnfNodePtr> &output_list); | |||
| void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list, | |||
| std::vector<AnfNodePtr> *input_list, std::vector<AnfNodePtr> *output_list); | |||
| void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list); | |||
| bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json); | |||
| void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list); | |||
| bool IsWeightBoundary(const AnfNodePtr &node); | |||
| void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, | |||
| size_t total_compute_size); | |||
| void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, | |||
| size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices, | |||
| std::vector<size_t> *slice_positions); | |||
| void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads, | |||
| SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim, | |||
| size_t outer_dim); | |||
| void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad, | |||
| SparseGradient *unique_grad, size_t first_dim, size_t outer_dim); | |||
| std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_COMMON_UTILS_H_ | |||
| @@ -0,0 +1,65 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/addn_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| input_num_ = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||
| } | |||
| bool AddNCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| size_t offset = 0; | |||
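| // Shapes were expanded to 4-D in InitKernel; walk every element and sum the matching entry of each input. | |||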
| for (size_t i = 0; i < output_shape_[0]; ++i) { | |||
| for (size_t j = 0; j < output_shape_[1]; ++j) { | |||
| for (size_t k = 0; k < output_shape_[2]; ++k) { | |||
| for (size_t m = 0; m < output_shape_[3]; ++m) { | |||
| float sum = 0; | |||
| for (size_t index = 0; index < input_num_; ++index) { | |||
| auto input_addr = reinterpret_cast<float *>(inputs[index]->addr); | |||
| sum += input_addr[offset]; | |||
| } | |||
| output_addr[offset++] = sum; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| void AddNCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() > 4) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but AddNCPUKernel olny support 4d or lower."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but AddNCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class AddNCPUKernel : public CPUKernel { | |||
| public: | |||
| AddNCPUKernel() : input_num_(0) {} | |||
| ~AddNCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| size_t input_num_; | |||
| std::vector<size_t> output_shape_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(AddN, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| AddNCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,53 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "runtime/device/cpu/mpi/mpi_adapter.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| namespace { | |||
| constexpr auto kRanksGroup = "group"; | |||
| constexpr auto kAllGatherInputNum = 1; | |||
| } // namespace | |||
| void AllGatherCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != kAllGatherInputNum) { | |||
| MS_LOG(EXCEPTION) << "allgather input num:" << input_num; | |||
| } | |||
| auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup); | |||
| if (ranks_group != nullptr) { | |||
| ranks_group_ = GetValue<std::vector<int>>(ranks_group); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup; | |||
| } | |||
| } | |||
| bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto input_data_num = inputs[0]->size / sizeof(float); | |||
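| // Gather the local float buffer from every rank in ranks_group_ into the output buffer via MPI. | |||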
| auto mpi_instance = device::cpu::MPIAdapter::Instance(); | |||
| MS_EXCEPTION_IF_NULL(mpi_instance); | |||
| return mpi_instance->AllGather(input_addr, output_addr, ranks_group_, input_data_num); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,44 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class AllGatherCPUKernel : public CPUKernel { | |||
| public: | |||
| AllGatherCPUKernel() = default; | |||
| ~AllGatherCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| std::vector<int> ranks_group_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(_HostAllGather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| AllGatherCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif  // MINDSPORE_CCSRC_KERNEL_CPU_ALLGATHER_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void ApplyMomentumCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {} | |||
| bool ApplyMomentumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> & /*outputs*/) { | |||
| if (inputs.size() < 5) { | |||
| MS_LOG(EXCEPTION) << "ApplyMomentum needs 5 inputs (weight, accumulate, lr, gradient, momentum), but got " | |||
| << inputs.size(); | |||
| } | |||
| if (inputs[0]->size != inputs[1]->size || inputs[0]->size != inputs[3]->size) { | |||
| MS_LOG(EXCEPTION) << "Input data sizes do not match: weight, accumulate and gradient must be the same size."; | |||
| } | |||
| auto weight = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto accumulate = reinterpret_cast<float *>(inputs[1]->addr); | |||
| float learning_rate = reinterpret_cast<float *>(inputs[2]->addr)[0]; | |||
| auto gradient = reinterpret_cast<float *>(inputs[3]->addr); | |||
| float moment = reinterpret_cast<float *>(inputs[4]->addr)[0]; | |||
| size_t elem_num = inputs[0]->size / sizeof(float); | |||
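| // Momentum update: accum = accum * momentum + grad; weight -= accum * lr. | |||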
| for (size_t i = 0; i < elem_num; ++i) { | |||
| accumulate[i] = accumulate[i] * moment + gradient[i]; | |||
| weight[i] -= accumulate[i] * learning_rate; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ApplyMomentumCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| ApplyMomentumCPUKernel() = default; | |||
| ~ApplyMomentumCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL(ApplyMomentum, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| ApplyMomentumCPUKernel); | |||
| MS_REG_CPU_KERNEL(ApplyMomentum, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| ApplyMomentumCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,67 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (shape.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "argmax kernel dims invalid " << shape.size(); | |||
| } | |||
| batch_size_ = shape[0]; | |||
| class_num_ = shape[1]; | |||
| int axis = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS); | |||
| if (axis != -1 && axis != 1) { | |||
| MS_LOG(EXCEPTION) << "argmax kernel not support axis " << axis; | |||
| } | |||
| } | |||
| bool ArgmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspaces*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "input or output empty!"; | |||
| } | |||
| size_t batch_float_size = batch_size_ * sizeof(float); | |||
| size_t batch_class_float_size = class_num_ * batch_float_size; | |||
| if (inputs[0]->size != batch_class_float_size || outputs[0]->size != batch_float_size) { | |||
| MS_LOG(EXCEPTION) << "invalid input or output data size!"; | |||
| } | |||
| auto input = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto output = reinterpret_cast<int *>(outputs[0]->addr); | |||
| size_t row_start = 0; | |||
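| // Row-major scan: for each batch row, record the index of the maximum over class_num_ entries. | |||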
| for (size_t i = 0; i < batch_size_; ++i) { | |||
| size_t max_index = 0; | |||
| float max_value = input[row_start]; | |||
| for (size_t j = 1; j < class_num_; ++j) { | |||
| size_t index = row_start + j; | |||
| if (input[index] > max_value) { | |||
| max_value = input[index]; | |||
| max_index = j; | |||
| } | |||
| } | |||
| output[i] = SizeToInt(max_index); | |||
| row_start += class_num_; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,45 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ArgmaxCPUKernel : public CPUKernel { | |||
| public: | |||
| ArgmaxCPUKernel() = default; | |||
| ~ArgmaxCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| size_t class_num_{0}; | |||
| size_t batch_size_{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), | |||
| ArgmaxCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,82 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void BiasAddCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| if (input_shape_.size() == 4) { | |||
| data_shape_ = 4; | |||
| } else if (input_shape_.size() == 2) { | |||
| data_shape_ = 2; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "BiasAdd input shape must be 4D (NCHW) or 2D (NC), but got " << input_shape_.size() << "D"; | |||
| } | |||
| if (bias_shape_.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "BiasAdd bias must be a 1D tensor, but got " << bias_shape_.size() << "D"; | |||
| } | |||
| if (input_shape_[1] != bias_shape_[0]) { | |||
| MS_LOG(EXCEPTION) << "BiasAdd bias length " << bias_shape_[0] << " does not match input channel dim " << input_shape_[1]; | |||
| } | |||
| } | |||
| bool BiasAddCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| if (inputs.size() != 2 || outputs.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "inputs outputs size not supoort"; | |||
| } | |||
| auto src_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto bias_addr = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| if (data_shape_ == 4) { | |||
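| // NCHW layout: broadcast bias_addr[c] across every (h, w) position of channel c. | |||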
| size_t h_size = input_shape_[3]; | |||
| size_t c_size = input_shape_[2] * h_size; | |||
| size_t n_size = input_shape_[1] * c_size; | |||
| size_t hw_size = input_shape_[2] * input_shape_[3]; | |||
| size_t n_offset = 0; | |||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||
| size_t c_offset = 0; | |||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||
| for (size_t hw = 0; hw < hw_size; ++hw) { | |||
| size_t offset = n_offset + c_offset + hw; | |||
| output_addr[offset] = src_addr[offset] + bias_addr[c]; | |||
| } | |||
| c_offset += c_size; | |||
| } | |||
| n_offset += n_size; | |||
| } | |||
| } else { | |||
| size_t n_offset = 0; | |||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||
| output_addr[n_offset + c] = src_addr[n_offset + c] + bias_addr[c]; | |||
| } | |||
| n_offset += input_shape_[1]; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class BiasAddCPUKernel : public CPUKernel { | |||
| public: | |||
| BiasAddCPUKernel() = default; | |||
| ~BiasAddCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| uint8_t data_shape_{0}; | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> bias_shape_; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| BiasAdd, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| BiasAddCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,68 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void BiasAddGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (input_shape_.size() != 4 && input_shape_.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "input data format not support"; | |||
| } | |||
| } | |||
| bool BiasAddGradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, | |||
| const std::vector<AddressPtr> &outputs) { | |||
| if (inputs.size() != 1 || outputs.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "input output size not support"; | |||
| } | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| if (input_shape_.size() == 4) { | |||
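| // dL/db[c] accumulates the incoming gradient over the N, H and W dimensions of channel c. | |||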
| size_t h_size = input_shape_[3]; | |||
| size_t c_size = h_size * input_shape_[2]; | |||
| size_t n_size = c_size * input_shape_[1]; | |||
| size_t hw_size = input_shape_[2] * input_shape_[3]; | |||
| size_t c_offset = 0; | |||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||
| output_addr[c] = 0; | |||
| size_t n_offset = 0; | |||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||
| for (size_t hw = 0; hw < hw_size; ++hw) { | |||
| size_t offset = c_offset + n_offset + hw; | |||
| output_addr[c] += input_addr[offset]; | |||
| } | |||
| n_offset += n_size; | |||
| } | |||
| c_offset += c_size; | |||
| } | |||
| } else if (input_shape_.size() == 2) { | |||
| for (size_t c = 0; c < input_shape_[1]; ++c) { | |||
| output_addr[c] = 0; | |||
| size_t n_offset = 0; | |||
| for (size_t n = 0; n < input_shape_[0]; ++n) { | |||
| output_addr[c] += input_addr[c + n_offset]; | |||
| n_offset += input_shape_[1]; | |||
| } | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class BiasAddGradCPUKernel : public CPUKernel { | |||
| public: | |||
| BiasAddGradCPUKernel() = default; | |||
| ~BiasAddGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| std::vector<size_t> input_shape_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(BiasAddGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| BiasAddGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_GRAD_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,106 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/concat_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void ConcatCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS); | |||
| auto input_1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (axis_ < 0) { | |||
| axis_ = axis_ + SizeToInt(input_1_shape.size()); | |||
| } | |||
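| // Inputs are left-padded to 4D below, so shift the axis into the expanded coordinate system. | |||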
| axis_ += 4 - input_1_shape.size(); | |||
| auto input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| for (size_t i = 0; i < input_num; i++) { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); | |||
| CPUKernelUtils::ExpandDimsTo4(&input_shape); | |||
| input_shape_list_.push_back(input_shape); | |||
| } | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||
| } | |||
| bool ConcatCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto buff_size = outputs[0]->size; | |||
| size_t dim0 = output_shape_[0]; | |||
| size_t dim1 = output_shape_[1]; | |||
| size_t dim2 = output_shape_[2]; | |||
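| // Iterate over every prefix coordinate before the concat axis; each CopyDataToOutput call stitches the matching slice from all inputs. | |||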
| if (axis_ == 3) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| for (size_t j = 0; j < dim1; ++j) { | |||
| for (size_t k = 0; k < dim2; ++k) { | |||
| CopyDataToOutput(inputs, i, j, k, &output_addr, &buff_size); | |||
| } | |||
| } | |||
| } | |||
| } else if (axis_ == 2) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| for (size_t j = 0; j < dim1; ++j) { | |||
| CopyDataToOutput(inputs, i, j, 0, &output_addr, &buff_size); | |||
| } | |||
| } | |||
| } else if (axis_ == 1) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| CopyDataToOutput(inputs, i, 0, 0, &output_addr, &buff_size); | |||
| } | |||
| } else if (axis_ == 0) { | |||
| CopyDataToOutput(inputs, 0, 0, 0, &output_addr, &buff_size); | |||
| } | |||
| return true; | |||
| } | |||
| void ConcatCPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, | |||
| size_t dim2, float **output_addr, size_t *buff_size) { | |||
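| // For a fixed (dim0, dim1, dim2) prefix, append each input's contiguous run along axis_ to the output, | |||
| // advancing the output cursor and shrinking the remaining buffer size accordingly. | |||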
| for (size_t i = 0; i < input_shape_list_.size(); ++i) { | |||
| auto input_i_shape = input_shape_list_[i]; | |||
| auto input_i_addr = reinterpret_cast<float *>(inputs[i]->addr); | |||
| size_t num = CPUKernelUtils::GetElementNumOnAxis(input_i_shape, axis_); | |||
| num *= input_i_shape[axis_]; | |||
| auto pos = CPUKernelUtils::CalcOffset(input_i_shape, dim0, dim1, dim2, 0); | |||
| auto ret = memcpy_s(*output_addr, *buff_size, input_i_addr + pos, num * sizeof(float)); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "memcpy failed."; | |||
| } | |||
| *output_addr += num; | |||
| *buff_size -= num * sizeof(float); | |||
| } | |||
| } | |||
| void ConcatCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() > 4) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but ConcatCPUKernel olny support 4d or lower."; | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (output_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ConcatCPUKernel needs 1 output."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,50 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ConcatCPUKernel : public CPUKernel { | |||
| public: | |||
| ConcatCPUKernel() : axis_(0) {} | |||
| ~ConcatCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2, | |||
| float **output_addr, size_t *buff_size); | |||
| int axis_; | |||
| std::vector<std::vector<size_t>> input_shape_list_; | |||
| std::vector<size_t> output_shape_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(Concat, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ConcatCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,80 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void CPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| size_t type_size = sizeof(float); | |||
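| // Tensor sizes are computed with a fixed 4-byte element size; the dtypes registered for these CPU kernels (float32, int32) are all 4 bytes wide. | |||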
| for (size_t input_index = 0; input_index < input_num; ++input_index) { | |||
| std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, input_index); | |||
| size_t tensor_size = | |||
| shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>()); | |||
| input_size_list_.emplace_back(tensor_size); | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| for (size_t output_index = 0; output_index < output_num; ++output_index) { | |||
| std::vector<size_t> shape = AnfAlgo::GetOutputDeviceShape(kernel_node, output_index); | |||
| size_t tensor_size = | |||
| shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>()); | |||
| output_size_list_.emplace_back(tensor_size); | |||
| } | |||
| } | |||
| void CPUKernel::Init(const CNodePtr &kernel_node) { | |||
| InitKernel(kernel_node); | |||
| InitInputOutputSize(kernel_node); | |||
| } | |||
| void CPUKernelUtils::ExpandDimsTo4(std::vector<size_t> *shape) { | |||
| auto len = shape->size(); | |||
| if (len < 4) { | |||
| for (size_t i = 0; i < 4 - len; ++i) { | |||
| shape->insert(shape->begin(), 1); | |||
| } | |||
| } | |||
| } | |||
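| // CalcOffset maps 4D coordinates to a flat row-major offset: | |||
| // offset = ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3. | |||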
| size_t CPUKernelUtils::CalcOffset(const std::vector<size_t> &shape, size_t dim0, size_t dim1, size_t dim2, | |||
| size_t dim3) { | |||
| size_t offset = dim0 * shape[1] * shape[2] * shape[3] + dim1 * shape[2] * shape[3] + dim2 * shape[3] + dim3; | |||
| return offset; | |||
| } | |||
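| // GetElementNumOnAxis returns the row-major stride of `axis` in a 4D shape, i.e. the element count of | |||
| // one slice below it (for [N, C, H, W] and axis = 1 this is H * W). | |||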
| size_t CPUKernelUtils::GetElementNumOnAxis(const std::vector<size_t> &shape, int axis) { | |||
| if (axis < 0) { | |||
| axis = axis + SizeToInt(shape.size()); | |||
| } | |||
| size_t result = 1; | |||
| for (int j = 3; j > axis; --j) { | |||
| result *= shape[j]; | |||
| } | |||
| return result; | |||
| } | |||
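| // GetElementNumEveryDim emits the row-major stride of every dim, e.g. shape {2, 3, 4} -> {12, 4, 1}. | |||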
| void CPUKernelUtils::GetElementNumEveryDim(const std::vector<size_t> &shape, std::vector<size_t> *element_num) { | |||
| size_t accumulation = 1; | |||
| element_num->emplace_back(1); | |||
| for (size_t i = shape.size() - 1; i > 0; --i) { | |||
| accumulation *= shape[i]; | |||
| element_num->emplace_back(accumulation); | |||
| } | |||
| std::reverse(element_num->begin(), element_num->end()); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,87 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <numeric> | |||
| #include <functional> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| #include "ir/anf.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| using mindspore::kernel::Address; | |||
| using mindspore::kernel::AddressPtr; | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| const char KSIZE[] = "ksize"; | |||
| const char STRIDE[] = "stride"; | |||
| const char STRIDES[] = "strides"; | |||
| const char DILATION[] = "dilation"; | |||
| const char PAD[] = "pad"; | |||
| const char PAD_MODE[] = "pad_mode"; | |||
| const char PADDING[] = "padding"; | |||
| const char PAD_MODE_LOWER_SAME[] = "same"; | |||
| const char PAD_MODE_LOWER_VALID[] = "valid"; | |||
| const char PAD_MODE_UPPER_SAME[] = "SAME"; | |||
| const char PAD_MODE_UPPER_VALID[] = "VALID"; | |||
| const char TRANSPOSE_A[] = "transpose_a"; | |||
| const char TRANSPOSE_B[] = "transpose_b"; | |||
| const char IS_GRAD[] = "is_grad"; | |||
| const char TRANSPOSE_NO = 'N'; | |||
| const char TRANSPOSE_YES = 'T'; | |||
| const char AXIS[] = "axis"; | |||
| const char BEGIN[] = "begin"; | |||
| const char END[] = "end"; | |||
| const char SIZE[] = "size"; | |||
| const char USE_NESTEROV[] = "use_nesterov"; | |||
| class CPUKernel : public kernel::KernelMod { | |||
| public: | |||
| CPUKernel() = default; | |||
| ~CPUKernel() override = default; | |||
| virtual void Init(const CNodePtr &kernel_node); | |||
| virtual void InitKernel(const CNodePtr &kernel_node) = 0; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void * /*stream_ptr*/) override { | |||
| return Launch(inputs, workspace, outputs); | |||
| } | |||
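| // The four-argument KernelMod::Launch above drops the unused stream pointer and forwards to the | |||
| // CPU-specific overload that subclasses implement. | |||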
| virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) = 0; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| protected: | |||
| virtual void InitInputOutputSize(const CNodePtr &kernel_node); | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| }; | |||
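| // A minimal sketch (hypothetical "Square" op, not part of this patch) of how a concrete kernel | |||
| // fills in this interface: | |||
| //   class SquareCPUKernel : public CPUKernel { | |||
| //    public: | |||
| //     void InitKernel(const CNodePtr &node) override { /* cache shapes/attrs from the node */ } | |||
| //     bool Launch(const std::vector<AddressPtr> &in, const std::vector<AddressPtr> &, | |||
| //                 const std::vector<AddressPtr> &out) override { | |||
| //       auto x = reinterpret_cast<float *>(in[0]->addr); | |||
| //       auto y = reinterpret_cast<float *>(out[0]->addr); | |||
| //       for (size_t i = 0; i < in[0]->size / sizeof(float); ++i) y[i] = x[i] * x[i]; | |||
| //       return true; | |||
| //     } | |||
| //   }; | |||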
| class CPUKernelUtils { | |||
| public: | |||
| static void ExpandDimsTo4(std::vector<size_t> *shape); | |||
| static size_t CalcOffset(const std::vector<size_t> &shape, size_t dim0, size_t dim1, size_t dim2, size_t dim3); | |||
| static size_t GetElementNumOnAxis(const std::vector<size_t> &shape, int axis); | |||
| static void GetElementNumEveryDim(const std::vector<size_t> &shape, std::vector<size_t> *element_num); | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,104 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| #include <memory> | |||
| #include <iostream> | |||
| #include <string> | |||
| #include "runtime/device/kernel_info.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| CPUKernelFactory &CPUKernelFactory::GetInstance() { | |||
| static CPUKernelFactory instance; | |||
| return instance; | |||
| } | |||
| void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr &kernel_attr, | |||
| CPUKernelCreator &&kernel_creator) { | |||
| (void)name_to_attr_creator_[kernel_name].emplace_back(kernel_attr, kernel_creator); | |||
| #if !defined(_WIN32) && !defined(_WIN64) | |||
| MS_LOG(DEBUG) << "CPUKernelFactory register operator: " << kernel_name; | |||
| #endif | |||
| } | |||
| std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) { | |||
| auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info()); | |||
| MS_EXCEPTION_IF_NULL(kernel_info); | |||
| const KernelBuildInfo *kernel_build_info = kernel_info->select_kernel_build_info(); | |||
| MS_EXCEPTION_IF_NULL(kernel_build_info); | |||
| std::pair<bool, size_t> ret_pair = CPUKernelAttrCheck(kernel_name, *kernel_build_info); | |||
| if (ret_pair.first) { | |||
| return (name_to_attr_creator_.find(kernel_name)->second)[ret_pair.second].second(); | |||
| } | |||
| return nullptr; | |||
| } | |||
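| // CPUKernelAttrCheck returns {true, index} where index selects the first registered KernelAttr | |||
| // whose input/output dtypes match the node's selected KernelBuildInfo. | |||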
| std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &kernel_name, | |||
| const KernelBuildInfo &kernel_info) { | |||
| auto iter = name_to_attr_creator_.find(kernel_name); | |||
| if (iter == name_to_attr_creator_.end()) { | |||
| MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!"; | |||
| return std::make_pair(false, 0); | |||
| } | |||
| auto creators = iter->second; | |||
| for (size_t index = 0; index < creators.size(); ++index) { | |||
| auto attr_creator = creators[index]; | |||
| if (CPUKernelSingleAttrCheck(attr_creator.first, kernel_info)) { | |||
| return std::make_pair(true, index); | |||
| } | |||
| } | |||
| return std::make_pair(false, 0); | |||
| } | |||
| bool CPUKernelFactory::CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) { | |||
| for (size_t i = 0; i < kernel_info.GetInputNum(); ++i) { | |||
| auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetInputAttr(0).first : kernel_attr.GetInputAttr(i).first; | |||
| if (kernel_info.GetInputDeviceType(i) != dtype) { | |||
| MS_LOG(DEBUG) << "input index:" << i << ", kernel info type:" << kernel_info.GetInputDeviceType(i) | |||
| << ", register type:" << dtype; | |||
| return false; | |||
| } | |||
| } | |||
| for (size_t i = 0; i < kernel_info.GetOutputNum(); ++i) { | |||
| auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetOutputAttr(0).first : kernel_attr.GetOutputAttr(i).first; | |||
| if (kernel_info.GetOutputDeviceType(i) != dtype) { | |||
| MS_LOG(DEBUG) << "output index:" << i << ", kernel info type:" << kernel_info.GetOutputDeviceType(i) | |||
| << ", register type:" << dtype; | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| std::vector<KernelAttr> CPUKernelFactory::GetSupportedKernelAttrList(const std::string &kernel_name) { | |||
| std::vector<KernelAttr> result; | |||
| auto iter = name_to_attr_creator_.find(kernel_name); | |||
| if (iter == name_to_attr_creator_.end()) { | |||
| MS_LOG(WARNING) << "Not registered CPU kernel: op[" << kernel_name << "]!"; | |||
| return result; | |||
| } | |||
| auto creators = iter->second; | |||
| for (size_t index = 0; index < creators.size(); ++index) { | |||
| auto attr_creator = creators[index]; | |||
| result.push_back(attr_creator.first); | |||
| } | |||
| return result; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,79 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ | |||
| #include <functional> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "runtime/device/cpu/kernel_select_cpu.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| using mindspore::device::cpu::KernelAttr; | |||
| using CPUKernelCreator = std::function<std::shared_ptr<CPUKernel>()>; | |||
| class CPUKernelFactory { | |||
| public: | |||
| static CPUKernelFactory &GetInstance(); | |||
| void Register(const std::string &kernel_name, const KernelAttr &kernel_attr, CPUKernelCreator &&kernel_creator); | |||
| std::shared_ptr<CPUKernel> Create(const std::string &kernel_name, const CNodePtr &apply_kernel); | |||
| std::vector<KernelAttr> GetSupportedKernelAttrList(const std::string &kernel_name); | |||
| private: | |||
| CPUKernelFactory() = default; | |||
| ~CPUKernelFactory() = default; | |||
| DISABLE_COPY_AND_ASSIGN(CPUKernelFactory) | |||
| std::pair<bool, size_t> CPUKernelAttrCheck(const std::string &kernel_name, const KernelBuildInfo &kernel_info); | |||
| bool CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info); | |||
| std::map<std::string, std::vector<std::pair<KernelAttr, CPUKernelCreator>>> name_to_attr_creator_; | |||
| }; | |||
| class CPUKernelRegistrar { | |||
| public: | |||
| CPUKernelRegistrar(const std::string &kernel_name, const KernelAttr &kernel_attr, CPUKernelCreator &&kernel_creator) { | |||
| CPUKernelFactory::GetInstance().Register(kernel_name, kernel_attr, std::move(kernel_creator)); | |||
| } | |||
| ~CPUKernelRegistrar() = default; | |||
| }; | |||
| #define MS_REG_CPU_KERNEL(OPNAME, ATTR, OPCLASS) MS_REG_CPU_KERNEL_(__COUNTER__, OPNAME, ATTR, OPCLASS) | |||
| #define MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) _MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) | |||
| #define _MS_REG_CPU_KERNEL_(COUNT, OPNAME, ATTR, OPCLASS) \ | |||
| static_assert(std::is_base_of<CPUKernel, OPCLASS>::value, " must be base of CPUKernel"); \ | |||
| static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_reg(#OPNAME, ATTR, \ | |||
| []() { return std::make_shared<OPCLASS>(); }); | |||
| #define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) MS_REG_CPU_KERNEL_T_(__COUNTER__, OPNAME, ATTR, OPCLASS, T) | |||
| #define MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) | |||
| #define _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) \ | |||
| static_assert(std::is_base_of<CPUKernel, OPCLASS<T>>::value, " must be base of CPUKernel"); \ | |||
| static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_##OPNAME##_##T##_reg( \ | |||
| #OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T>>(); }); | |||
| #define MS_REG_CPU_KERNEL_T_S(OPNAME, ATTR, OPCLASS, T, S) \ | |||
| static_assert(std::is_base_of<CPUKernel, OPCLASS<T, S>>::value, " must be base of CPUKernel"); \ | |||
| static const CPUKernelRegistrar g_cpu_kernel_##OPNAME##_##T##_##S##_reg( \ | |||
| #OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T, S>>(); }); | |||
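| // Hypothetical usage sketch (MyOp/MyOpCPUKernel are illustrative, not real ops): | |||
| //   MS_REG_CPU_KERNEL(MyOp, | |||
| //                     KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| //                     MyOpCPUKernel); | |||
| // The macro expands (via __COUNTER__ for a unique symbol) into a file-scope CPUKernelRegistrar whose | |||
| // constructor registers a creator lambda with CPUKernelFactory before main() runs. | |||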
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_ | |||
| @@ -0,0 +1,50 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/debug_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "common/utils.h" | |||
| #ifdef ENABLE_DEBUGGER | |||
| #include "debug/debugger/debugger.h" | |||
| #endif | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void DebugCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); } | |||
| bool DebugCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "input or output empty!"; | |||
| } | |||
| auto val = reinterpret_cast<float *>(inputs[0]->addr); | |||
| MS_LOG(DEBUG) << "launch DebugCPUKernel val " << *val; | |||
| auto output = reinterpret_cast<int *>(outputs[0]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(float); | |||
| for (size_t i = 0; i < elem_num; i++) { | |||
| output[i] = static_cast<int>(val[i]); | |||
| } | |||
| #ifdef ENABLE_DEBUGGER | |||
| // the debugger will suspend execution if necessary | |||
| Debugger::GetInstance()->PostDebugOp(); | |||
| #endif | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,41 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class DebugCPUKernel : public CPUKernel { | |||
| public: | |||
| DebugCPUKernel() = default; | |||
| ~DebugCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL(Debug, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), DebugCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,78 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <thread> | |||
| #include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "runtime/device/cpu/mpi/mpi_adapter.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num"); | |||
| MS_LOG(INFO) << "split_num: " << split_num_; | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape[0] % split_num_ != 0) { | |||
| MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num."; | |||
| } | |||
| } | |||
| bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| auto start_time = std::chrono::steady_clock::now(); | |||
| #else | |||
| struct timeval start_time, end_time; | |||
| (void)gettimeofday(&start_time, nullptr); | |||
| #endif | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| size_t input_size = inputs[0]->size; | |||
| size_t output_size = outputs[0]->size; | |||
| MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size; | |||
| MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size; | |||
| if (memset_s(output_addr, output_size, 0, output_size) != EOK) { | |||
| MS_LOG(EXCEPTION) << "memset_s output buffer failed."; | |||
| } | |||
| const std::vector<int> &rank_group = {0, 1, 2, 3, 4, 5, 6, 7}; | |||
| size_t input_split_lens = input_size / split_num_ / sizeof(float_t); | |||
| size_t output_split_lens = output_size / split_num_ / sizeof(float_t); | |||
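| // Each of the split_num_ input segments is AllGather-ed across the fixed 8-rank group below. | |||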
| auto mpi_instance = device::cpu::MPIAdapter::Instance(); | |||
| MS_EXCEPTION_IF_NULL(mpi_instance); | |||
| for (int i = 0; i < split_num_; i++) { | |||
| mpi_instance->AllGather(input_addr + i * input_split_lens, output_addr + i * output_split_lens, rank_group, | |||
| input_split_lens); | |||
| } | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| auto end_time = std::chrono::steady_clock::now(); | |||
| std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time; | |||
| MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << cost.count() << " us"; | |||
| #else | |||
| (void)gettimeofday(&end_time, nullptr); | |||
| uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec); | |||
| time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec); | |||
| MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << time << " us"; | |||
| #endif | |||
| return true; | |||
| } | |||
| void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 1) { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class EmbeddingLookUpCommGradCPUKernel : public CPUKernel { | |||
| public: | |||
| EmbeddingLookUpCommGradCPUKernel() : split_num_(1) {} | |||
| ~EmbeddingLookUpCommGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| int split_num_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(EmbeddingLookupCommGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| EmbeddingLookUpCommGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,212 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <thread> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "runtime/device/cpu/mpi/mpi_adapter.h" | |||
| #include "ir/primitive.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| input_lens_ = 1; | |||
| for (auto shape : input_shape_) { | |||
| input_lens_ = input_lens_ * shape; | |||
| } | |||
| indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| indices_lens_ = 1; | |||
| for (auto shape : indices_shape_) { | |||
| indices_lens_ = indices_lens_ * shape; | |||
| } | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| axis_ = 4 - input_shape_.size(); | |||
| if (AnfAlgo::HasNodeAttr(kAttrReduceScatterFlag, kernel_node)) { | |||
| reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrReduceScatterFlag); | |||
| } | |||
| #ifdef ENABLE_MPI | |||
| if (reduce_scatter_flag_) { | |||
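| // The intermediate GatherV2 result replaces dim 0 of the parameter shape with the flattened indices, | |||
| // so its element count is prod(indices_shape_) * prod(input_shape_[1:]). | |||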
| size_t gatherv2_out_lens = 1; | |||
| for (int i = 0; i < SizeToInt(input_shape_.size()); i++) { | |||
| if (i == 0) { | |||
| for (int j = 0; j < SizeToInt(indices_shape_.size()); j++) { | |||
| gatherv2_out_lens = gatherv2_out_lens * indices_shape_[j]; | |||
| } | |||
| } else { | |||
| gatherv2_out_lens = gatherv2_out_lens * input_shape_[i]; | |||
| } | |||
| } | |||
| gatherv2_out_lens_ = gatherv2_out_lens * sizeof(float); | |||
| gather_v2_out_ = malloc(gatherv2_out_lens_); | |||
| if (gather_v2_out_ == nullptr) { | |||
| MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel malloc failed, malloc lens: " << gatherv2_out_lens_; | |||
| } | |||
| auto ret = memset_s(gather_v2_out_, gatherv2_out_lens_, 0, gatherv2_out_lens_); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "EmbeddingLookUpCPUKernel memset gatherv2 output buffer failed"; | |||
| } | |||
| split_num_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "split_num"); | |||
| } | |||
| #else | |||
| if (reduce_scatter_flag_) { | |||
| MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true"; | |||
| } | |||
| #endif | |||
| if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) { | |||
| offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, kAttrOffset); | |||
| } | |||
| CPUKernelUtils::ExpandDimsTo4(&input_shape_); | |||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||
| } | |||
| bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| float *gather_out_addr = reduce_scatter_flag_ ? reinterpret_cast<float *>(gather_v2_out_) : output_addr; | |||
| size_t dim0 = input_shape_[0]; | |||
| size_t dim1 = input_shape_[1]; | |||
| size_t dim2 = input_shape_[2]; | |||
| if (axis_ == 3) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| for (size_t j = 0; j < dim1; ++j) { | |||
| for (size_t k = 0; k < dim2; ++k) { | |||
| LookUpTable(inputs, i, j, k, &gather_out_addr); | |||
| } | |||
| } | |||
| } | |||
| } else if (axis_ == 2) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| for (size_t j = 0; j < dim1; ++j) { | |||
| LookUpTable(inputs, i, j, 0, &gather_out_addr); | |||
| } | |||
| } | |||
| } else if (axis_ == 1) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| LookUpTable(inputs, i, 0, 0, &gather_out_addr); | |||
| } | |||
| } else if (axis_ == 0) { | |||
| LookUpTable(inputs, 0, 0, 0, &gather_out_addr); | |||
| } | |||
| #ifdef ENABLE_MPI | |||
| if (reduce_scatter_flag_) { | |||
| size_t one_split_lens = gatherv2_out_lens_ / split_num_ / sizeof(float); | |||
| size_t reduce_scatter_out_lens = one_split_lens / 8; | |||
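| // ReduceScatter across the 8-rank group leaves each rank with 1/8 of every split, hence the /8. | |||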
| const std::vector<int> &group = {0, 1, 2, 3, 4, 5, 6, 7}; | |||
| auto mpi_instance = device::cpu::MPIAdapter::Instance(); | |||
| MS_EXCEPTION_IF_NULL(mpi_instance); | |||
| for (int i = 0; i < split_num_; i++) { | |||
| mpi_instance->ReduceScatter(reinterpret_cast<float *>(gather_v2_out_) + i * one_split_lens, | |||
| output_addr + i * reduce_scatter_out_lens, group, one_split_lens / 8, "sum"); | |||
| } | |||
| } | |||
| #endif | |||
| return true; | |||
| } | |||
| void LookUpTable_task(const float *input_addr, float *output_addr, const int *indices_addr, size_t indices_lens, | |||
| size_t num, size_t dim0, size_t dim1, size_t dim2, int offset, size_t axis, | |||
| std::vector<size_t> input_shape, size_t input_lens) { | |||
| size_t lens = num * sizeof(float); | |||
| for (size_t i = 0; i < indices_lens; ++i) { | |||
| // an out-of-range index yields an all-zero output slice instead of reading out of bounds | |||
| bool copied = false; | |||
| int indices = indices_addr[i] - offset; | |||
| if (indices >= 0 && IntToSize(indices) < input_shape[axis]) { | |||
| size_t index = IntToSize(indices); | |||
| size_t pos = 0; | |||
| if (axis == 3) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, dim2, index); | |||
| } else if (axis == 2) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape, dim0, dim1, index, 0); | |||
| } else if (axis == 1) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape, dim0, index, 0, 0); | |||
| } else if (axis == 0) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape, index, 0, 0, 0); | |||
| } | |||
| if (pos + num <= input_lens) { | |||
| if (memcpy_s(output_addr, lens, input_addr + pos, lens) != EOK) { | |||
| MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed."; | |||
| } | |||
| copied = true; | |||
| } | |||
| } | |||
| if (!copied && memset_s(output_addr, lens, 0, lens) != EOK) { | |||
| MS_LOG(EXCEPTION) << "LookUpTable task memset failed."; | |||
| } | |||
| output_addr += num; | |||
| } | |||
| } | |||
| void EmbeddingLookUpCPUKernel::LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, | |||
| size_t dim2, float **output_addr) { | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr); | |||
| size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_); | |||
| float *task_out_addr = *output_addr; | |||
| const size_t thread_num = 8; | |||
| std::thread threads[8]; | |||
| size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num; | |||
| size_t i; | |||
| size_t task_offset = 0; | |||
| MS_LOG(DEBUG) << "indices_lens_: " << indices_lens_ << " one task proc lens:" << task_proc_lens; | |||
| for (i = 0; i < thread_num; i++) { | |||
| if (task_offset >= indices_lens_) { | |||
| break; | |||
| } | |||
| MS_LOG(DEBUG) << "task_offset: " << task_offset << " task_proc_lenss:" << task_proc_lens; | |||
| threads[i] = | |||
| std::thread(LookUpTable_task, input_addr, task_out_addr + task_offset * num, indices_addr + task_offset, | |||
| task_proc_lens, num, dim0, dim1, dim2, offset_, axis_, input_shape_, input_lens_); | |||
| task_offset += task_proc_lens; | |||
| if (task_offset + task_proc_lens > indices_lens_) { | |||
| task_proc_lens = indices_lens_ - task_offset; | |||
| } | |||
| } | |||
| for (size_t j = 0; j < i; j++) { | |||
| threads[j].join(); | |||
| } | |||
| *output_addr += num * indices_lens_; | |||
| } | |||
| void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() > 4) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() | |||
| << ", but EmbeddingLookUpCPUKernel olny support 4d or lower."; | |||
| } | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,74 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class EmbeddingLookUpCPUKernel : public CPUKernel { | |||
| public: | |||
| EmbeddingLookUpCPUKernel() { | |||
| axis_ = 0; | |||
| offset_ = 0; | |||
| split_num_ = 0; | |||
| input_lens_ = 0; | |||
| indices_lens_ = 0; | |||
| gatherv2_out_lens_ = 0; | |||
| reduce_scatter_flag_ = false; | |||
| gather_v2_out_ = nullptr; | |||
| } | |||
| ~EmbeddingLookUpCPUKernel() override { | |||
| if (gather_v2_out_ != nullptr) { | |||
| free(gather_v2_out_); | |||
| gather_v2_out_ = nullptr; | |||
| } | |||
| } | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2, | |||
| float **output_addr); | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> indices_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| int axis_; | |||
| int offset_; | |||
| int split_num_; | |||
| size_t input_lens_; | |||
| size_t indices_lens_; | |||
| size_t gatherv2_out_lens_; | |||
| bool reduce_scatter_flag_; | |||
| void *gather_v2_out_; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| EmbeddingLookup, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | |||
| EmbeddingLookUpCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void EqualCountCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {} | |||
| bool EqualCountCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "input or output empty!"; | |||
| } | |||
| if (inputs[0]->size != inputs[1]->size) { | |||
| MS_LOG(EXCEPTION) << "input or output size!"; | |||
| } | |||
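| // Count the positions where the two int32 inputs agree (typically predicted vs. ground-truth labels). | |||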
| int count = 0; | |||
| auto left = reinterpret_cast<int *>(inputs[0]->addr); | |||
| auto right = reinterpret_cast<int *>(inputs[1]->addr); | |||
| size_t elem_num = inputs[0]->size / sizeof(int); | |||
| for (size_t i = 0; i < elem_num; i++) { | |||
| if (left[i] == right[i]) { | |||
| count++; | |||
| } | |||
| } | |||
| auto output = reinterpret_cast<int *>(outputs[0]->addr); | |||
| output[0] = count; | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class EqualCountCPUKernel : public CPUKernel { | |||
| public: | |||
| EqualCountCPUKernel() = default; | |||
| ~EqualCountCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| EqualCount, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| EqualCountCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,115 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/gather_cpu_kernel.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void GatherV2CPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| CheckParam(kernel_node); | |||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||
| axis_ = AnfAlgo::GetNodeAttr<int>(kernel_node, AXIS); | |||
| if (axis_ < 0) { | |||
| axis_ = axis_ + SizeToInt(input_shape_.size()); | |||
| } | |||
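| // The shapes below are expanded to 4-D by padding leading dims with 1s (assumed ExpandDimsTo4 behavior), so shift axis_ to keep addressing the same dimension. | |||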
| axis_ += 4 - input_shape_.size(); | |||
| CPUKernelUtils::ExpandDimsTo4(&input_shape_); | |||
| CPUKernelUtils::ExpandDimsTo4(&output_shape_); | |||
| } | |||
| bool GatherV2CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| auto output_addr = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto buff_size = outputs[0]->size; | |||
| size_t dim0 = input_shape_[0]; | |||
| size_t dim1 = input_shape_[1]; | |||
| size_t dim2 = input_shape_[2]; | |||
| if (axis_ == 3) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| for (size_t j = 0; j < dim1; ++j) { | |||
| for (size_t k = 0; k < dim2; ++k) { | |||
| CopyDataToOutput(inputs, i, j, k, &output_addr, &buff_size); | |||
| } | |||
| } | |||
| } | |||
| } else if (axis_ == 2) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| for (size_t j = 0; j < dim1; ++j) { | |||
| CopyDataToOutput(inputs, i, j, 0, &output_addr, &buff_size); | |||
| } | |||
| } | |||
| } else if (axis_ == 1) { | |||
| for (size_t i = 0; i < dim0; ++i) { | |||
| CopyDataToOutput(inputs, i, 0, 0, &output_addr, &buff_size); | |||
| } | |||
| } else if (axis_ == 0) { | |||
| CopyDataToOutput(inputs, 0, 0, 0, &output_addr, &buff_size); | |||
| } | |||
| return true; | |||
| } | |||
| void GatherV2CPUKernel::CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, | |||
| size_t dim2, float **output_addr, size_t *buff_size) { | |||
| auto input_addr = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr); | |||
| size_t elem_num = inputs[1]->size / sizeof(int); | |||
| size_t num = CPUKernelUtils::GetElementNumOnAxis(input_shape_, axis_); | |||
| for (size_t i = 0; i < elem_num; ++i) { | |||
| if (indices_addr[i] < 0) { | |||
| MS_LOG(EXCEPTION) << "The indices value is less than 0."; | |||
| } | |||
| size_t index = IntToSize(indices_addr[i]); | |||
| if (index >= input_shape_[IntToSize(axis_)]) { | |||
| auto ret = memset_s(*output_addr, *buff_size, 0, num * sizeof(float)); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "memset failed."; | |||
| } | |||
| } else { | |||
| size_t pos = 0; | |||
| if (axis_ == 3) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, dim2, index); | |||
| } else if (axis_ == 2) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, dim1, index, 0); | |||
| } else if (axis_ == 1) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape_, dim0, index, 0, 0); | |||
| } else if (axis_ == 0) { | |||
| pos = CPUKernelUtils::CalcOffset(input_shape_, index, 0, 0, 0); | |||
| } | |||
| auto ret = memcpy_s(*output_addr, *buff_size, input_addr + pos, num * sizeof(float)); | |||
| if (ret != EOK) { | |||
| MS_LOG(EXCEPTION) << "memcpy failed."; | |||
| } | |||
| } | |||
| *output_addr += num; | |||
| *buff_size -= num * sizeof(float); | |||
| } | |||
| } | |||
| void GatherV2CPUKernel::CheckParam(const CNodePtr &kernel_node) { | |||
| auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| if (input_shape.size() > 4) { | |||
| MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but GatherV2CPUKernel olny support 4d or lower."; | |||
| } | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| if (input_num != 2) { | |||
| MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherV2CPUKernel needs 2."; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
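| Aside: CPUKernelUtils::CalcOffset is defined elsewhere in the tree; a plausible reading of it, consistent with how this kernel indexes its 4-D-expanded input, is the usual row-major flattening (treat this sketch as an assumption, not the helper's actual source): | |||
| #include <cstddef> | |||
| #include <vector> | |||
| // Assumed row-major flat offset of element (d0, d1, d2, d3) in a 4-D shape. | |||
| size_t CalcOffset4D(const std::vector<size_t> &shape, size_t d0, size_t d1, size_t d2, size_t d3) { | |||
| return ((d0 * shape[1] + d1) * shape[2] + d2) * shape[3] + d3; | |||
| } | |||
| // e.g. for shape {2, 3, 4, 5}: CalcOffset4D(shape, 1, 2, 0, 0) == 100. | |||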
| @@ -0,0 +1,52 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class GatherV2CPUKernel : public CPUKernel { | |||
| public: | |||
| GatherV2CPUKernel() : axis_(0) {} | |||
| ~GatherV2CPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void CopyDataToOutput(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2, | |||
| float **output_addr, size_t *buff_size); | |||
| void CheckParam(const CNodePtr &kernel_node); | |||
| std::vector<size_t> input_shape_; | |||
| std::vector<size_t> indices_shape_; | |||
| std::vector<size_t> output_shape_; | |||
| int axis_; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| GatherV2, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), | |||
| GatherV2CPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,91 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h" | |||
| #include <string> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "conv2d only support nchw input!"; | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| int kernel_size = SizeToInt(weight_shape[3]); | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||
| if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) { | |||
| MS_LOG(EXCEPTION) << "conv2d requires equal strides on H and W, and the stride attribute must be 4-D!"; | |||
| } | |||
| if (stride_ori[0] != 1 || stride_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "conv2d stride must be 1 on the N and C axes!"; | |||
| } | |||
| if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) { | |||
| MS_LOG(EXCEPTION) << "conv2d only supports dilation 1 on H and W, and the dilation attribute must be 4-D!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "conv2d dilation must be 1 on the N and C axes!"; | |||
| } | |||
| int stride = stride_ori[2]; | |||
| int dilation = dilation_ori[2]; | |||
| dnnl::memory::dims strides{stride, stride}; | |||
| dnnl::memory::dims dilates{dilation - 1, dilation - 1}; | |||
| std::vector<int> int_padding_l; | |||
| std::vector<int> int_padding_r; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "get padding failed"; | |||
| } | |||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||
| dnnl::convolution_forward::desc desc = | |||
| dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc, | |||
| weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||
| auto prim_desc = dnnl::convolution_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::convolution_forward>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_WEIGHTS, weights_desc); | |||
| AddArgument(DNNL_ARG_DST, dst_desc); | |||
| } | |||
| bool Conv2dCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
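| Aside: the dilates{dilation - 1, dilation - 1} translation above reflects a convention difference: oneDNN counts dilation as the number of zeros inserted between kernel taps (0 means dense), while the framework attribute uses the usual scale factor (1 means dense). A one-line statement of the mapping, with names of our own choosing: | |||
| // Framework dilation attribute -> oneDNN dilate value (hypothetical helper). | |||
| inline int ToDnnlDilate(int dilation) { return dilation - 1; } | |||
| // ToDnnlDilate(1) == 0: dense kernel; ToDnnlDilate(2) == 1: one zero inserted. | |||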
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class Conv2dCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| Conv2dCPUKernel() = default; | |||
| ~Conv2dCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| Conv2D, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Conv2dCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,93 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h" | |||
| #include <string> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> weight_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||
| MS_LOG(EXCEPTION) << ("conv2d grad filter only support nchw input!"); | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| int kernel_size = SizeToInt(weight_shape[3]); | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel requires equal strides, and the stride attribute must be 2-D!"; | |||
| } | |||
| if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel only supports dilation 1 on H and W, and the dilation attribute must be 4-D!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradFilterCPUKernel dilation must be 1 on the N and C axes!"; | |||
| } | |||
| int stride = stride_ori[0]; | |||
| int dilation = dilation_ori[2]; | |||
| dnnl::memory::dims strides{stride, stride}; | |||
| dnnl::memory::dims dilates{dilation - 1, dilation - 1}; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||
| std::vector<int> int_padding_l; | |||
| std::vector<int> int_padding_r; | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "get padding failed"; | |||
| } | |||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||
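| // oneDNN builds backward-weights primitives against a forward primitive_desc | |||
| // hint, so a forward convolution descriptor is constructed first. | |||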
| dnnl::convolution_forward::desc forward_desc = | |||
| dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc, | |||
| weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||
| auto forward_prim_desc = dnnl::convolution_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); | |||
| dnnl::convolution_backward_weights::desc backward_desc = dnnl::convolution_backward_weights::desc( | |||
| dnnl::algorithm::convolution_auto, src_desc, weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||
| auto backward_prim_desc = dnnl::convolution_backward_weights::primitive_desc( | |||
| backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||
| primitive_ = std::make_shared<dnnl::convolution_backward_weights>(backward_prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST, dst_desc); | |||
| AddArgument(DNNL_ARG_DIFF_WEIGHTS, weights_desc); | |||
| } | |||
| bool Conv2dGradFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class Conv2dGradFilterCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| Conv2dGradFilterCPUKernel() = default; | |||
| ~Conv2dGradFilterCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| Conv2DBackpropFilter, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Conv2dGradFilterCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,92 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h" | |||
| #include <string> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 || weight_shape.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "conv2d grad filter only support nchw input!"; | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| int kernel_size = SizeToInt(weight_shape[3]); | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel requires equal strides, and the stride attribute must be 2-D!"; | |||
| } | |||
| if (dilation_ori.size() != 4 || dilation_ori[2] != 1 || dilation_ori[3] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel only supports dilation 1 on H and W, and the dilation attribute must be 4-D!"; | |||
| } | |||
| if (dilation_ori[0] != 1 || dilation_ori[1] != 1) { | |||
| MS_LOG(EXCEPTION) << "Conv2dGradInputCPUKernel dilation must be 1 on the N and C axes!"; | |||
| } | |||
| int stride = stride_ori[0]; | |||
| int dilation = dilation_ori[2]; | |||
| dnnl::memory::dims strides{stride, stride}; | |||
| dnnl::memory::dims dilates{dilation - 1, dilation - 1}; | |||
| std::vector<int> int_padding_l; | |||
| std::vector<int> int_padding_r; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "conv2d grad get padding failed"; | |||
| } | |||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||
| dnnl::convolution_forward::desc forward_desc = | |||
| dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc, | |||
| weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||
| auto forward_prim_desc = dnnl::convolution_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); | |||
| dnnl::convolution_backward_data::desc backward_desc = dnnl::convolution_backward_data::desc( | |||
| dnnl::algorithm::convolution_auto, src_desc, weights_desc, dst_desc, strides, dilates, padding_l, padding_r); | |||
| auto backward_prim_desc = | |||
| dnnl::convolution_backward_data::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||
| primitive_ = std::make_shared<dnnl::convolution_backward_data>(backward_prim_desc); | |||
| AddArgument(DNNL_ARG_DIFF_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST, dst_desc); | |||
| AddArgument(DNNL_ARG_WEIGHTS, weights_desc); | |||
| } | |||
| bool Conv2dGradInputCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class Conv2dGradInputCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| Conv2dGradInputCPUKernel() = default; | |||
| ~Conv2dGradInputCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| Conv2DBackpropInput, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Conv2dGradInputCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,141 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h" | |||
| #include <string> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| #ifdef PLATFORM_86 | |||
| _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); | |||
| _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); | |||
| #endif | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| using tag = dnnl::memory::format_tag; | |||
| using dim = dnnl::memory::dims; | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); | |||
| bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional"); | |||
| input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size"); | |||
| hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size"); | |||
| num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers"); | |||
| has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias"); | |||
| batch_size_ = SizeToInt(src_shape[1]); | |||
| seq_len_ = SizeToInt(src_shape[0]); | |||
| num_directions_ = 1; | |||
| if (bidirectional_) { | |||
| num_directions_ = 2; | |||
| } | |||
| if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) { | |||
| MS_LOG(EXCEPTION) << "error iteration shape!"; | |||
| } | |||
| if (num_layers_ <= 0) { | |||
| MS_LOG(EXCEPTION) << "layers must be greater than zero!"; | |||
| } | |||
| if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) { | |||
| MS_LOG(EXCEPTION) << "conv2d only support 3-D input!"; | |||
| } | |||
| const int gate_size = 4 * hidden_size_; | |||
| for (int i = 0; i < num_layers_; ++i) { | |||
| weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_); | |||
| weight_h_size_ += gate_size * hidden_size_; | |||
| } | |||
| weight_size_ = weight_size_ * num_directions_; | |||
| weight_h_size_ = weight_h_size_ * num_directions_; | |||
| auto eng = MKLKernelEngine::Get().engine(); | |||
| dnnl::stream s(eng); | |||
| dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; | |||
| if (bidirectional_) { | |||
| direction = dnnl::rnn_direction::bidirectional_concat; | |||
| } | |||
| dim src_dims = {seq_len_, batch_size_, input_size_}; | |||
| dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; | |||
| weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; | |||
| bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_}; | |||
| dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; | |||
| dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); | |||
| dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); | |||
| dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc); | |||
| dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo); | |||
| dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); | |||
| dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc); | |||
| dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); | |||
| auto desc = std::make_shared<dnnl::lstm_forward::desc>(dnnl::prop_kind::forward_training, direction, src_desc, | |||
| src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any), | |||
| formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, | |||
| dst_h_desc, dst_c_desc); | |||
| prim_desc_ = dnnl::lstm_forward::primitive_desc(*desc, eng); | |||
| primitive_ = std::make_shared<dnnl::lstm_forward>(prim_desc_); | |||
| AddArgument(DNNL_ARG_SRC_LAYER, src_desc); | |||
| AddArgument(DNNL_ARG_SRC_ITER, src_h_desc); | |||
| AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc); | |||
| AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_desc_.weights_layer_desc()); | |||
| AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_desc_.weights_iter_desc()); | |||
| AddArgument(DNNL_ARG_BIAS, bias_desc); | |||
| AddArgument(DNNL_ARG_DST_LAYER, dst_desc); | |||
| AddArgument(DNNL_ARG_DST_ITER, dst_h_desc); | |||
| AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc); | |||
| AddArgument(DNNL_ARG_WORKSPACE, prim_desc_.workspace_desc()); | |||
| } | |||
| bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| using dt = dnnl::memory::data_type; | |||
| using tag = dnnl::memory::format_tag; | |||
| auto eng = MKLKernelEngine::Get().engine(); | |||
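| // The packed user weights in inputs[3] are laid out as ldgoi; reorder them | |||
| // into the layout the primitive chose for its tag::any weight descriptors. | |||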
| auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); | |||
| auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); | |||
| auto weights_memory = dnnl::memory(prim_desc_.weights_layer_desc(), eng); | |||
| auto weights_h_memory = dnnl::memory(prim_desc_.weights_iter_desc(), eng); | |||
| user_weights_memory.set_data_handle(inputs[3]->addr); | |||
| user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_); | |||
| Reorder(&user_weights_memory, &weights_memory); | |||
| Reorder(&user_weights_h_memory, &weights_h_memory); | |||
| auto bias_memory = dnnl::memory(prim_desc_.bias_desc(), eng); | |||
| if (has_bias_) { | |||
| bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_); | |||
| } else { | |||
| auto ret = | |||
| memset_s(bias_memory.get_data_handle(), prim_desc_.bias_desc().get_size(), 0, prim_desc_.bias_desc().get_size()); | |||
| if (ret != 0) { | |||
| MS_LOG(EXCEPTION) << "bias memset error"; | |||
| } | |||
| } | |||
| // set handle | |||
| SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_DST_LAYER, outputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST_ITER, outputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST_ITER_C, outputs[2]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[3]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
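| Aside: to make the packed-weights arithmetic in InitKernel concrete, here is the same computation evaluated for a hypothetical configuration (num_layers = 2, bidirectional, input_size = 8, hidden_size = 16); the constants are ours, the formulas are the loop's: | |||
| constexpr int input_size = 8, hidden_size = 16, num_layers = 2, num_directions = 2; | |||
| constexpr int gate_size = 4 * hidden_size;  // 64 | |||
| constexpr int w_layer0 = gate_size * input_size;  // 512 (the first layer reads the input) | |||
| constexpr int w_layer1 = gate_size * hidden_size * num_directions;  // 2048 (later layers read both directions) | |||
| constexpr int weight_size = (w_layer0 + w_layer1) * num_directions;  // 5120 floats | |||
| constexpr int weight_h_size = gate_size * hidden_size * num_layers * num_directions;  // 4096 floats | |||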
| @@ -0,0 +1,70 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ | |||
| #if defined(__x86_64__) || defined(__amd64__) || defined(_M_IX86) || defined(_M_X64) | |||
| #define PLATFORM_86 | |||
| #endif | |||
| #ifdef PLATFORM_86 | |||
| #include <pmmintrin.h> | |||
| #endif | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class LstmCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| LstmCPUKernel() = default; | |||
| ~LstmCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| int weight_size_ = 0; | |||
| int weight_h_size_ = 0; | |||
| int input_size_; | |||
| int hidden_size_; | |||
| int num_layers_; | |||
| int batch_size_; | |||
| int seq_len_; | |||
| int num_directions_; | |||
| bool bidirectional_; | |||
| bool has_bias_; | |||
| dnnl::memory::dims weights_dims_; | |||
| dnnl::memory::dims weights_h_dims_; | |||
| dnnl::memory::dims bias_dims_; | |||
| dnnl::lstm_forward::primitive_desc prim_desc_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(LSTM, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| LstmCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,196 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h" | |||
| #include <cstring> | |||
| #include <cmath> | |||
| #include <numeric> | |||
| #include <string> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| using tag = dnnl::memory::format_tag; | |||
| using dim = dnnl::memory::dims; | |||
| auto eng = MKLKernelEngine::Get().engine(); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2); | |||
| bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional"); | |||
| input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size"); | |||
| hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size"); | |||
| num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers"); | |||
| has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias"); | |||
| batch_size_ = SizeToInt(src_shape[1]); | |||
| seq_len_ = SizeToInt(src_shape[0]); | |||
| num_directions_ = 1; | |||
| if (bidirectional_) { | |||
| num_directions_ = 2; | |||
| } | |||
| if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) { | |||
| MS_LOG(EXCEPTION) << "error iteration shape!"; | |||
| } | |||
| if (num_layers_ <= 0) { | |||
| MS_LOG(EXCEPTION) << "layers must be greater than zero!"; | |||
| } | |||
| if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) { | |||
| MS_LOG(EXCEPTION) << "conv2d only support 3-D input!"; | |||
| } | |||
| const int gate_size = 4 * hidden_size_; | |||
| for (int i = 0; i < num_layers_; ++i) { | |||
| weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_); | |||
| weight_h_size_ += gate_size * hidden_size_; | |||
| } | |||
| weight_size_ = weight_size_ * num_directions_; | |||
| weight_h_size_ = weight_h_size_ * num_directions_; | |||
| dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; | |||
| if (bidirectional_) { | |||
| direction = dnnl::rnn_direction::bidirectional_concat; | |||
| } | |||
| dim src_dims = {seq_len_, batch_size_, input_size_}; | |||
| dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; | |||
| weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; | |||
| bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_}; | |||
| dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; | |||
| dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; | |||
| dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); | |||
| dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); | |||
| dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc); | |||
| dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo); | |||
| dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); | |||
| dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc); | |||
| dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); | |||
| auto forward_desc = std::make_shared<dnnl::lstm_forward::desc>( | |||
| dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc, | |||
| formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, | |||
| dst_c_desc); | |||
| auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(*forward_desc, eng); | |||
| auto backward_desc = std::make_shared<dnnl::lstm_backward::desc>( | |||
| dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any), | |||
| formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc, | |||
| src_c_desc, formatted_md(weights_dims_, tag::any), formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, | |||
| dst_h_desc, dst_c_desc); | |||
| prim_backward_desc_ = dnnl::lstm_backward::primitive_desc(*backward_desc, eng, prim_forward_desc); | |||
| primitive_ = std::make_shared<dnnl::lstm_backward>(prim_backward_desc_); | |||
| AddArgument(DNNL_ARG_SRC_LAYER, src_desc); | |||
| AddArgument(DNNL_ARG_SRC_ITER, src_h_desc); | |||
| AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc); | |||
| AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_backward_desc_.weights_layer_desc()); | |||
| AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_backward_desc_.weights_iter_desc()); | |||
| AddArgument(DNNL_ARG_BIAS, bias_desc); | |||
| AddArgument(DNNL_ARG_DST_LAYER, dst_desc); | |||
| AddArgument(DNNL_ARG_DST_ITER, dst_h_desc); | |||
| AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc); | |||
| AddArgument(DNNL_ARG_WORKSPACE, prim_forward_desc.workspace_desc()); | |||
| AddArgument(DNNL_ARG_DIFF_SRC_LAYER, src_desc); | |||
| AddArgument(DNNL_ARG_DIFF_SRC_ITER, src_h_desc); | |||
| AddArgument(DNNL_ARG_DIFF_SRC_ITER_C, src_c_desc); | |||
| AddArgument(DNNL_ARG_DIFF_WEIGHTS_LAYER, prim_backward_desc_.diff_weights_layer_desc()); | |||
| AddArgument(DNNL_ARG_DIFF_WEIGHTS_ITER, prim_backward_desc_.diff_weights_iter_desc()); | |||
| AddArgument(DNNL_ARG_DIFF_BIAS, bias_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST_LAYER, dst_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST_ITER, dst_h_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST_ITER_C, dst_c_desc); | |||
| } | |||
| bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| using dt = dnnl::memory::data_type; | |||
| using tag = dnnl::memory::format_tag; | |||
| auto eng = MKLKernelEngine::Get().engine(); | |||
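| // Input layout, mirrored by the SetArgumentHandle calls below: | |||
| // inputs[0..2] = x, hx, cx; inputs[3] = packed weights (+ bias); | |||
| // inputs[4..6] = y, hy, cy; inputs[7..9] = dy, dhy, dcy; inputs[10] = workspace. | |||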
| // construct fw memory | |||
| auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); | |||
| auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); | |||
| auto weights_memory = dnnl::memory(prim_backward_desc_.weights_layer_desc(), eng); | |||
| auto weights_h_memory = dnnl::memory(prim_backward_desc_.weights_iter_desc(), eng); | |||
| auto bias_memory = dnnl::memory(prim_backward_desc_.bias_desc(), eng); | |||
| user_weights_memory.set_data_handle(inputs[3]->addr); | |||
| user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_); | |||
| Reorder(&user_weights_memory, &weights_memory); | |||
| Reorder(&user_weights_h_memory, &weights_h_memory); | |||
| if (has_bias_) { | |||
| bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_); | |||
| } else { | |||
| if (memset_s(bias_memory.get_data_handle(), prim_backward_desc_.bias_desc().get_size(), 0, | |||
| prim_backward_desc_.bias_desc().get_size())) { | |||
| MS_LOG(EXCEPTION) << "bias memset error"; | |||
| } | |||
| } | |||
| // construct bw memory | |||
| auto diff_weights_memory = dnnl::memory(prim_backward_desc_.diff_weights_layer_desc(), eng); | |||
| auto diff_weights_h_memory = dnnl::memory(prim_backward_desc_.diff_weights_iter_desc(), eng); | |||
| auto diff_bias_memory = dnnl::memory(prim_backward_desc_.diff_bias_desc(), eng); | |||
| auto user_diff_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng); | |||
| auto user_diff_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng); | |||
| user_diff_weights_memory.set_data_handle(outputs[3]->addr); | |||
| user_diff_weights_h_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_); | |||
| if (memset_s(user_diff_weights_memory.get_data_handle(), user_diff_weights_memory.get_desc().get_size(), 0, | |||
| user_diff_weights_memory.get_desc().get_size())) { | |||
| MS_LOG(EXCEPTION) << "user weights grad memset error"; | |||
| } | |||
| if (memset_s(user_diff_weights_h_memory.get_data_handle(), user_diff_weights_h_memory.get_desc().get_size(), 0, | |||
| user_diff_weights_h_memory.get_desc().get_size())) { | |||
| MS_LOG(EXCEPTION) << "user weights iter grad memset error"; | |||
| } | |||
| if (has_bias_) { | |||
| diff_bias_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_ + weight_h_size_); | |||
| } | |||
| if (memset_s(diff_bias_memory.get_data_handle(), prim_backward_desc_.diff_bias_desc().get_size(), 0, | |||
| prim_backward_desc_.diff_bias_desc().get_size())) { | |||
| MS_LOG(EXCEPTION) << "bias grad memset error"; | |||
| } | |||
| if (memset_s(diff_weights_memory.get_data_handle(), diff_weights_memory.get_desc().get_size(), 0, | |||
| diff_weights_memory.get_desc().get_size())) { | |||
| MS_LOG(EXCEPTION) << "weights grad memset error"; | |||
| } | |||
| if (memset_s(diff_weights_h_memory.get_data_handle(), diff_weights_h_memory.get_desc().get_size(), 0, | |||
| diff_weights_h_memory.get_desc().get_size())) { | |||
| MS_LOG(EXCEPTION) << "weights iter grad memset error"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_DST_LAYER, inputs[4]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST_ITER, inputs[5]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST_ITER_C, inputs[6]->addr); | |||
| SetArgumentHandle(DNNL_ARG_WORKSPACE, inputs[10]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC_LAYER, outputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER, outputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC_ITER_C, outputs[2]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_BIAS, diff_bias_memory.get_data_handle()); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST_LAYER, inputs[7]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER, inputs[8]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST_ITER_C, inputs[9]->addr); | |||
| ExecutePrimitive(); | |||
| Reorder(&diff_weights_memory, &user_diff_weights_memory); | |||
| Reorder(&diff_weights_h_memory, &user_diff_weights_h_memory); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,71 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class LSTMGradCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| LSTMGradCPUKernel() = default; | |||
| ~LSTMGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| int weight_size_ = 0; | |||
| int weight_h_size_ = 0; | |||
| int input_size_; | |||
| int hidden_size_; | |||
| int num_layers_; | |||
| int batch_size_; | |||
| int seq_len_; | |||
| int num_directions_; | |||
| bool bidirectional_; | |||
| bool has_bias_; | |||
| dnnl::memory::dims weights_dims_; | |||
| dnnl::memory::dims weights_h_dims_; | |||
| dnnl::memory::dims bias_dims_; | |||
| dnnl::lstm_backward::primitive_desc prim_backward_desc_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(LSTMGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| LSTMGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,71 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h" | |||
| #include <algorithm> | |||
| #include <utility> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "common/utils.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void MatMulCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 2 || weight_shape.size() != 2 || dst_shape.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "matmul invalid input size"; | |||
| } | |||
| bool trans_a = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_A); | |||
| bool trans_b = AnfAlgo::GetNodeAttr<bool>(kernel_node, TRANSPOSE_B); | |||
| if (trans_a) { | |||
| trans_a_ = TRANSPOSE_YES; | |||
| dim_m_ = static_cast<dnnl_dim_t>(src_shape[1]); | |||
| dim_k_ = static_cast<dnnl_dim_t>(src_shape[0]); | |||
| } else { | |||
| dim_m_ = static_cast<dnnl_dim_t>(src_shape[0]); | |||
| dim_k_ = static_cast<dnnl_dim_t>(src_shape[1]); | |||
| } | |||
| if (trans_b) { | |||
| trans_b_ = TRANSPOSE_YES; | |||
| } | |||
| dim_n_ = static_cast<dnnl_dim_t>(dst_shape[1]); | |||
| } | |||
| bool MatMulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "matmul error input output size!"; | |||
| } | |||
| dnnl_dim_t lda = dim_m_; | |||
| if (trans_a_ == TRANSPOSE_NO) { | |||
| lda = dim_k_; | |||
| } | |||
| dnnl_dim_t ldb = dim_k_; | |||
| if (trans_b_ == TRANSPOSE_NO) { | |||
| ldb = dim_n_; | |||
| } | |||
| auto input_a = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto input_b = reinterpret_cast<float *>(inputs[1]->addr); | |||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| (void)dnnl_sgemm(trans_a_, trans_b_, dim_m_, dim_n_, dim_k_, 1.f, input_a, lda, input_b, ldb, 0.f, output, dim_n_); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
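| Aside: the leading-dimension logic above follows the row-major sgemm convention: lda is the stride between consecutive stored rows of A, so it is K when A is untransposed and M when it is transposed (likewise ldb is N or K). A self-contained call with hypothetical values: | |||
| #include <dnnl.h> | |||
| // 2x3 times 3x2 row-major product, no transposition: | |||
| // lda = K = 3, ldb = N = 2, ldc = N = 2. | |||
| float a[6] = {1, 2, 3, 4, 5, 6}; | |||
| float b[6] = {7, 8, 9, 10, 11, 12}; | |||
| float c[4] = {0}; | |||
| dnnl_sgemm('N', 'N', /*M=*/2, /*N=*/2, /*K=*/3, 1.f, a, /*lda=*/3, b, /*ldb=*/2, 0.f, c, /*ldc=*/2); | |||
| // c == {58, 64, 139, 154} | |||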
| @@ -0,0 +1,50 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class MatMulCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| MatMulCPUKernel() = default; | |||
| ~MatMulCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| char trans_a_{TRANSPOSE_NO}; | |||
| char trans_b_{TRANSPOSE_NO}; | |||
| dnnl_dim_t dim_m_{0}; | |||
| dnnl_dim_t dim_n_{0}; | |||
| dnnl_dim_t dim_k_{0}; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| MatMul, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| MatMulCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,106 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| #include <vector> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, | |||
| const std::vector<size_t> &src_shape, int kernel_size, int stride, | |||
| std::vector<int> *padding_l, std::vector<int> *padding_r) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| if (src_shape.size() < 2) { | |||
| MS_LOG(EXCEPTION) << "set pad only support src dim >= 2!"; | |||
| } | |||
| // the trailing two axes of src_shape are the spatial extents (height, width) | |||
| std::vector<int> spatial_dims; | |||
| spatial_dims.emplace_back(SizeToInt(src_shape[src_shape.size() - 2])); | |||
| spatial_dims.emplace_back(SizeToInt(src_shape[src_shape.size() - 1])); | |||
| int rad = kernel_size / 2; | |||
| int need_pad = kernel_size - 1; | |||
| MS_LOG(INFO) << "pad mode " << pad_mode; | |||
| if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) { | |||
| // SAME: distribute the (kernel_size - 1) cells of padding across both | |||
| // sides, less the remainder the stride already covers | |||
| for (auto wh : spatial_dims) { | |||
| int re = (wh - 1) % stride; | |||
| int pad = std::max(rad - (re / 2), 0); | |||
| padding_r->emplace_back(pad); | |||
| pad = std::max(need_pad - pad - re, 0); | |||
| padding_l->emplace_back(pad); | |||
| } | |||
| } else if (pad_mode == PAD_MODE_LOWER_VALID || pad_mode == PAD_MODE_UPPER_VALID) { | |||
| MS_LOG(INFO) << "pad valid"; | |||
| padding_l->emplace_back(0); | |||
| padding_l->emplace_back(0); | |||
| padding_r->emplace_back(0); | |||
| padding_r->emplace_back(0); | |||
| } else { | |||
| std::vector<int> pad = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, PAD); | |||
| if (pad.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "wrong pad size in max pooling " << pad.size(); | |||
| } | |||
| padding_l->emplace_back(pad[0]); | |||
| padding_l->emplace_back(pad[1]); | |||
| padding_r->emplace_back(pad[2]); | |||
| padding_r->emplace_back(pad[3]); | |||
| } | |||
| } | |||
| dnnl::memory::format_tag MKLCPUKernel::GetDefaultFormatTag(const dnnl::memory::dims &dims) const { | |||
| dnnl::memory::format_tag mem_tag; | |||
| auto dim_size = dims.size(); | |||
| if (dim_size == 4) { | |||
| mem_tag = dnnl::memory::format_tag::abcd; | |||
| } else if (dim_size == 3) { | |||
| mem_tag = dnnl::memory::format_tag::abc; | |||
| } else if (dim_size == 2) { | |||
| mem_tag = dnnl::memory::format_tag::ab; | |||
| } else if (dim_size == 1) { | |||
| mem_tag = dnnl::memory::format_tag::a; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "kernel dims invalid " << dim_size; | |||
| } | |||
| return mem_tag; | |||
| } | |||
| dnnl::memory::desc MKLCPUKernel::GetDefaultMemDesc(const std::vector<size_t> &shape) { | |||
| dnnl::memory::dims dims; | |||
| dims.insert(dims.end(), shape.begin(), shape.end()); | |||
| dnnl::memory::format_tag mem_tag = GetDefaultFormatTag(dims); | |||
| dnnl::memory::desc mem_desc(dims, dnnl::memory::data_type::f32, mem_tag); | |||
| return mem_desc; | |||
| } | |||
| void MKLCPUKernel::AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc) { | |||
| arguments_[arg_key] = MKLKernelEngine::Get().CreateMemory(mem_desc, alloc); | |||
| } | |||
| void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) { | |||
| auto arg_iter = arguments_.find(arg_key); | |||
| if (arg_iter != arguments_.end()) { | |||
| arg_iter->second.set_data_handle(ptr); | |||
| } | |||
| } | |||
| void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); } | |||
| void MKLCPUKernel::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) { | |||
| MKLKernelEngine::Get().Reorder(src_mem, dst_mem); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
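| // -------------------------------------------------------------------- | |||
| // Illustrative sketch (editor's addition, not part of the patch): the | |||
| // SAME-padding arithmetic from GetPadding() above, extracted into a free | |||
| // function with a worked case. The function name and numbers are invented. | |||
| // -------------------------------------------------------------------- | |||
| #include <algorithm> | |||
| #include <cstdio> | |||
| void same_pad(int wh, int kernel_size, int stride, int *pad_l, int *pad_r) { | |||
| int rad = kernel_size / 2; | |||
| int need_pad = kernel_size - 1; | |||
| int re = (wh - 1) % stride; | |||
| *pad_r = std::max(rad - re / 2, 0); | |||
| *pad_l = std::max(need_pad - *pad_r - re, 0); | |||
| } | |||
| int main() { | |||
| int l = 0, r = 0; | |||
| same_pad(5, 3, 2, &l, &r);  // extent 5, 3-wide kernel, stride 2 | |||
| std::printf("pad_l=%d pad_r=%d\n", l, r);  // prints: pad_l=1 pad_r=1 | |||
| return 0; | |||
| } | |||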
| @@ -0,0 +1,52 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include "dnnl.hpp" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class MKLCPUKernel : public CPUKernel { | |||
| public: | |||
| MKLCPUKernel() = default; | |||
| ~MKLCPUKernel() override = default; | |||
| protected: | |||
| void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape, | |||
| int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r); | |||
| void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false); | |||
| void SetArgumentHandle(int arg_key, void *ptr); | |||
| dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; | |||
| dnnl::memory::desc GetDefaultMemDesc(const std::vector<size_t> &shape); | |||
| void ExecutePrimitive(); | |||
| std::unordered_map<int, dnnl::memory> arguments_; | |||
| std::shared_ptr<dnnl::primitive> primitive_{nullptr}; | |||
| inline dnnl::memory::desc formatted_md(const dnnl::memory::dims &dimensions, dnnl::memory::format_tag layout) { | |||
| return dnnl::memory::desc{{dimensions}, dnnl::memory::data_type::f32, layout}; | |||
| } | |||
| void Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem); | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "dnnl.hpp" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void MKLKernelEngine::Execute(const std::shared_ptr<dnnl::primitive> &primitive, | |||
| const std::unordered_map<int, dnnl::memory> &arguments) { | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| primitive->execute(stream_, arguments); | |||
| (void)stream_.wait(); | |||
| } | |||
| dnnl::memory MKLKernelEngine::CreateMemory(const dnnl::memory::desc &mem_desc, bool alloc) { | |||
| if (alloc) { | |||
| // dnnl allocates and owns the buffer | |||
| return dnnl::memory(mem_desc, engine_); | |||
| } else { | |||
| // no buffer yet: the data handle is bound later via set_data_handle | |||
| return dnnl::memory(mem_desc, engine_, nullptr); | |||
| } | |||
| } | |||
| void MKLKernelEngine::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) { | |||
| dnnl::reorder(*src_mem, *dst_mem).execute(stream_, *src_mem, *dst_mem); | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
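| // -------------------------------------------------------------------- | |||
| // Illustrative sketch (editor's addition, not part of the patch): the raw | |||
| // DNNL calls MKLKernelEngine wraps -- create a CPU engine and stream, wrap | |||
| // user buffers in dnnl::memory, and run a reorder between two layouts. | |||
| // Standalone demo assuming oneDNN/DNNL 1.x; the buffers are invented. | |||
| // -------------------------------------------------------------------- | |||
| #include <cstdio> | |||
| #include "dnnl.hpp" | |||
| int main() { | |||
| dnnl::engine engine(dnnl::engine::kind::cpu, 0); | |||
| dnnl::stream stream(engine); | |||
| float src_data[4] = {1, 2, 3, 4}; | |||
| float dst_data[4] = {0}; | |||
| // "ab" is plain row-major, "ba" the transposed layout of a 2x2 block | |||
| dnnl::memory::desc src_desc({2, 2}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::ab); | |||
| dnnl::memory::desc dst_desc({2, 2}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::ba); | |||
| dnnl::memory src(src_desc, engine, src_data);  // non-owning, like CreateMemory(..., false) | |||
| dnnl::memory dst(dst_desc, engine, dst_data); | |||
| dnnl::reorder(src, dst).execute(stream, src, dst); | |||
| stream.wait(); | |||
| std::printf("%g %g %g %g\n", dst_data[0], dst_data[1], dst_data[2], dst_data[3]);  // prints: 1 3 2 4 | |||
| return 0; | |||
| } | |||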
| @@ -0,0 +1,61 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) { | |||
| MS_LOG(EXCEPTION) << "mul only support same dim input or tensor * scalar " << src0_shape.size() << " vs " | |||
| << src1_shape.size(); | |||
| } | |||
| if (src1_shape.size() < src0_shape.size()) { | |||
| // align a lower-rank second input by appending trailing axes of extent 1, | |||
| // which dnnl::binary broadcasts over | |||
| for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) { | |||
| src1_shape.emplace_back(1); | |||
| } | |||
| } | |||
| dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape); | |||
| dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape); | |||
| dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape); | |||
| dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_mem_desc, src1_mem_desc, dst_mem_desc); | |||
| auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::binary>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC_0, src0_mem_desc); | |||
| AddArgument(DNNL_ARG_SRC_1, src1_mem_desc); | |||
| AddArgument(DNNL_ARG_DST, dst_mem_desc); | |||
| } | |||
| bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "mul error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
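| // -------------------------------------------------------------------- | |||
| // Illustrative sketch (editor's addition, not part of the patch): the shape | |||
| // alignment InitKernel() performs before building the dnnl::binary | |||
| // descriptor. Missing axes are appended on the right with extent 1 (note: | |||
| // NumPy-style broadcasting would instead prepend on the left), and dnnl | |||
| // broadcasts over them. Shapes here are invented. | |||
| // -------------------------------------------------------------------- | |||
| #include <cstdio> | |||
| #include <vector> | |||
| int main() { | |||
| std::vector<size_t> src0_shape{8, 4}; | |||
| std::vector<size_t> src1_shape{8};  // rank-1 second input | |||
| while (src1_shape.size() < src0_shape.size()) { | |||
| src1_shape.emplace_back(1); | |||
| } | |||
| // src1 is now {8, 1}: one value per row, broadcast across the 4 columns | |||
| std::printf("%zu x %zu\n", src1_shape[0], src1_shape[1]);  // prints: 8 x 1 | |||
| return 0; | |||
| } | |||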
| @@ -0,0 +1,42 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class MulCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| MulCPUKernel() = default; | |||
| ~MulCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| Mul, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| MulCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,69 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h" | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE); | |||
| std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES); | |||
| if (kernel_sizes.size() != 4 || strides.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size(); | |||
| } | |||
| // attributes are NCHW-ordered: index 2 is height, index 3 is width | |||
| dnnl::memory::dims strides_dims{strides[2], strides[3]}; | |||
| dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]}; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING); | |||
| std::vector<int> int_padding_l; | |||
| std::vector<int> int_padding_r; | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "pooling get padding failed"; | |||
| } | |||
| dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]}; | |||
| dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]}; | |||
| dnnl::pooling_forward::desc desc = | |||
| dnnl::pooling_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::pooling_max, src_desc, dst_desc, | |||
| strides_dims, kernels_dims, padding_l, padding_r); | |||
| auto prim_desc = dnnl::pooling_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::pooling_forward>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DST, dst_desc); | |||
| AddArgument(DNNL_ARG_WORKSPACE, prim_desc.workspace_desc()); | |||
| } | |||
| bool PoolingCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
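| // -------------------------------------------------------------------- | |||
| // Illustrative sketch (editor's addition, not part of the patch): with the | |||
| // SAME padding produced by GetPadding(), the pooled output extent works out | |||
| // to ceil(input / stride); with VALID it is floor((input - kernel) / stride) + 1. | |||
| // A quick check with invented numbers. | |||
| // -------------------------------------------------------------------- | |||
| #include <cstdio> | |||
| int main() { | |||
| int in = 7, k = 3, s = 2; | |||
| int same = (in + s - 1) / s;  // ceil(7 / 2) == 4 | |||
| int valid = (in - k) / s + 1;  // floor(4 / 2) + 1 == 3 | |||
| std::printf("same=%d valid=%d\n", same, valid); | |||
| return 0; | |||
| } | |||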
| @@ -0,0 +1,41 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class PoolingCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| PoolingCPUKernel() = default; | |||
| ~PoolingCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL(MaxPool, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| PoolingCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,124 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h" | |||
| #include <string> | |||
| #include <utility> | |||
| #include <algorithm> | |||
| #include "common/utils.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| dst_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); | |||
| std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE); | |||
| std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES); | |||
| if (kernel_sizes.size() != 4 || strides.size() != 4 || src_shape_.size() != 4 || dst_shape_.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "pooling grad invalid input size"; | |||
| } | |||
| std::vector<int> padding_r; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING); | |||
| kernel_size_ = kernel_sizes[3]; | |||
| stride_ = strides[3]; | |||
| GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r); | |||
| } | |||
| // Routes one output-gradient value to the argmax of its pooling window. | |||
| // box[0] is the window's row range and box[1] its column range; row_max_pair | |||
| // caches, per column, the index and value of the column-wise max over the | |||
| // window's rows (first == 0 marks a column not yet computed), so the cache | |||
| // is reused as the window slides along the row. | |||
| void PoolingGradCPUKernel::RowPoolingGrad(const float *input, float *output, float diff, | |||
| const std::vector<std::pair<size_t, size_t>> &box, | |||
| std::vector<std::pair<size_t, float>> *row_max_pair) { | |||
| float max_value = 0; | |||
| size_t max_index = box[1].second;  // sentinel: no column chosen yet | |||
| size_t src_width = src_shape_[3]; | |||
| size_t index_start; | |||
| size_t index; | |||
| for (size_t i = box[1].first; i < box[1].second; ++i) { | |||
| if ((*row_max_pair)[i].first == 0) { | |||
| // scan the window's rows in column i to find the column-wise max | |||
| index_start = box[0].first * src_width; | |||
| for (size_t j = box[0].first; j < box[0].second; ++j) { | |||
| index = index_start + i; | |||
| if (input[index] > (*row_max_pair)[i].second || j == box[0].first) { | |||
| (*row_max_pair)[i].second = input[index]; | |||
| (*row_max_pair)[i].first = index; | |||
| } | |||
| index_start += src_width; | |||
| } | |||
| } | |||
| // track the best column seen so far within the window | |||
| if ((*row_max_pair)[i].second > max_value || max_index == box[1].second) { | |||
| max_value = (*row_max_pair)[i].second; | |||
| max_index = i; | |||
| } | |||
| } | |||
| output[(*row_max_pair)[max_index].first] += diff; | |||
| } | |||
| // Accumulates the max-pool gradient for one (n, c) plane. The window origin | |||
| // starts at the negative top/left padding and advances by the stride. | |||
| void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *diff, float *output) { | |||
| int src_width = SizeToInt(src_shape_[3]); | |||
| int src_height = SizeToInt(src_shape_[2]); | |||
| std::vector<std::pair<size_t, float>> row_max_pair(src_shape_[3]); | |||
| std::vector<std::pair<size_t, size_t>> box(2);  // [0]: window row range, [1]: window column range | |||
| int h_start = -padding_l_[0]; | |||
| size_t diff_index = 0; | |||
| for (size_t h = 0; h < dst_shape_[2]; ++h) { | |||
| box[0].first = IntToSize(std::max(h_start, 0)); | |||
| box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height)); | |||
| // reset the per-column max cache for the new window row range | |||
| for (size_t w = 0; w < src_shape_[3]; ++w) { | |||
| row_max_pair[w].first = 0; | |||
| row_max_pair[w].second = 0; | |||
| } | |||
| int w_start = -padding_l_[1]; | |||
| for (size_t w = 0; w < dst_shape_[3]; ++w) { | |||
| box[1].first = IntToSize(std::max(w_start, 0)); | |||
| box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width)); | |||
| RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair); | |||
| diff_index += 1; | |||
| w_start += stride_; | |||
| } | |||
| h_start += stride_; | |||
| } | |||
| } | |||
| bool PoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 3 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "pooling grad error input output size!"; | |||
| } | |||
| auto input = reinterpret_cast<float *>(inputs[0]->addr); | |||
| auto diff = reinterpret_cast<float *>(inputs[2]->addr); | |||
| auto output = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto ret = memset_s(output, outputs[0]->size, 0, outputs[0]->size); | |||
| if (ret != 0) { | |||
| MS_LOG(EXCEPTION) << "pooling grad memset error"; | |||
| } | |||
| // process each (n, c) plane independently, advancing the pointers plane by plane | |||
| size_t src_wh = src_shape_[2] * src_shape_[3]; | |||
| size_t dst_wh = dst_shape_[2] * dst_shape_[3]; | |||
| for (size_t n = 0; n < src_shape_[0]; ++n) { | |||
| for (size_t c = 0; c < src_shape_[1]; ++c) { | |||
| ChannelPoolingGrad(input, diff, output); | |||
| input = input + src_wh; | |||
| output = output + src_wh; | |||
| diff = diff + dst_wh; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
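| // -------------------------------------------------------------------- | |||
| // Illustrative sketch (editor's addition, not part of the patch): the effect | |||
| // ChannelPoolingGrad() implements for one channel -- each output-gradient | |||
| // element is routed to the argmax position of its pooling window. A naive | |||
| // reference without the column-max cache, valid padding only; shapes and | |||
| // values are invented. | |||
| // -------------------------------------------------------------------- | |||
| #include <cstdio> | |||
| int main() { | |||
| const int H = 4, W = 4, K = 2, S = 2;  // 4x4 input, 2x2 window, stride 2 | |||
| const float in[H * W] = {1, 2, 0, 0, 3, 4, 0, 5, 0, 0, 9, 0, 7, 0, 0, 0}; | |||
| const float diff[4] = {10, 20, 30, 40};  // 2x2 output gradient | |||
| float grad[H * W] = {0}; | |||
| for (int oh = 0, d = 0; oh < H / S; ++oh) { | |||
| for (int ow = 0; ow < W / S; ++ow, ++d) { | |||
| int best = oh * S * W + ow * S;  // argmax within the window | |||
| for (int i = 0; i < K; ++i) { | |||
| for (int j = 0; j < K; ++j) { | |||
| int idx = (oh * S + i) * W + ow * S + j; | |||
| if (in[idx] > in[best]) best = idx; | |||
| } | |||
| } | |||
| grad[best] += diff[d]; | |||
| } | |||
| } | |||
| for (int i = 0; i < H * W; ++i) std::printf("%g%c", grad[i], i % W == W - 1 ? '\n' : ' '); | |||
| return 0; | |||
| } | |||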
| @@ -0,0 +1,56 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <utility> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class PoolingGradCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| PoolingGradCPUKernel() = default; | |||
| ~PoolingGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| private: | |||
| void RowPoolingGrad(const float *input, float *output, float diff, const std::vector<std::pair<size_t, size_t>> &box, | |||
| std::vector<std::pair<size_t, float>> *row_max_pair); | |||
| void ChannelPoolingGrad(const float *input, const float *diff, float *output); | |||
| int stride_{0}, kernel_size_{0}; | |||
| std::vector<int> padding_l_; | |||
| std::vector<size_t> src_shape_; | |||
| std::vector<size_t> dst_shape_; | |||
| }; | |||
| MS_REG_CPU_KERNEL(MaxPoolGrad, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| PoolingGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,52 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 && src_shape.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "relu kernel dims invalid " << src_shape.size(); | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::eltwise_forward::desc desc = | |||
| dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); | |||
| auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DST, src_desc); | |||
| } | |||
| bool ReluCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ReluCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| ReluCPUKernel() = default; | |||
| ~ReluCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReluCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,69 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| if (src_shape.size() != 4 && src_shape.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "relu grad kernel dims invalid " << src_shape.size(); | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::eltwise_forward::desc forward_desc = | |||
| dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); | |||
| auto forward_prim_desc = dnnl::eltwise_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); | |||
| dnnl::eltwise_backward::desc backward_desc = | |||
| dnnl::eltwise_backward::desc(dnnl::algorithm::eltwise_relu, src_desc, src_desc, 0.0, 0.0); | |||
| auto backward_prim_desc = | |||
| dnnl::eltwise_backward::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); | |||
| primitive_ = std::make_shared<dnnl::eltwise_backward>(backward_prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DIFF_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DIFF_DST, src_desc); | |||
| } | |||
| bool ReluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.size() < 2 || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "ReluGrad kernel requires 2 inputs and 1 output"; | |||
| } | |||
| if (inputs[0]->size != outputs[0]->size) { | |||
| MS_LOG(EXCEPTION) << "ReluGrad input and output buffer sizes must match"; | |||
| } | |||
| // dnnl computes diff_src = diff_dst * (src > 0); DIFF_SRC and DIFF_DST both | |||
| // point at inputs[0], so the gradient is computed in place and then copied out | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| size_t mem_bits = outputs[0]->size; | |||
| auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits); | |||
| if (ret != 0) { | |||
| MS_LOG(EXCEPTION) << "memcpy_s failed, error code " << ret; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| class ReluGradCPUKernel : public MKLCPUKernel { | |||
| public: | |||
| ReluGradCPUKernel() = default; | |||
| ~ReluGradCPUKernel() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| }; | |||
| MS_REG_CPU_KERNEL( | |||
| ReluGrad, | |||
| KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| ReluGradCPUKernel); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_ | |||
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h" | |||
| #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" | |||
| #include "runtime/device/cpu/cpu_device_address.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); | |||
| std::vector<int> axis_list = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, AXIS); | |||
| if (axis_list.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "cpu softmax only support input axis size 1"; | |||
| } | |||
| int axis = axis_list[0]; | |||
| if (axis < 0) { | |||
| // normalize negative axes, e.g. -1 means the last axis | |||
| axis += SizeToInt(src_shape.size()); | |||
| } | |||
| if (axis < 0 || axis >= SizeToInt(src_shape.size())) { | |||
| axis = SizeToInt(src_shape.size()) - 1; | |||
| } | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::softmax_forward::desc desc = dnnl::softmax_forward::desc(dnnl::prop_kind::forward_training, src_desc, axis); | |||
| auto prim_desc = dnnl::softmax_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); | |||
| primitive_ = std::make_shared<dnnl::softmax_forward>(prim_desc); | |||
| AddArgument(DNNL_ARG_SRC, src_desc); | |||
| AddArgument(DNNL_ARG_DST, src_desc); | |||
| } | |||
| bool SoftmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, | |||
| const std::vector<kernel::AddressPtr> & /*workspace*/, | |||
| const std::vector<kernel::AddressPtr> &outputs) { | |||
| if (inputs.empty() || outputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "softmax error input output size!"; | |||
| } | |||
| SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); | |||
| SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); | |||
| ExecutePrimitive(); | |||
| return true; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
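| // -------------------------------------------------------------------- | |||
| // Illustrative sketch (editor's addition, not part of the patch): the | |||
| // reference computation the dnnl softmax primitive performs over the chosen | |||
| // axis (here the last one), with the usual max subtraction for numerical | |||
| // stability. Values are invented. | |||
| // -------------------------------------------------------------------- | |||
| #include <cmath> | |||
| #include <cstdio> | |||
| int main() { | |||
| const float x[2][3] = {{1, 2, 3}, {0, 0, 10}}; | |||
| float y[2][3]; | |||
| for (int r = 0; r < 2; ++r) { | |||
| float mx = x[r][0]; | |||
| for (int c = 1; c < 3; ++c) mx = std::fmax(mx, x[r][c]); | |||
| float sum = 0; | |||
| for (int c = 0; c < 3; ++c) sum += y[r][c] = std::exp(x[r][c] - mx); | |||
| for (int c = 0; c < 3; ++c) y[r][c] /= sum; | |||
| } | |||
| std::printf("%.3f %.3f %.3f\n", y[0][0], y[0][1], y[0][2]);  // prints: 0.090 0.245 0.665 | |||
| return 0; | |||
| } | |||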