diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c5901b1..acef6127 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,133 +1,55 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - cmake_minimum_required(VERSION 3.14) project (GraphEngine[CXX]) -set(CMAKE_CXX_STANDARD 17) -add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) -set(GE_SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}) -set(GE_PROTO_DIR ${GE_SOURCE_DIR}/) +set(GE_CODE_DIR ${CMAKE_CURRENT_LIST_DIR}) if (NOT BUILD_PATH) set(BUILD_PATH "${CMAKE_SOURCE_DIR}/build") endif() -# architecture: aarch64 or x86_64 -message(STATUS "System architecture: ${CMAKE_HOST_SYSTEM_PROCESSOR}") -# system: euleros or ubuntu -if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - execute_process( - COMMAND bash "-c" "cat /etc/os-release | grep ^ID= | awk -F '=' '{print $2}'" - OUTPUT_VARIABLE SYSTEM_TYPE - ) - MESSAGE(STATUS "System type: ${SYSTEM_TYPE}.") -endif() -# download json headers, rather than whole repository -include(${GE_SOURCE_DIR}/cmake/ge_utils.cmake) -include(${GE_SOURCE_DIR}/cmake/external_libs/json.cmake) -include(${GE_SOURCE_DIR}/cmake/external_libs/eigen.cmake) -include(${GE_SOURCE_DIR}/cmake/external_libs/gtest.cmake) -include(${GE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake) -include(${GE_SOURCE_DIR}/cmake/external_libs/onnx.cmake) -include(${GE_SOURCE_DIR}/cmake/external_libs/securec.cmake) -set(CMAKE_SKIP_RPATH TRUE) +option(ENABLE_OPEN_SRC "Enable graphengine compile in opensource." 
FALSE) + +if (ENABLE_OPEN_SRC) + set(HI_PYTHON python3.7) + + include(cmake/external_libs/protobuf_shared.cmake) + include(cmake/external_libs/protoc.cmake) + include(cmake/external_libs/gflags.cmake) + include(cmake/external_libs/securec.cmake) + include(cmake/external_libs/json.cmake) + include(cmake/FindModule.cmake) + include(cmake/intf_pub_linux.cmake) -# for CPU/GPU mode, find c_sec and slog from local prebuild -if(NOT ENABLE_D AND NOT GE_ONLY) - set(GE_PREBUILD_PATH ${GE_SOURCE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}) - find_library(slog libslog.so ${GE_PREBUILD_PATH}) -# if D_LINK_PATH is set in environment variables, search libraries in given path -elseif(DEFINED ENV{D_LINK_PATH}) - # D_LINK_PATH is set - set(GE_LIB_PATH $ENV{D_LINK_PATH}) - set(GE_SYS_ARCH "") - if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") - # x86 ubuntu - set(GE_SYS_ARCH "x86_64") - elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") - # arm euleros - set(GE_SYS_ARCH "aarch64") - else() - message(FATAL_ERROR "Running on a unsupported architecture: ${SYSTEM_TYPE}, build terminated") - endif() - set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) - find_library(slog libslog.so ${GE_LIB_PATH}) - find_library(mmpa libmmpa.so ${GE_LIB_PATH}) - find_library(runtime libruntime.so ${GE_LIB_PATH}) - find_library(msprof libmsprof.so ${GE_LIB_PATH}) - find_library(register libregister.so ${GE_LIB_PATH}) - find_library(hccl libhccl.so ${GE_LIB_PATH}) - find_library(resource libresource.so ${GE_LIB_PATH}) - find_library(error_manager liberror_manager.so ${GE_LIB_PATH}) -else() - # Ascend mode if(DEFINED ENV{ASCEND_CUSTOM_PATH}) set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH}) else() set(ASCEND_DIR /usr/local/Ascend) endif() + set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64/common) set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64) - find_library(slog libslog.so ${ASCEND_DRIVER_DIR}) - find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR}) - find_library(msprof libmsprof.so ${ASCEND_DRIVER_DIR}) - - find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) - find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) - find_library(register libregister.so ${ASCEND_RUNTIME_DIR}) - find_library(resource libresource.so ${ASCEND_RUNTIME_DIR}) - find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) -endif() - -# add compile flags -if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") - message("Build in Debug mode") - set(CMAKE_C_FLAGS "-O0 -g -Wall -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe -fPIC ${CMAKE_C_FLAGS}") - set(CMAKE_CXX_FLAGS "-O0 -g -Wall -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe -fPIC ${CMAKE_CXX_FLAGS}") - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -rdynamic") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic") - endif() + find_module(slog libslog.so ${ASCEND_DRIVER_DIR}) + find_module(mmpa libmmpa.so ${ASCEND_DRIVER_DIR}) + find_module(msprof libmsprof.so ${ASCEND_DRIVER_DIR}) + find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) + find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) + find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) + find_module(runtime_compile libruntime_compile.so ${ASCEND_RUNTIME_DIR}) + find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) + find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) + find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) + + set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) + set(PARSER_DIR 
${CMAKE_CURRENT_LIST_DIR}/parser)
+    set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
+
+    add_subdirectory(metadef)
+    #add_subdirectory(metadef/graph)
+    #add_subdirectory(metadef/register)
 else()
-    set(CMAKE_C_FLAGS "-O2 -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_C_FLAGS}")
-    set(CMAKE_CXX_FLAGS "-O2 -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_CXX_FLAGS}")
-endif ()
-
-# force __FILE__ to show relative path of file, from source directory, as cmake project makes __FILE__ absolute directory
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILE__='\"$(subst $(realpath ${CMAKE_SOURCE_DIR})/,,$(abspath $<))\"' -Wno-builtin-macro-redefined")
-
-# compile libraries from following directories
-# libgraph is compiled in any situation
-add_subdirectory(${GE_SOURCE_DIR}/metadef/graph)
-if(ENABLE_D)
-    # if MindSpore compiles in D mode, compile the following libraries
-    add_subdirectory(${GE_SOURCE_DIR}/ge/common)
-    add_subdirectory(${GE_SOURCE_DIR}/ge/ge_runtime)
-elseif(GE_ONLY)
-    # standalone GraphEngine compiles all following libraries
-    add_subdirectory(${GE_SOURCE_DIR}/ge/common)
-    add_subdirectory(${GE_SOURCE_DIR}/ge/ge_runtime)
-    add_subdirectory(${GE_SOURCE_DIR}/ge/ge_local_engine)
-    add_subdirectory(${GE_SOURCE_DIR}/ge/graph/build/memory)
-    add_subdirectory(${GE_SOURCE_DIR}/ge/)
-    add_subdirectory(${GE_SOURCE_DIR}/ge/plugin/engine)
+    set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/../metadef)
+    set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/../parser)
+    set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
 endif()
-# if (ENABLE_GE_COV OR ENABLE_GE_UT OR ENABLE_GE_ST)
-#    add_subdirectory(tests)
-# endif()
-
+add_subdirectory(ge)
diff --git a/cmake/FindModule.cmake b/cmake/FindModule.cmake
new file mode 100644
index 00000000..74a63634
--- /dev/null
+++ b/cmake/FindModule.cmake
@@ -0,0 +1,23 @@
+#[[
+  module - the name of the imported target to create
+  name   - the library file name to search for
+  path   - the path to search in
+#]]
+function(find_module module name path)
+  if (TARGET ${module})
+    return()
+  endif()
+  find_library(${module}_LIBRARY_DIR NAMES ${name} NAMES_PER_DIR PATHS ${path}
+    PATH_SUFFIXES lib
+  )
+
+  message(STATUS "find ${name} location ${${module}_LIBRARY_DIR}")
+  if ("${${module}_LIBRARY_DIR}" STREQUAL "${module}_LIBRARY_DIR-NOTFOUND")
+    message(FATAL_ERROR "${name} not found in ${path}")
+  endif()
+
+  add_library(${module} SHARED IMPORTED)
+  set_target_properties(${module} PROPERTIES
+    IMPORTED_LOCATION ${${module}_LIBRARY_DIR}
+  )
+endfunction()
diff --git a/cmake/external_libs/eigen.cmake b/cmake/external_libs/eigen.cmake
deleted file mode 100644
index b43e70b4..00000000
--- a/cmake/external_libs/eigen.cmake
+++ /dev/null
@@ -1,13 +0,0 @@
-set(Eigen3_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
-set(Eigen3_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
-set(Eigen3_NS "ge_")
-graphengine_add_pkg(Eigen3
-        VER 3.3.7
-        URL https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz
-        MD5 9e30f67e8531477de4117506fe44669b
-        CMAKE_OPTION -DBUILD_TESTING=OFF)
-
-find_package(Eigen3 3.3.7 REQUIRED ${GE_FIND_NO_DEFAULT_PATH})
-set_property(TARGET Eigen3::Eigen PROPERTY IMPORTED_GLOBAL TRUE)
-add_library(graphengine::eigen ALIAS Eigen3::Eigen)
-include_directories(${EIGEN3_INCLUDE_DIRS})
diff --git a/cmake/external_libs/gflags.cmake b/cmake/external_libs/gflags.cmake
new file mode 100755
index 00000000..70cecb10
--- /dev/null
+++ b/cmake/external_libs/gflags.cmake
@@ -0,0 +1,38 @@
+if (HAVE_GFLAGS)
+  return()
+endif()
+
+include(ExternalProject)
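[Editor's note] The find_module() helper introduced above is how this patch replaces the old find_library() calls: it wraps a prebuilt .so in a SHARED IMPORTED target that later targets can link by name. A minimal usage sketch, based on the calls already made in the top-level CMakeLists.txt of this patch; the ge_example target is hypothetical and not part of the change:

# create an imported target "slog" from libslog.so found under the Ascend driver directory
find_module(slog libslog.so ${ASCEND_DRIVER_DIR})
# a consumer then links the imported target like any ordinary library
add_library(ge_example SHARED example.cc)          # hypothetical consumer
target_link_libraries(ge_example PRIVATE slog)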
+#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +ExternalProject_Add(gflags_build + #URL http://tfk.inhuawei.com/api/containers/container1/download/protobuf-3.8.0.tar.gz + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + SOURCE_DIR ${GE_CODE_DIR}/../third_party/gflags/src/gflags-2.2.2 + CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) + +set(GFLAGS_PKG_DIR ${CMAKE_INSTALL_PREFIX}/gflags) + +add_library(gflags_static STATIC IMPORTED) + +set_target_properties(gflags_static PROPERTIES + IMPORTED_LOCATION ${GFLAGS_PKG_DIR}/lib/libgflags.a +) + +add_library(gflags INTERFACE) +target_include_directories(gflags INTERFACE ${GFLAGS_PKG_DIR}/include) +target_link_libraries(gflags INTERFACE gflags_static) + +add_dependencies(gflags gflags_build) + +set(HAVE_GFLAGS TRUE CACHE BOOL "gflags build add") diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake deleted file mode 100644 index e6d1ab1d..00000000 --- a/cmake/external_libs/gtest.cmake +++ /dev/null @@ -1,16 +0,0 @@ -set(ge_gtest_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") -set(ge_gtest_CFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") - -graphengine_add_pkg(ge_gtest - VER 1.8.0 - LIBS gtest gtest_main - URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz - MD5 16877098823401d1bf2ed7891d7dce36 - CMAKE_OPTION -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON - -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON) - -add_library(graphengine::gtest ALIAS ge_gtest::gtest) -add_library(graphengine::gtest_main ALIAS ge_gtest::gtest_main) -include_directories(${ge_gtest_INC}) -file(COPY ${ge_gtest_INC}/../lib/libgtest.so DESTINATION ${CMAKE_SOURCE_DIR}/build/graphengine) -file(COPY ${ge_gtest_INC}/../lib/libgtest_main.so DESTINATION ${CMAKE_SOURCE_DIR}/build/graphengine) diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake old mode 100644 new mode 100755 index 4b9fa4e3..6d8d6def --- a/cmake/external_libs/json.cmake +++ b/cmake/external_libs/json.cmake @@ -1,9 +1,23 @@ -set(nlohmann_json_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") -set(nlohmann_json_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") -graphengine_add_pkg(ge_nlohmann_json - VER 3.6.1 - HEAD_ONLY ./ - URL https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip - MD5 0dc903888211db3a0f170304cd9f3a89) -include_directories(${ge_nlohmann_json_INC}) -add_library(graphengine::json ALIAS ge_nlohmann_json) \ No newline at end of file +if (HAVE_JSON) + return() +endif() + +include(ExternalProject) + +set(JSON_SRC_DIR ${GE_CODE_DIR}/../third_party/json/include) +ExternalProject_Add(json_build + #URL https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip + #URL /home/txd/workspace/cloud_code/pkg/include.zip + SOURCE_DIR ${JSON_SRC_DIR} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + EXCLUDE_FROM_ALL TRUE +) + + +add_library(json INTERFACE) +target_include_directories(json INTERFACE ${JSON_SRC_DIR}) 
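[Editor's note] The gflags recipe above follows the pattern used throughout this patch: an ExternalProject build, an IMPORTED static library (gflags_static), and a thin INTERFACE target (gflags) that carries the include path; json below applies the same idea to a header-only dependency. A hedged consumer sketch, assuming a hypothetical ge_example_tool target that is not part of the patch:

add_executable(ge_example_tool main.cc)             # hypothetical consumer
# linking the INTERFACE target propagates ${GFLAGS_PKG_DIR}/include and libgflags.a
target_link_libraries(ge_example_tool PRIVATE gflags)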
+add_dependencies(json json_build) + +set(HAVE_JSON TRUE CACHE BOOL "json build add") diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake old mode 100644 new mode 100755 index 621f67c6..49d40f8e --- a/cmake/external_libs/onnx.cmake +++ b/cmake/external_libs/onnx.cmake @@ -1,5 +1,29 @@ -graphengine_add_pkg(onnx - VER 1.6.0 - HEAD_ONLY ./ - URL https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz - MD5 512f2779d6215d4a36f366b6b9acdf1e) \ No newline at end of file +include(ExternalProject) + +#set(ONNX_SRC_DIR /home/txd/workspace/cloud_code/graphengine/build/graphengine/open_source/onnx) +#set(ONNX_PROTO ${ONNX_SRC_DIR}/onnx/onnx.proto) +set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx) +set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto) +file(MAKE_DIRECTORY ${ONNX_PROTO_DIR}) + +ExternalProject_Add(onnx + #URL https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz + URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz + #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345 + #SOURCE_DIR ${ONNX_SRC_DIR} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + #INSTALL_COMMAND "" + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy /onnx/onnx.proto ${ONNX_PROTO_FILE} + #BUILD_ALWAYS TRUE + EXCLUDE_FROM_ALL TRUE +) + +macro(onnx_protobuf_generate comp c_var h_var) + add_custom_command(OUTPUT ${ONNX_PROTO_FILE} + DEPENDS onnx + ) + ge_protobuf_generate(${comp} ${c_var} ${h_var} ${ONNX_PROTO_FILE}) +endmacro() + + diff --git a/cmake/external_libs/protobuf.cmake b/cmake/external_libs/protobuf.cmake deleted file mode 100644 index bbd86bc4..00000000 --- a/cmake/external_libs/protobuf.cmake +++ /dev/null @@ -1,54 +0,0 @@ -if (NOT TARGET protobuf::protobuf) -set(protobuf_USE_STATIC_LIBS ON) -set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2") -set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") -set(_ge_tmp_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) -string(REPLACE " -Wall" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -graphengine_add_pkg(protobuf - VER 3.8.0 - LIBS protobuf - EXE protoc - URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz - MD5 3d9e32700639618a4d2d342c99d4507a - CMAKE_PATH ../cmake/ - CMAKE_OPTION -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=OFF) -set(CMAKE_CXX_FLAGS ${_ge_tmp_CMAKE_CXX_FLAGS}) -endif() -add_library(graphengine::protobuf ALIAS protobuf::protobuf) -set(PROTOBUF_LIBRARY protobuf::protobuf) -include_directories(${protobuf_INC}) -include_directories(${protobuf_DIRPATH}/src) - -function(ge_protobuf_generate comp c_var h_var) - if(NOT ARGN) - message(SEND_ERROR "Error: ge_protobuf_generate() called without any proto files") - return() - endif() - - set(${c_var}) - set(${h_var}) - - foreach(file ${ARGN}) - get_filename_component(abs_file ${file} ABSOLUTE) - get_filename_component(file_name ${file} NAME_WE) - get_filename_component(file_dir ${abs_file} PATH) - - list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.cc") - list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.h") - - add_custom_command( - OUTPUT "${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.cc" - "${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.h" - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto/${comp}/proto" - COMMAND protobuf::protoc -I${file_dir} 
--cpp_out=${CMAKE_BINARY_DIR}/proto/${comp}/proto ${abs_file} - DEPENDS protobuf::protoc ${abs_file} - COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) - endforeach() - - set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) - set(${c_var} ${${c_var}} PARENT_SCOPE) - set(${h_var} ${${h_var}} PARENT_SCOPE) - -endfunction() diff --git a/cmake/external_libs/protobuf_shared.cmake b/cmake/external_libs/protobuf_shared.cmake new file mode 100755 index 00000000..92a5e146 --- /dev/null +++ b/cmake/external_libs/protobuf_shared.cmake @@ -0,0 +1,59 @@ +if (HAVE_PROTOBUF) + return() +endif() + +include(ExternalProject) +include(GNUInstallDirs) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 $<$:-D_GLIBCXX_USE_CXX11_ABI=0> -O2") +set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") +ExternalProject_Add(protobuf_build + #URL http://tfk.inhuawei.com/api/containers/container1/download/protobuf-3.8.0.tar.gz + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + #SOURCE_DIR ${GE_CODE_DIR}/third_party/protobuf/src/protobuf-3.8.0 + DOWNLOAD_COMMAND ${CMAKE_COMMAND} -E copy_directory ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 + #CONFIGURE_COMMAND ${CMAKE_COMMAND} + #-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} + #-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + #-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + #-DCMAKE_LINKER=${CMAKE_LINKER} + #-DCMAKE_AR=${CMAKE_AR} + #-DCMAKE_RANLIB=${CMAKE_RANLIB} + #-Dprotobuf_WITH_ZLIB=OFF + #-Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protobuf /cmake + CONFIGURE_COMMAND cd + && ./autogen.sh && cd && /configure --prefix=${CMAKE_INSTALL_PREFIX}/protobuf --with-zlib=no CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${protobuf_CXXFLAGS} LDFLAGS=${protobuf_LDFLAGS} + && bash -c "sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=\"\"|g' libtool && sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool" + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) +include(GNUInstallDirs) + +set(PROTOBUF_SHARED_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protobuf) + +add_library(protobuf SHARED IMPORTED) + +file(MAKE_DIRECTORY ${PROTOBUF_SHARED_PKG_DIR}/include) + +set_target_properties(protobuf PROPERTIES + IMPORTED_LOCATION ${PROTOBUF_SHARED_PKG_DIR}/lib/libprotobuf.so +) + +target_include_directories(protobuf INTERFACE ${PROTOBUF_SHARED_PKG_DIR}/include) + +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(FILES ${PROTOBUF_SHARED_PKG_DIR}/lib/libprotobuf.so ${PROTOBUF_SHARED_PKG_DIR}/lib/libprotobuf.so.19.0.0 OPTIONAL + DESTINATION ${INSTALL_LIBRARY_DIR}) + +add_dependencies(protobuf protobuf_build) + +set(HAVE_PROTOBUF TRUE CACHE BOOL "protobuf build add") diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake new file mode 100755 index 00000000..86baef22 --- /dev/null +++ b/cmake/external_libs/protobuf_static.cmake @@ -0,0 +1,43 @@ +include(ExternalProject) +include(GNUInstallDirs) 
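[Editor's note] protobuf_shared.cmake above exposes the autotools-built libprotobuf.so as an IMPORTED target named protobuf, with its include directory attached; protobuf_static.cmake below repeats the pattern for the static archive via protobuf_static. A minimal consumer sketch under those assumptions; ge_proto_user is a hypothetical target, not part of this patch:

add_library(ge_proto_user SHARED user.cc)           # hypothetical consumer
# the imported target supplies IMPORTED_LOCATION (libprotobuf.so) plus the headers
target_link_libraries(ge_proto_user PRIVATE protobuf)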
+#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 $<$:-D_GLIBCXX_USE_CXX11_ABI=0> -O2") +set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") +set(PROTOBUF_STATIC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protobuf_static) +ExternalProject_Add(protobuf_static_build + #URL http://tfk.inhuawei.com/api/containers/container1/download/protobuf-3.8.0.tar.gz + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 + CONFIGURE_COMMAND ${CMAKE_COMMAND} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR} + -DCMAKE_LINKER=${CMAKE_LINKER} + -DCMAKE_AR=${CMAKE_AR} + -DCMAKE_RANLIB=${CMAKE_RANLIB} + -Dprotobuf_WITH_ZLIB=OFF + -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_STATIC_PKG_DIR} /cmake + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) +include(GNUInstallDirs) + +add_library(protobuf_static_lib STATIC IMPORTED) + +set_target_properties(protobuf_static_lib PROPERTIES + IMPORTED_LOCATION ${PROTOBUF_STATIC_PKG_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a +) + +add_library(protobuf_static INTERFACE) +target_include_directories(protobuf_static INTERFACE ${PROTOBUF_STATIC_PKG_DIR}/include) +target_link_libraries(protobuf_static INTERFACE protobuf_static_lib) + +add_dependencies(protobuf_static protobuf_static_build) diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake new file mode 100755 index 00000000..61098b27 --- /dev/null +++ b/cmake/external_libs/protoc.cmake @@ -0,0 +1,102 @@ +if (HAVE_PROTOC) + return() +endif() + +include(ExternalProject) +include(GNUInstallDirs) +#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + (${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2") +set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") +ExternalProject_Add(protoc_build + #URL http://tfk.inhuawei.com/api/containers/container1/download/protobuf-3.8.0.tar.gz + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 + CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) + +set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc) + +set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc) + +function(protobuf_generate 
comp c_var h_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate() called without any proto files") + return() + endif() + set(${c_var}) + set(${h_var}) + + foreach(file ${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc") + list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE) + set(${c_var} ${${c_var}} PARENT_SCOPE) + set(${h_var} ${${h_var}} PARENT_SCOPE) + +endfunction() + +function(protobuf_generate_py comp py_var) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files") + return() + endif() + set(${py_var}) + + foreach(file ${ARGN}) + get_filename_component(abs_file ${file} ABSOLUTE) + get_filename_component(file_name ${file} NAME_WE) + get_filename_component(file_dir ${abs_file} PATH) + get_filename_component(parent_subdir ${file_dir} NAME) + + if("${parent_subdir}" STREQUAL "proto") + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto) + else() + set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir}) + endif() + list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py") + + add_custom_command( + OUTPUT "${proto_output_path}/${file_name}_pb2.py" + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}" + COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file} + DEPENDS protoc_build ${abs_file} + COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM ) + endforeach() + + set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE) + set(${py_var} ${${py_var}} PARENT_SCOPE) + +endfunction() + +set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add") diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake old mode 100644 new mode 100755 index 83a4409d..496f1332 --- a/cmake/external_libs/securec.cmake +++ b/cmake/external_libs/securec.cmake @@ -1,11 +1,60 @@ -graphengine_add_pkg(securec - VER 1.1.10 - URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz - MD5 0782dd2351fde6920d31a599b23d8c91 - LIBS c_sec - PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 - CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release" - ) -include_directories(${securec_INC}) -file(COPY ${securec_INC}/../lib/libc_sec.so DESTINATION ${CMAKE_SOURCE_DIR}/build/graphengine) -add_library(graphengine::securec ALIAS securec::c_sec) \ No newline at end of file +if (HAVE_C_SEC) + return() +endif() + +include(ExternalProject) + +if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR + 
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend")) + set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE) + message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.") +endif() + +ExternalProject_Add(c_sec_build + #URL http://tfk.inhuawei.com/api/containers/container1/download/protobuf-3.8.0.tar.gz + #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz + SOURCE_DIR ${GE_CODE_DIR}/../libc_sec + CONFIGURE_COMMAND ${CMAKE_COMMAND} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_LINKER=${CMAKE_LINKER} + -DCMAKE_AR=${CMAKE_AR} + -DCMAKE_RANLIB=${CMAKE_RANLIB} + -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/c_sec + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + EXCLUDE_FROM_ALL TRUE +) + +set(C_SEC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/c_sec) + +add_library(c_sec SHARED IMPORTED) + +file(MAKE_DIRECTORY ${C_SEC_PKG_DIR}/include) + +set_target_properties(c_sec PROPERTIES + IMPORTED_LOCATION ${C_SEC_PKG_DIR}/lib/libc_sec.so +) + +target_include_directories(c_sec INTERFACE ${C_SEC_PKG_DIR}/include) + +add_dependencies(c_sec c_sec_build) + +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(FILES ${C_SEC_PKG_DIR}/lib/libc_sec.so OPTIONAL + DESTINATION ${INSTALL_LIBRARY_DIR}) + +add_library(c_sec_static_lib STATIC IMPORTED) +set_target_properties(c_sec_static_lib PROPERTIES + IMPORTED_LOCATION ${C_SEC_PKG_DIR}/lib/libc_sec.a +) + +add_library(c_sec_static INTERFACE) +target_include_directories(c_sec_static INTERFACE ${C_SEC_PKG_DIR}/include) +target_link_libraries(c_sec_static INTERFACE c_sec_static_lib) + +add_dependencies(c_sec_static c_sec_build) + +set(HAVE_C_SEC TRUE CACHE BOOL "c_sec build add") diff --git a/cmake/ge_utils.cmake b/cmake/ge_utils.cmake deleted file mode 100644 index 75480ded..00000000 --- a/cmake/ge_utils.cmake +++ /dev/null @@ -1,371 +0,0 @@ -include(FetchContent) -set(FETCHCONTENT_QUIET OFF) - -function(graphengine_add_submodule_obj des_submodule_objs sub_dir submodule_name_obj) - - add_subdirectory(${sub_dir}) - - if(NOT TARGET ${submodule_name_obj}) - message(FATAL_ERROR "Can not find submodule '${submodule_name_obj}'. in ${CMAKE_CURRENT_LIST_FILE}") - endif() - if("$" IN_LIST ${des_submodule_objs}) - message(FATAL_ERROR "submodule '${submodule_name_obj}' added more than once. 
in ${CMAKE_CURRENT_LIST_FILE}") - endif() - - set(${des_submodule_objs} ${${des_submodule_objs}} $ PARENT_SCOPE) - -endfunction() - -if (DEFINED ENV{MSLIBS_CACHE_PATH}) - set(_MS_LIB_CACHE $ENV{MSLIBS_CACHE_PATH}) -else() - set(_MS_LIB_CACHE ${CMAKE_BINARY_DIR}/.mslib) -endif () -message("MS LIBS CACHE PATH: ${_MS_LIB_CACHE}") - -if (NOT EXISTS ${_MS_LIB_CACHE}) - file(MAKE_DIRECTORY ${_MS_LIB_CACHE}) -endif () - -if (DEFINED ENV{MSLIBS_SERVER}) - set(LOCAL_LIBS_SERVER $ENV{MSLIBS_SERVER}) - message("LOCAL_LIBS_SERVER: ${LOCAL_LIBS_SERVER}") -endif () - -include(ProcessorCount) -ProcessorCount(N) -if (JOBS) - set(THNUM ${JOBS}) -else() - set(JOBS 8) - if (${JOBS} GREATER ${N}) - set(THNUM ${N}) - endif() -endif () -message("set make thread num: ${THNUM}") - -if(LOCAL_LIBS_SERVER) - if (NOT ENV{no_proxy}) - set(ENV{no_proxy} "${LOCAL_LIBS_SERVER}") - else() - string(FIND $ENV{no_proxy} ${LOCAL_LIBS_SERVER} IP_POS) - if (${IP_POS} EQUAL -1) - set(ENV{no_proxy} "$ENV{no_proxy},${LOCAL_LIBS_SERVER}") - endif () - endif () -endif() - -function(__download_pkg pkg_name pkg_url pkg_md5) - - if(LOCAL_LIBS_SERVER) - get_filename_component(_URL_FILE_NAME ${pkg_url} NAME) - set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${_URL_FILE_NAME}" ${pkg_url}) - endif() - - FetchContent_Declare( - ${pkg_name} - URL ${pkg_url} - URL_HASH MD5=${pkg_md5} - ) - FetchContent_GetProperties(${pkg_name}) - message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}") - if(NOT ${pkg_name}_POPULATED) - FetchContent_Populate(${pkg_name}) - set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE) - endif() - -endfunction() - -function(__download_pkg_with_git pkg_name pkg_url pkg_git_commit pkg_md5) - - if(LOCAL_LIBS_SERVER) - set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${pkg_git_commit}") - FetchContent_Declare( - ${pkg_name} - URL ${pkg_url} - URL_HASH MD5=${pkg_md5} - ) - else() - FetchContent_Declare( - ${pkg_name} - GIT_REPOSITORY ${pkg_url} - GIT_TAG ${pkg_git_commit}) - endif() - FetchContent_GetProperties(${pkg_name}) - message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}") - if(NOT ${pkg_name}_POPULATED) - FetchContent_Populate(${pkg_name}) - set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE) - endif() - -endfunction() - - -function(__find_pkg_then_add_target pkg_name pkg_exe) - - unset(${pkg_name}_LIBS) - - message("_FIND:${${pkg_name}_BASE_DIR}") - - if(pkg_exe) - find_program(${pkg_exe}_EXE ${pkg_exe} PATHS ${${pkg_name}_BASE_DIR}/bin NO_DEFAULT_PATH) - if(NOT ${pkg_exe}_EXE) - return() - endif() - add_executable(${pkg_name}::${pkg_exe} IMPORTED GLOBAL) - set_target_properties(${pkg_name}::${pkg_exe} PROPERTIES - IMPORTED_LOCATION ${${pkg_exe}_EXE} - ) - message("found ${${pkg_exe}_EXE}") - endif() - - foreach(_LIB_NAME ${ARGN}) - set(_LIB_SEARCH_NAME ${_LIB_NAME}) - set(_LIB_TYPE SHARED) - if (${pkg_name}_USE_STATIC_LIBS) - set(_LIB_SEARCH_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(_LIB_TYPE STATIC) - endif () - set(${_LIB_NAME}_LIB ${_LIB_NAME}_LIB-NOTFOUND) - find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/lib NO_DEFAULT_PATH) - if(NOT ${_LIB_NAME}_LIB) - return() - endif() - add_library(${pkg_name}::${_LIB_NAME} ${_LIB_TYPE} IMPORTED GLOBAL) - set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${${pkg_name}_BASE_DIR}/include" - IMPORTED_LOCATION ${${_LIB_NAME}_LIB} - ) - list(APPEND ${pkg_name}_LIBS 
${pkg_name}::${_LIB_NAME}) - message("found ${${_LIB_NAME}_LIB}") - STRING( REGEX REPLACE "(.+)/(.+)" "\\1" LIBPATH ${${_LIB_NAME}_LIB}) - set(${pkg_name}_LIBPATH ${LIBPATH} CACHE STRING INTERNAL) - endforeach(_LIB_NAME) - - set(${pkg_name}_LIBS ${${pkg_name}_LIBS} PARENT_SCOPE) -endfunction() - -function(__exec_cmd) - set(options ) - set(oneValueArgs WORKING_DIRECTORY) - set(multiValueArgs COMMAND) - - cmake_parse_arguments(EXEC "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) - - execute_process(COMMAND ${EXEC_COMMAND} - WORKING_DIRECTORY ${EXEC_WORKING_DIRECTORY} - RESULT_VARIABLE RESULT) - if(NOT RESULT EQUAL "0") - message(FATAL_ERROR "error! when ${EXEC_COMMAND} in ${EXEC_WORKING_DIRECTORY}") - endif() -endfunction() - -function(__check_patches pkg_patches) - # check patches - if (PKG_PATCHES) - file(TOUCH ${_MS_LIB_CACHE}/${pkg_name}_patch.md5) - file(READ ${_MS_LIB_CACHE}/${pkg_name}_patch.md5 ${pkg_name}_PATCHES_MD5) - - message("patches md5:${${pkg_name}_PATCHES_MD5}") - - set(${pkg_name}_PATCHES_NEW_MD5 ) - foreach(_PATCH ${PKG_PATCHES}) - file(MD5 ${_PATCH} _PF_MD5) - set(${pkg_name}_PATCHES_NEW_MD5 "${${pkg_name}_PATCHES_NEW_MD5},${_PF_MD5}") - endforeach(_PATCH) - - if (NOT ${pkg_name}_PATCHES_MD5 STREQUAL ${pkg_name}_PATCHES_NEW_MD5) - set(${pkg_name}_PATCHES ${PKG_PATCHES}) - file(REMOVE_RECURSE "${_MS_LIB_CACHE}/${pkg_name}-subbuild") - file(WRITE ${_MS_LIB_CACHE}/${pkg_name}_patch.md5 ${${pkg_name}_PATCHES_NEW_MD5}) - message("patches changed : ${${pkg_name}_PATCHES_NEW_MD5}") - endif () - endif () -endfunction() - -set(GE_FIND_NO_DEFAULT_PATH NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_SYSTEM_ENVIRONMENT_PATH - NO_CMAKE_BUILDS_PATH NO_CMAKE_PACKAGE_REGISTRY NO_CMAKE_SYSTEM_PATH - NO_CMAKE_SYSTEM_PACKAGE_REGISTRY) -set(GE_FIND_NO_DEFAULT_PATH ${GE_FIND_NO_DEFAULT_PATH} PARENT_SCOPE) - -function(graphengine_add_pkg pkg_name ) - set(options ) - set(oneValueArgs URL MD5 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY CMAKE_PATH) - set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) - - if (NOT PKG_CMAKE_PATH) - set(PKG_CMAKE_PATH ..) 
- endif () - - set(__FIND_PKG_NAME ${pkg_name}) - string(TOLOWER ${pkg_name} pkg_name) - message("pkg name:${__FIND_PKG_NAME},${pkg_name}") - - set(${pkg_name}_PATCHES_HASH ) - foreach(_PATCH ${PKG_PATCHES}) - file(MD5 ${_PATCH} _PF_MD5) - set(${pkg_name}_PATCHES_HASH "${${pkg_name}_PATCHES_HASH},${_PF_MD5}") - endforeach(_PATCH) - - # check options - set(${pkg_name}_CONFIG_TXT - "${CMAKE_CXX_COMPILER_VERSION}-${CMAKE_C_COMPILER_VERSION} - ${ARGN} - ${${pkg_name}_USE_STATIC_LIBS}- ${${pkg_name}_PATCHES_HASH} - ${${pkg_name}_CXXFLAGS}--${${pkg_name}_CFLAGS}--${${pkg_name}_LDFLAGS}") - string(REPLACE ";" "-" ${pkg_name}_CONFIG_TXT ${${pkg_name}_CONFIG_TXT}) - string(MD5 ${pkg_name}_CONFIG_HASH ${${pkg_name}_CONFIG_TXT}) - - message("${pkg_name} config hash: ${${pkg_name}_CONFIG_HASH}") - - set(${pkg_name}_BASE_DIR ${_MS_LIB_CACHE}/${pkg_name}_${${pkg_name}_CONFIG_HASH}) - set(${pkg_name}_DIRPATH ${${pkg_name}_BASE_DIR} CACHE STRING INTERNAL) - - if(EXISTS ${${pkg_name}_BASE_DIR}/options.txt AND PKG_HEAD_ONLY) - set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE) - add_library(${pkg_name} INTERFACE) - target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC}) - return() - endif () - - if(NOT PKG_EXE) - set(PKG_EXE 0) - endif() - - set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR}) - set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR} PARENT_SCOPE) - - if (PKG_LIBS) - __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIBS}) - if(${pkg_name}_LIBS) - set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) - message("Found libs: ${${pkg_name}_LIBS}") - return() - endif() - elseif(NOT PKG_HEAD_ONLY) - find_package(${__FIND_PKG_NAME} ${PKG_VER} ${GE_FIND_NO_DEFAULT_PATH}) - if (${__FIND_PKG_NAME}_FOUND) - set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) - message("Found pkg: ${__FIND_PKG_NAME}") - return() - endif () - endif () - - if (NOT PKG_DIR) - if (PKG_GIT_REPOSITORY) - __download_pkg_with_git(${pkg_name} ${PKG_GIT_REPOSITORY} ${PKG_GIT_TAG} ${PKG_MD5}) - else() - __download_pkg(${pkg_name} ${PKG_URL} ${PKG_MD5}) - endif() - else() - set(${pkg_name}_SOURCE_DIR ${PKG_DIR}) - endif () - file(WRITE ${${pkg_name}_BASE_DIR}/options.txt ${${pkg_name}_CONFIG_TXT}) - message("${pkg_name}_SOURCE_DIR : ${${pkg_name}_SOURCE_DIR}") - - foreach(_PATCH_FILE ${PKG_PATCHES}) - message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_PATCH_FILE}") - execute_process(COMMAND patch -p1 INPUT_FILE ${_PATCH_FILE} - WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR} - RESULT_VARIABLE Result) - if(NOT Result EQUAL "0") - message(FATAL_ERROR "Failed patch: ${_PATCH_FILE}") - endif() - endforeach(_PATCH_FILE) - - file(LOCK ${${pkg_name}_BASE_DIR} DIRECTORY GUARD FUNCTION RESULT_VARIABLE ${pkg_name}_LOCK_RET TIMEOUT 600) - if(NOT ${pkg_name}_LOCK_RET EQUAL "0") - message(FATAL_ERROR "error! 
when try lock ${${pkg_name}_BASE_DIR} : ${${pkg_name}_LOCK_RET}") - endif() - - if(${pkg_name}_SOURCE_DIR) - if (PKG_HEAD_ONLY) - file(GLOB ${pkg_name}_SOURCE_SUBDIRS ${${pkg_name}_SOURCE_DIR}/*) - file(COPY ${${pkg_name}_SOURCE_SUBDIRS} DESTINATION ${${pkg_name}_BASE_DIR}) - set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE) - add_library(${pkg_name} INTERFACE) - target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC}) - - elseif (PKG_CMAKE_OPTION) - # in cmake - file(MAKE_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) - if (${pkg_name}_CFLAGS) - set(${pkg_name}_CMAKE_CFLAGS "-DCMAKE_C_FLAGS=${${pkg_name}_CFLAGS}") - endif () - if (${pkg_name}_CXXFLAGS) - set(${pkg_name}_CMAKE_CXXFLAGS "-DCMAKE_CXX_FLAGS=${${pkg_name}_CXXFLAGS}") - endif () - - if (${pkg_name}_LDFLAGS) - if (${pkg_name}_USE_STATIC_LIBS) - #set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_STATIC_LINKER_FLAGS=${${pkg_name}_LDFLAGS}") - else() - set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_SHARED_LINKER_FLAGS=${${pkg_name}_LDFLAGS}") - endif () - endif () - - __exec_cmd(COMMAND ${CMAKE_COMMAND} ${PKG_CMAKE_OPTION} -G ${CMAKE_GENERATOR} - ${${pkg_name}_CMAKE_CFLAGS} ${${pkg_name}_CMAKE_CXXFLAGS} ${${pkg_name}_CMAKE_LDFLAGS} - -DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ${PKG_CMAKE_PATH} - WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) - - __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- -j${THNUM} - WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) - - else() - if (${pkg_name}_CFLAGS) - set(${pkg_name}_MAKE_CFLAGS "CFLAGS=${${pkg_name}_CFLAGS}") - endif () - if (${pkg_name}_CXXFLAGS) - set(${pkg_name}_MAKE_CXXFLAGS "CXXFLAGS=${${pkg_name}_CXXFLAGS}") - endif () - if (${pkg_name}_LDFLAGS) - set(${pkg_name}_MAKE_LDFLAGS "LDFLAGS=${${pkg_name}_LDFLAGS}") - endif () - # in configure && make - if (PKG_PRE_CONFIGURE_COMMAND) - __exec_cmd(COMMAND ${PKG_PRE_CONFIGURE_COMMAND} - WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) - endif () - - if (PKG_CONFIGURE_COMMAND) - __exec_cmd(COMMAND ${PKG_CONFIGURE_COMMAND} - ${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS} - --prefix=${${pkg_name}_BASE_DIR} - WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) - endif () - set(${pkg_name}_BUILD_OPTION ${PKG_BUILD_OPTION}) - if (NOT PKG_CONFIGURE_COMMAND) - set(${pkg_name}_BUILD_OPTION ${${pkg_name}_BUILD_OPTION} - ${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS}) - endif () - # build - __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} -j${THNUM} - WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) - - if (PKG_INSTALL_INCS OR PKG_INSTALL_LIBS) - file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS}) - file(GLOB ${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS}) - file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include) - file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib) - else() - __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} install WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) - endif () - endif () - endif() - - if (PKG_LIBS) - __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIBS}) - set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) - if(NOT ${pkg_name}_LIBS) - message(FATAL_ERROR "Can not find pkg: ${pkg_name}") - endif() - else() - find_package(${__FIND_PKG_NAME} ${PKG_VER} QUIET) - if (${__FIND_PKG_NAME}_FOUND) - set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) - message("Found pkg: 
${${__FIND_PKG_NAME}_LIBRARIES}") - return() - endif () - endif () -endfunction() diff --git a/cmake/intf_pub_android.cmake b/cmake/intf_pub_android.cmake new file mode 100755 index 00000000..153d5764 --- /dev/null +++ b/cmake/intf_pub_android.cmake @@ -0,0 +1,52 @@ + +add_library(intf_pub INTERFACE) + +target_compile_options(intf_pub INTERFACE + -Wall + -fPIC + -fstack-protector-strong +) +target_compile_definitions(intf_pub INTERFACE + $<$:_GLIBCXX_USE_CXX11_ABI=0> + $<$:CFG_BUILD_NDEBUG> + $<$:CFG_BUILD_DEBUG> + WIN64=1 + LINUX=0 +) +target_link_options(intf_pub INTERFACE + -Wl,-z,relro + -Wl,-z,now + -Wl,-z,noexecstack + $<$:-Wl,--build-id=none> +) +target_link_directories(intf_pub INTERFACE +) + +add_library(intf_ccec INTERFACE) +target_compile_options(intf_ccec INTERFACE + -mcpu=cortex-a73 + --target=aarch64-linux-android29 + --sysroot=${HCC_PATH}/../sysroot + -L${HCC_PATH}/../lib/gcc/aarch64-linux-android/4.9.x + -Wall + -fPIC + -fstack-protector-strong +) +target_compile_definitions(intf_ccec INTERFACE + $<$:_GLIBCXX_USE_CXX11_ABI=0> + $<$:CFG_BUILD_NDEBUG> + $<$:CFG_BUILD_DEBUG> +) + +target_link_options(intf_ccec INTERFACE + -mcpu=cortex-a73 + --target=aarch64-linux-android29 + --sysroot=${HCC_PATH}/../sysroot + -L${HCC_PATH}/../lib/gcc/aarch64-linux-android/4.9.x + -Wl,-cce-host-android + -Wl,-z,relro + -Wl,-z,now + -Wl,-z,noexecstack + $<$:-Wl,--build-id=none> +) + diff --git a/cmake/intf_pub_linux.cmake b/cmake/intf_pub_linux.cmake new file mode 100755 index 00000000..325a1b14 --- /dev/null +++ b/cmake/intf_pub_linux.cmake @@ -0,0 +1,32 @@ +if (HAVE_PUB) + return() +endif() + +add_library(intf_pub INTERFACE) + +target_compile_options(intf_pub INTERFACE + -Wall + -fPIC + $,-fstack-protector-all,-fstack-protector-strong> + $<$:-std=c++11> +) +target_compile_definitions(intf_pub INTERFACE + $<$:_GLIBCXX_USE_CXX11_ABI=0> + $<$:CFG_BUILD_NDEBUG> + $<$:CFG_BUILD_DEBUG> + WIN64=1 + LINUX=0 +) +target_link_options(intf_pub INTERFACE + -Wl,-z,relro + -Wl,-z,now + -Wl,-z,noexecstack + $<$:-Wl,--build-id=none> +) +target_link_directories(intf_pub INTERFACE +) +target_link_libraries(intf_pub INTERFACE + -lpthread +) + +set(HAVE_PUB TRUE CACHE BOOL "pub add") diff --git a/cmake/intf_pub_windows.cmake b/cmake/intf_pub_windows.cmake new file mode 100755 index 00000000..19e37283 --- /dev/null +++ b/cmake/intf_pub_windows.cmake @@ -0,0 +1,24 @@ + +add_library(intf_pub INTERFACE) + +target_compile_options(intf_pub INTERFACE + -Wall + -fPIC + $,-fstack-protector-all,-fstack-protector-strong> + $<$:-std=c++11> +) +target_compile_definitions(intf_pub INTERFACE + $<$:_GLIBCXX_USE_CXX11_ABI=0> + OS_TYPE=WIN64 + WIN64=1 + LINUX=0 + $<$:CFG_BUILD_NDEBUG> + $<$:CFG_BUILD_DEBUG> +) +target_link_options(intf_pub INTERFACE + $<$:-Wl,--build-id=none> +) +target_link_directories(intf_pub INTERFACE +) +target_link_libraries(intf_pub INTERFACE +) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index b1f1ce3d..bb586bd5 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -1,378 +1,833 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -# libge_compiler.so & libge_runner.so -# will later be integrated into libgraph_runner.so, works for both training and inference -# compiling proto files generates some warnings, use no-unused-variable to suppress them -set(CMAKE_CXX_FLAGS "-Wno-unused-variable ${CMAKE_CXX_FLAGS}") -file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "${GE_SOURCE_DIR}/metadef/proto/fusion_model.proto" - "${GE_SOURCE_DIR}/metadef/proto/optimizer_priority.proto" - ) -file(GLOB PROTO_CLIENT_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "${GE_SOURCE_DIR}/metadef/proto/ge_api.proto" - ) -file(GLOB PROTO_HEADER_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "${GE_SOURCE_DIR}/metadef/proto/om.proto" - "${GE_SOURCE_DIR}/metadef/proto/task.proto" - "${GE_SOURCE_DIR}/metadef/proto/insert_op.proto" - "${GE_SOURCE_DIR}/metadef/proto/ge_ir.proto" - "${GE_SOURCE_DIR}/metadef/proto/fwk_adapter.proto" - "${GE_SOURCE_DIR}/metadef/proto/op_mapping_info.proto" - "${GE_SOURCE_DIR}/metadef/proto/dump_task.proto" - ) -ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) -ge_protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST}) -ge_protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST}) -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}) -include_directories(${GE_SOURCE_DIR}/ge) -include_directories(${GE_SOURCE_DIR}/ge/analyzer) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/common/util) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/external/ge) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/inc/framework/common) -include_directories(${GE_SOURCE_DIR}/metadef) -include_directories(${GE_SOURCE_DIR}/metadef/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/metadef/inc/graph) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) - -######### libge_runner.so ############# -# need to remove dependencies on pb files later -file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "analyzer/analyzer.cc" - "client/ge_prof.cc" - "client/ge_api.cc" - "common/dump/dump_manager.cc" - "common/dump/dump_properties.cc" - "common/dump/dump_op.cc" - "common/formats/format_transfers/*.cc" - "common/formats/formats.cc" - "common/formats/utils/formats_trans_utils.cc" - "common/fp16_t.cc" - "common/ge/op_tiling_manager.cc" - "common/ge/plugin_manager.cc" - "common/helper/model_cache_helper.cc" - "common/profiling/profiling_manager.cc" - "engine_manager/dnnengine_manager.cc" - "executor/ge_executor.cc" - "ge_local_engine/engine/host_cpu_engine.cc" - "generator/ge_generator.cc" - "generator/generator_api.cc" - "graph/build/*.cc" - "graph/common/*.cc" - "graph/execute/graph_execute.cc" - "graph/label/*.cc" - "graph/load/graph_loader.cc" - "graph/load/new_model_manager/*.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - 
"graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/hccl_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/manager/graph_context.cc" - "graph/manager/graph_manager.cc" - "graph/manager/graph_manager_utils.cc" - "graph/manager/graph_mem_allocator.cc" - "graph/manager/graph_caching_allocator.cc" - "graph/manager/graph_var_manager.cc" - "graph/manager/model_manager/event_manager.cc" - "graph/manager/rdma_pool_allocator.cc" - "graph/manager/trans_var_data_utils.cc" - "graph/manager/util/debug.cc" - "graph/manager/util/hcom_util.cc" - "graph/manager/util/rt_context_util.cc" - "graph/manager/util/variable_accelerate_ctrl.cc" - "graph/manager/util/debug.cc" - "graph/manager/util/hcom_util.cc" - "graph/manager/util/rt_context_util.cc" - "graph/manager/util/variable_accelerate_ctrl.cc" - "graph/optimize/graph_optimize.cc" - "graph/optimize/mem_rw_conflict_optimize.cc" - "graph/optimize/optimizer/allreduce_fusion_pass.cc" - "graph/optimize/summary_optimize.cc" - "graph/partition/dynamic_shape_partition.cc" - "graph/partition/engine_place.cc" - "graph/partition/graph_partition.cc" - "graph/passes/*.cc" - "graph/preprocess/graph_preprocess.cc" - "graph/preprocess/insert_op/ge_aipp_op.cc" - "graph/preprocess/insert_op/util_insert_aipp_op.cc" - "graph/preprocess/multi_batch_copy_graph.cc" - "graph/preprocess/multi_batch_options.cc" - "host_kernels/add_kernel.cc" - "host_kernels/broadcast_args_kernel.cc" - "host_kernels/broadcast_gradient_args_kernel.cc" - "host_kernels/cast_kernel.cc" - "host_kernels/concat_offset_kernel.cc" - "host_kernels/concat_v2_kernel.cc" - "host_kernels/dynamic_stitch_kernel.cc" - "host_kernels/empty_kernel.cc" - "host_kernels/expanddims_kernel.cc" - "host_kernels/fill_kernel.cc" - "host_kernels/floordiv_kernel.cc" - "host_kernels/floormod_kernel.cc" - "host_kernels/gather_v2_kernel.cc" - "host_kernels/greater_kernel.cc" - "host_kernels/identity_kernel.cc" - "host_kernels/kernel_utils.cc" - "host_kernels/maximum_kernel.cc" - "host_kernels/mul_kernel.cc" - "host_kernels/pack_kernel.cc" - "host_kernels/permute_kernel.cc" - "host_kernels/range_kernel.cc" - "host_kernels/rank_kernel.cc" - "host_kernels/reduce_prod_kernel.cc" - "host_kernels/reshape_kernel.cc" - "host_kernels/rsqrt_kernel.cc" - "host_kernels/shape_kernel.cc" - "host_kernels/shape_n_kernel.cc" - "host_kernels/size_kernel.cc" - 
"host_kernels/slice_d_kernel.cc" - "host_kernels/slice_kernel.cc" - "host_kernels/squeeze_kernel.cc" - "host_kernels/ssd_prior_box_kernel.cc" - "host_kernels/strided_slice_kernel.cc" - "host_kernels/sub_kernel.cc" - "host_kernels/transdata_kernel.cc" - "host_kernels/transpose_kernel.cc" - "host_kernels/unpack_kernel.cc" - "host_kernels/unsqueeze_kernel.cc" - "hybrid/common/npu_memory_allocator.cc" - "hybrid/common/tensor_value.cc" - "hybrid/executor/*.cc" - "hybrid/executor/worker/*.cc" - "hybrid/hybrid_davinci_model.cc" - "hybrid/model/*.cc" - "hybrid/node_executor/aicore/*.cc" - "hybrid/node_executor/aicpu/aicpu_ext_info.cc" - "hybrid/node_executor/aicpu/aicpu_node_executor.cc" - "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" - "hybrid/node_executor/controlop/control_op_executor.cc" - "hybrid/node_executor/ge_local/ge_local_node_executor.cc" - "hybrid/node_executor/hccl/hccl_node_executor.cc" - "hybrid/node_executor/hostcpu/ge_local_node_executor.cc" - "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" - "hybrid/node_executor/host_cpu/kernel_factory.cc" - "hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" - "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" - "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" - "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" - "hybrid/node_executor/node_executor.cc" - "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" - "hybrid/node_executor/rts/rts_node_executor.cc" - "hybrid/node_executor/task_context.cc" - "init/gelib.cc" - "model/ge_model.cc" - "model/ge_root_model.cc" - "omm/csa_interact.cc" - "opskernel_manager/ops_kernel_manager.cc" - "session/inner_session.cc" - "session/session_manager.cc" - "single_op/*.cc" - "single_op/task/*.cc" - ) - - -######### libge_runner.so ############# -add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} ${PROTO_HEADER_HDRS}) +add_subdirectory(common) +add_subdirectory(plugin/engine) +add_subdirectory(graph/build/memory) +add_subdirectory(ge_local_engine) +add_subdirectory(host_cpu_engine) +add_subdirectory(executor) +#add_subdirectory(offline) + +set(PROTO_LIST + "${METADEF_DIR}/proto/fusion_model.proto" + "${GE_CODE_DIR}/ge/proto/optimizer_priority.proto" +) + +set(PROTO_CLIENT_LIST + "${GE_CODE_DIR}/ge/proto/ge_api.proto" +) + +set(PROTO_HEADER_LIST + "${METADEF_DIR}/proto/om.proto" + "${METADEF_DIR}/proto/task.proto" + "${METADEF_DIR}/proto/insert_op.proto" + "${METADEF_DIR}/proto/ge_ir.proto" + "${METADEF_DIR}/proto/fwk_adapter.proto" + "${METADEF_DIR}/proto/op_mapping_info.proto" +) + +protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) +protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST}) +protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST}) + +############ libge_runner.so ############ +set(TRAIN_SRC_LIST + "common/formats/format_transfers/datatype_transfer.cc" + "common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc" + "common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc" + "common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc" + "common/formats/format_transfers/format_transfer_fractal_nz.cc" + "common/formats/format_transfers/format_transfer_fractal_z.cc" + "common/formats/format_transfers/format_transfer_fractal_zz.cc" + "common/formats/format_transfers/format_transfer_fracz_hwcn.cc" + "common/formats/format_transfers/format_transfer_fracz_nchw.cc" + 
"common/formats/format_transfers/format_transfer_fracz_nhwc.cc" + "common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc" + "common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc" + "common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc" + "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc" + "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc" + "common/formats/format_transfers/format_transfer_transpose.cc" + "common/formats/formats.cc" + "common/formats/utils/formats_trans_utils.cc" + "common/fp16_t.cc" + "common/ge/plugin_manager.cc" + "common/ge/op_tiling_manager.cc" + "common/helper/model_cache_helper.cc" + "common/profiling/profiling_manager.cc" + "common/dump/dump_manager.cc" + "common/dump/dump_properties.cc" + "common/dump/dump_op.cc" + "engine_manager/dnnengine_manager.cc" + "ge_local_engine/engine/host_cpu_engine.cc" + "generator/ge_generator.cc" + "generator/generator_api.cc" + "graph/build/graph_builder.cc" + "graph/build/label_allocator.cc" + "graph/build/logical_stream_allocator.cc" + "graph/build/model_builder.cc" + "graph/build/run_context.cc" + "graph/build/stream_allocator.cc" + "graph/build/stream_graph_optimizer.cc" + "graph/build/task_generator.cc" + "graph/common/bcast.cc" + "graph/common/local_context.cc" + "graph/common/omg_util.cc" + "graph/common/transop_util.cc" + "graph/execute/graph_execute.cc" + "graph/label/case_label_maker.cc" + "graph/label/if_label_maker.cc" + "graph/label/label_maker.cc" + "graph/label/partitioned_call_label_maker.cc" + "graph/label/while_label_maker.cc" + "graph/load/graph_loader.cc" + "graph/load/new_model_manager/cpu_queue_schedule.cc" + "graph/load/new_model_manager/data_dumper.cc" + "graph/load/new_model_manager/data_inputer.cc" + "graph/load/new_model_manager/davinci_model.cc" + "graph/load/new_model_manager/davinci_model_parser.cc" + "graph/load/new_model_manager/model_manager.cc" + "graph/load/new_model_manager/model_utils.cc" + "graph/load/new_model_manager/aipp_utils.cc" + "graph/load/new_model_manager/task_info/end_graph_task_info.cc" + "graph/load/new_model_manager/task_info/event_record_task_info.cc" + "graph/load/new_model_manager/task_info/event_wait_task_info.cc" + "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" + "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/new_model_manager/task_info/hccl_task_info.cc" + "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/new_model_manager/task_info/kernel_task_info.cc" + "graph/load/new_model_manager/task_info/label_set_task_info.cc" + "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" + "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/new_model_manager/task_info/stream_active_task_info.cc" + "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" + "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/new_model_manager/task_info/task_info.cc" + "graph/load/new_model_manager/tbe_handle_store.cc" + "graph/load/new_model_manager/zero_copy_task.cc" + 
"graph/load/new_model_manager/zero_copy_offset.cc" + "graph/manager/graph_context.cc" + "graph/manager/graph_manager.cc" + "graph/manager/graph_manager_utils.cc" + "graph/manager/graph_mem_allocator.cc" + "graph/manager/graph_caching_allocator.cc" + "graph/manager/graph_var_manager.cc" + "graph/manager/host_mem_manager.cc" + "graph/manager/rdma_pool_allocator.cc" + "graph/manager/memory_api.cc" + "graph/manager/model_manager/event_manager.cc" + "graph/manager/trans_var_data_utils.cc" + "graph/manager/util/debug.cc" + "graph/manager/util/hcom_util.cc" + "graph/manager/util/rt_context_util.cc" + "graph/manager/util/variable_accelerate_ctrl.cc" + "graph/optimize/graph_optimize.cc" + "graph/optimize/mem_rw_conflict_optimize.cc" + "graph/optimize/summary_optimize.cc" + "graph/partition/engine_place.cc" + "graph/partition/graph_partition.cc" + "graph/passes/addn_pass.cc" + "graph/passes/aicpu_constant_folding_pass.cc" + "graph/passes/assert_pass.cc" + "graph/passes/input_output_connection_identify_pass.cc" + "graph/passes/atomic_addr_clean_pass.cc" + "graph/passes/mark_same_addr_pass.cc" + "graph/passes/mark_graph_unknown_status_pass.cc" + "graph/partition/dynamic_shape_partition.cc" + "graph/passes/base_pass.cc" + "graph/passes/bitcast_pass.cc" + "graph/passes/cast_remove_pass.cc" + "graph/passes/cast_translate_pass.cc" + "graph/passes/common_subexpression_elimination_pass.cc" + "graph/passes/transop_symmetry_elimination_pass.cc" + "graph/passes/compile_nodes_pass.cc" + "graph/passes/constant_folding_pass.cc" + "graph/passes/constant_fuse_same_pass.cc" + "graph/passes/control_trigger_pass.cc" + "graph/passes/dimension_adjust_pass.cc" + "graph/passes/dimension_compute_pass.cc" + "graph/passes/dropout_pass.cc" + "graph/passes/hccl_group_pass.cc" + "graph/passes/enter_pass.cc" + "graph/passes/assign_pass.cc" + "graph/passes/flow_ctrl_pass.cc" + "graph/passes/global_step_insert_pass.cc" + "host_kernels/transpose_kernel.cc" + "host_kernels/add_kernel.cc" + "host_kernels/broadcast_args_kernel.cc" + "host_kernels/broadcast_gradient_args_kernel.cc" + "host_kernels/cast_kernel.cc" + "host_kernels/concat_offset_kernel.cc" + "host_kernels/concat_v2_kernel.cc" + "host_kernels/dynamic_stitch_kernel.cc" + "host_kernels/identity_kernel.cc" + "host_kernels/empty_kernel.cc" + "host_kernels/expanddims_kernel.cc" + "host_kernels/fill_kernel.cc" + "host_kernels/floordiv_kernel.cc" + "host_kernels/floormod_kernel.cc" + "host_kernels/gather_v2_kernel.cc" + "host_kernels/greater_kernel.cc" + "host_kernels/kernel_utils.cc" + "host_kernels/maximum_kernel.cc" + "host_kernels/mul_kernel.cc" + "host_kernels/pack_kernel.cc" + "host_kernels/permute_kernel.cc" + "host_kernels/range_kernel.cc" + "host_kernels/rank_kernel.cc" + "host_kernels/reduce_prod_kernel.cc" + "host_kernels/reshape_kernel.cc" + "host_kernels/rsqrt_kernel.cc" + "host_kernels/shape_kernel.cc" + "host_kernels/shape_n_kernel.cc" + "host_kernels/size_kernel.cc" + "host_kernels/slice_d_kernel.cc" + "host_kernels/slice_kernel.cc" + "host_kernels/squeeze_kernel.cc" + "host_kernels/unsqueeze_kernel.cc" + "host_kernels/ssd_prior_box_kernel.cc" + "host_kernels/strided_slice_kernel.cc" + "host_kernels/sub_kernel.cc" + "host_kernels/transdata_kernel.cc" + "host_kernels/unpack_kernel.cc" + "graph/passes/folding_pass.cc" + "graph/passes/get_original_format_pass.cc" + "graph/passes/guarantee_const_pass.cc" + "graph/passes/hccl_memcpy_pass.cc" + "graph/passes/identity_pass.cc" + "graph/passes/ref_identity_delete_op_pass.cc" + "graph/passes/infershape_pass.cc" + 
"graph/passes/isolated_op_remove_pass.cc" + "graph/passes/iterator_op_pass.cc" + "graph/passes/link_gen_mask_nodes_pass.cc" + "graph/passes/merge_pass.cc" + "graph/passes/multi_batch_pass.cc" + "graph/passes/multi_batch_clone_pass.cc" + "graph/passes/subexpression_migration_pass.cc" + "graph/passes/subgraph_const_migration_pass.cc" + "graph/passes/unused_args_clean_pass.cc" + "graph/passes/net_output_pass.cc" + "graph/passes/next_iteration_pass.cc" + "graph/passes/no_use_reshape_remove_pass.cc" + "graph/passes/pass_manager.cc" + "graph/passes/pass_utils.cc" + "graph/passes/permute_pass.cc" + "graph/passes/placeholder_with_default_pass.cc" + "graph/passes/prevent_gradient_pass.cc" + "graph/passes/print_op_pass.cc" + "graph/passes/prune_pass.cc" + "graph/passes/ctrl_edge_transfer_pass.cc" + "graph/passes/replace_with_empty_const_pass.cc" + "graph/passes/reshape_remove_pass.cc" + "graph/passes/reshape_recovery_pass.cc" + "graph/passes/resource_pair_add_control_pass.cc" + "graph/passes/resource_pair_remove_control_pass.cc" + "graph/passes/same_transdata_breadth_fusion_pass.cc" + "graph/passes/save_pass.cc" + "graph/passes/shape_operate_op_remove_pass.cc" + "graph/passes/snapshot_pass.cc" + "graph/passes/stop_gradient_pass.cc" + "graph/passes/subgraph_pass.cc" + "graph/passes/data_pass.cc" + "graph/passes/switch_data_edges_bypass.cc" + "graph/passes/switch_logic_remove_pass.cc" + "graph/passes/merge_to_stream_merge_pass.cc" + "graph/passes/switch_to_stream_switch_pass.cc" + "graph/passes/attach_stream_label_pass.cc" + "graph/passes/switch_dead_branch_elimination.cc" + "graph/passes/replace_transshape_pass.cc" + "graph/passes/transop_breadth_fusion_pass.cc" + "graph/passes/transop_depth_fusion_pass.cc" + "graph/passes/transop_nearby_allreduce_fusion_pass.cc" + "graph/passes/transop_without_reshape_fusion_pass.cc" + "graph/passes/transpose_transdata_pass.cc" + "graph/passes/unused_const_pass.cc" + "graph/passes/unused_op_remove_pass.cc" + "graph/passes/var_is_initialized_op_pass.cc" + "graph/passes/parallel_concat_start_op_pass.cc" + "graph/passes/cond_pass.cc" + "graph/passes/cond_remove_pass.cc" + "graph/passes/for_pass.cc" + "graph/passes/variable_format_pass.cc" + "graph/passes/variable_op_pass.cc" + "graph/passes/variable_prepare_op_pass.cc" + "graph/passes/variable_ref_delete_op_pass.cc" + "graph/passes/variable_ref_useless_control_out_delete_pass.cc" + "graph/passes/end_of_sequence_add_control_pass.cc" + "graph/passes/memcpy_addr_async_pass.cc" + "graph/passes/set_input_output_offset_pass.cc" + "graph/preprocess/graph_preprocess.cc" + "graph/preprocess/insert_op/ge_aipp_op.cc" + "graph/preprocess/insert_op/util_insert_aipp_op.cc" + "graph/preprocess/multi_batch_options.cc" + "graph/preprocess/multi_batch_copy_graph.cc" + "init/gelib.cc" + "model/ge_model.cc" + "model/ge_root_model.cc" + "omm/csa_interact.cc" + "opskernel_manager/ops_kernel_manager.cc" + "session/inner_session.cc" + "session/session_manager.cc" + "single_op/single_op.cc" + "single_op/single_op_manager.cc" + "single_op/single_op_model.cc" + "single_op/stream_resource.cc" + "single_op/task/build_task_utils.cc" + "single_op/task/op_task.cc" + "single_op/task/tbe_task_builder.cc" + "single_op/task/aicpu_task_builder.cc" + "single_op/task/aicpu_kernel_task_builder.cc" + "hybrid/common/tensor_value.cc" + "hybrid/common/npu_memory_allocator.cc" + "hybrid/executor/rt_callback_manager.cc" + "hybrid/executor/node_state.cc" + "hybrid/executor/node_done_manager.cc" + "hybrid/executor/hybrid_profiler.cc" + 
"hybrid/executor/hybrid_model_executor.cc" + "hybrid/executor/hybrid_model_async_executor.cc" + "hybrid/executor/hybrid_execution_context.cc" + "hybrid/executor/subgraph_context.cc" + "hybrid/executor/subgraph_executor.cc" + "hybrid/executor/worker/task_compile_engine.cc" + "hybrid/executor/worker/shape_inference_engine.cc" + "hybrid/executor/worker/execution_engine.cc" + "hybrid/model/hybrid_model.cc" + "hybrid/model/hybrid_model_builder.cc" + "hybrid/model/node_item.cc" + "hybrid/model/graph_item.cc" + "hybrid/node_executor/aicore/aicore_node_executor.cc" + "hybrid/node_executor/aicore/aicore_op_task.cc" + "hybrid/node_executor/aicore/aicore_task_builder.cc" + "hybrid/node_executor/aicore/aicore_task_compiler.cc" + "hybrid/node_executor/aicpu/aicpu_ext_info.cc" + "hybrid/node_executor/aicpu/aicpu_node_executor.cc" + "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" + "hybrid/node_executor/ge_local/ge_local_node_executor.cc" + "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" + "hybrid/node_executor/host_cpu/kernel_factory.cc" + "hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" + "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "hybrid/node_executor/controlop/control_op_executor.cc" + "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" + "hybrid/node_executor/hccl/hccl_node_executor.cc" + "hybrid/node_executor/rts/rts_node_executor.cc" + "hybrid/node_executor/node_executor.cc" + "hybrid/node_executor/task_context.cc" + "hybrid/hybrid_davinci_model.cc" + "executor/ge_executor.cc" + "client/ge_api.cc" + "client/ge_prof.cc" + "analyzer/analyzer.cc" +) + +add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) + target_compile_definitions(ge_runner PRIVATE - PROTOBUF_INLINE_NOT_IN_HEADERS=0 - DAVINCI_SUPPORT_PROFILING - REUSE_MEMORY=1 - DAVINCI_CLOUD) + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + DAVINCI_SUPPORT_PROFILING + REUSE_MEMORY=1 + FMK_SUPPORT_DUMP + DAVINCI_CLOUD +) + +target_compile_options(ge_runner PRIVATE + -O2 +) + +target_include_directories(ge_runner PRIVATE + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/ge/analyzer + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${GE_CODE_DIR}/inc/framework/common + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + #### blue zone + ${ASCEND_DIR}/driver/include + ${ASCEND_DIR}/fwkacllib/include +) + target_link_libraries(ge_runner - graph - ge_common - ge_memory - ${PROTOBUF_LIBRARY} - ${register} - ${c_sec} - ${slog} - ${mmpa} - ${hccl} - ${msprof} - ${runtime} - ${resouce} - ${ascend_hal} - rt - dl) - -######### libge_compiler.so ############# -# need to remove dependencies on pb files later -file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "analyzer/analyzer.cc" - "common/dump/dump_properties.cc" - "common/dump/dump_manager.cc" - "common/dump/dump_op.cc" - "common/formats/format_transfers/*.cc" - "common/formats/formats.cc" - "common/formats/utils/formats_trans_utils.cc" - "common/fp16_t.cc" - "common/ge/op_tiling_manager.cc" - "common/ge/plugin_manager.cc" - "common/helper/model_cache_helper.cc" - "common/profiling/profiling_manager.cc" - 
"engine_manager/dnnengine_manager.cc" - "ge_local_engine/engine/host_cpu_engine.cc" - "generator/ge_generator.cc" - "generator/generator_api.cc" - "graph/build/*.cc" - "graph/common/*.cc" - "graph/execute/graph_execute.cc" - "graph/label/*.cc" - "graph/load/graph_loader.cc" - "graph/load/new_model_manager/*.cc" - "graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "graph/load/new_model_manager/task_info/event_record_task_info.cc" - "graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "graph/load/new_model_manager/task_info/kernel_task_info.cc" - "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "graph/load/new_model_manager/task_info/label_set_task_info.cc" - "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "graph/load/new_model_manager/task_info/task_info.cc" - "graph/manager/graph_caching_allocator.cc" - "graph/manager/graph_context.cc" - "graph/manager/graph_manager.cc" - "graph/manager/graph_manager_utils.cc" - "graph/manager/graph_mem_allocator.cc" - "graph/manager/trans_var_data_utils.cc" - "graph/manager/graph_var_manager.cc" - "graph/manager/model_manager/event_manager.cc" - "graph/manager/rdma_pool_allocator.cc" - "graph/manager/util/debug.cc" - "graph/manager/util/rt_context_util.cc" - "graph/manager/util/variable_accelerate_ctrl.cc" - "graph/optimize/graph_optimize.cc" - "graph/optimize/mem_rw_conflict_optimize.cc" - "graph/optimize/summary_optimize.cc" - "graph/partition/dynamic_shape_partition.cc" - "graph/partition/engine_place.cc" - "graph/partition/graph_partition.cc" - "graph/passes/*.cc" - "graph/preprocess/graph_preprocess.cc" - "graph/preprocess/insert_op/ge_aipp_op.cc" - "graph/preprocess/insert_op/util_insert_aipp_op.cc" - "graph/preprocess/multi_batch_copy_graph.cc" - "graph/preprocess/multi_batch_options.cc" - "host_kernels/add_kernel.cc" - "host_kernels/broadcast_args_kernel.cc" - "host_kernels/broadcast_gradient_args_kernel.cc" - "host_kernels/cast_kernel.cc" - "host_kernels/concat_offset_kernel.cc" - "host_kernels/concat_v2_kernel.cc" - "host_kernels/dynamic_stitch_kernel.cc" - "host_kernels/empty_kernel.cc" - "host_kernels/expanddims_kernel.cc" - "host_kernels/fill_kernel.cc" - "host_kernels/floordiv_kernel.cc" - "host_kernels/floormod_kernel.cc" - "host_kernels/gather_v2_kernel.cc" - "host_kernels/greater_kernel.cc" - "host_kernels/identity_kernel.cc" - "host_kernels/kernel_utils.cc" - "host_kernels/maximum_kernel.cc" - "host_kernels/mul_kernel.cc" - "host_kernels/pack_kernel.cc" - "host_kernels/permute_kernel.cc" - "host_kernels/range_kernel.cc" - "host_kernels/rank_kernel.cc" - "host_kernels/reduce_prod_kernel.cc" - "host_kernels/reshape_kernel.cc" - "host_kernels/rsqrt_kernel.cc" - "host_kernels/shape_kernel.cc" - 
"host_kernels/shape_n_kernel.cc" - "host_kernels/size_kernel.cc" - "host_kernels/slice_d_kernel.cc" - "host_kernels/slice_kernel.cc" - "host_kernels/squeeze_kernel.cc" - "host_kernels/ssd_prior_box_kernel.cc" - "host_kernels/strided_slice_kernel.cc" - "host_kernels/sub_kernel.cc" - "host_kernels/transdata_kernel.cc" - "host_kernels/transpose_kernel.cc" - "host_kernels/unpack_kernel.cc" - "host_kernels/unsqueeze_kernel.cc" - "hybrid/hybrid_davinci_model_stub.cc" - "hybrid/node_executor/aicpu/aicpu_ext_info.cc" - "init/gelib.cc" - "ir_build/atc_ir_common.cc" - "ir_build/ge_ir_build.cc" - "model/ge_model.cc" - "model/ge_root_model.cc" - "omm/csa_interact.cc" - "opskernel_manager/ops_kernel_manager.cc" - "session/inner_session.cc" - "session/session_manager.cc" - "single_op/*.cc" - "single_op/task/*.cc" - ) - -add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HDRS}) + $ + ge_memory + adump_server + -Wl,--no-as-needed + graph + ge_common + protobuf + register + c_sec + slog + mmpa + msprof + runtime + resource + error_manager + ascend_hal_stub + -Wl,--as-needed + json + -lrt + -ldl +) + +############ libge_compiler.so ############ +set(INFER_SRC_LIST + "graph/manager/trans_var_data_utils.cc" + "omm/csa_interact.cc" + "common/fp16_t.cc" + "common/formats/utils/formats_trans_utils.cc" + "common/formats/format_transfers/datatype_transfer.cc" + "common/formats/format_transfers/format_transfer_transpose.cc" + "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc" + "common/formats/format_transfers/format_transfer_fractal_z.cc" + "common/formats/format_transfers/format_transfer_fractal_nz.cc" + "common/formats/format_transfers/format_transfer_fractal_zz.cc" + "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc" + "common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc" + "common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc" + "common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc" + "common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc" + "common/formats/format_transfers/format_transfer_fracz_nchw.cc" + "common/formats/format_transfers/format_transfer_fracz_nhwc.cc" + "common/formats/format_transfers/format_transfer_fracz_hwcn.cc" + "common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc" + "common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc" + "common/formats/format_transfers/format_transfer_nchw_fz_c04.cc" + "common/formats/formats.cc" + "common/profiling/profiling_manager.cc" + "common/dump/dump_properties.cc" + "common/dump/dump_manager.cc" + "common/dump/dump_op.cc" + "common/dump/dump_server.cc" + "common/helper/model_cache_helper.cc" + "ge_local_engine/engine/host_cpu_engine.cc" + "common/ge/plugin_manager.cc" + "common/ge/op_tiling_manager.cc" + "init/gelib.cc" + "session/inner_session.cc" + "session/session_manager.cc" + "engine_manager/dnnengine_manager.cc" + "opskernel_manager/ops_kernel_manager.cc" + "graph/manager/graph_manager.cc" + "graph/manager/graph_manager_utils.cc" + "graph/manager/graph_context.cc" + "graph/preprocess/graph_preprocess.cc" + "graph/preprocess/multi_batch_options.cc" + "graph/preprocess/multi_batch_copy_graph.cc" + "graph/execute/graph_execute.cc" + "graph/load/graph_loader.cc" + "graph/optimize/graph_optimize.cc" + "graph/optimize/mem_rw_conflict_optimize.cc" + "graph/optimize/summary_optimize.cc" + "graph/build/graph_builder.cc" + "graph/partition/engine_place.cc" + "graph/partition/graph_partition.cc" + 
"graph/partition/dynamic_shape_partition.cc" + "generator/ge_generator.cc" + "generator/generator_api.cc" + "graph/manager/graph_var_manager.cc" + "graph/manager/host_mem_manager.cc" + "graph/manager/rdma_pool_allocator.cc" + "graph/manager/graph_mem_allocator.cc" + "graph/manager/graph_caching_allocator.cc" + "model/ge_model.cc" + "model/ge_root_model.cc" + "graph/common/transop_util.cc" + "graph/passes/pass_manager.cc" + "graph/passes/resource_pair_add_control_pass.cc" + "graph/passes/resource_pair_remove_control_pass.cc" + "graph/passes/pass_utils.cc" + "graph/passes/base_pass.cc" + "graph/passes/bitcast_pass.cc" + "graph/passes/constant_folding_pass.cc" + "graph/passes/aicpu_constant_folding_pass.cc" + "graph/passes/reshape_remove_pass.cc" + "graph/passes/reshape_recovery_pass.cc" + "graph/passes/transop_breadth_fusion_pass.cc" + "graph/passes/transop_depth_fusion_pass.cc" + "graph/passes/transop_nearby_allreduce_fusion_pass.cc" + "graph/passes/same_transdata_breadth_fusion_pass.cc" + "graph/passes/transop_without_reshape_fusion_pass.cc" + "graph/passes/compile_nodes_pass.cc" + "graph/passes/variable_prepare_op_pass.cc" + "graph/passes/variable_ref_delete_op_pass.cc" + "graph/passes/variable_ref_useless_control_out_delete_pass.cc" + "graph/passes/subgraph_pass.cc" + "graph/passes/data_pass.cc" + "graph/passes/net_output_pass.cc" + "graph/passes/replace_transshape_pass.cc" + "graph/passes/constant_fuse_same_pass.cc" + "graph/passes/print_op_pass.cc" + "graph/passes/no_use_reshape_remove_pass.cc" + "graph/passes/iterator_op_pass.cc" + "graph/passes/input_output_connection_identify_pass.cc" + "graph/passes/atomic_addr_clean_pass.cc" + "graph/passes/mark_same_addr_pass.cc" + "graph/passes/mark_graph_unknown_status_pass.cc" + "graph/common/omg_util.cc" + "graph/common/bcast.cc" + "graph/common/local_context.cc" + "graph/passes/dimension_compute_pass.cc" + "graph/passes/dimension_adjust_pass.cc" + "graph/passes/get_original_format_pass.cc" + "graph/passes/shape_operate_op_remove_pass.cc" + "graph/passes/unused_op_remove_pass.cc" + "graph/passes/assert_pass.cc" + "graph/passes/dropout_pass.cc" + "graph/passes/infershape_pass.cc" + "graph/passes/unused_const_pass.cc" + "graph/passes/isolated_op_remove_pass.cc" + "graph/passes/permute_pass.cc" + "graph/passes/ctrl_edge_transfer_pass.cc" + "graph/passes/end_of_sequence_add_control_pass.cc" + "host_kernels/broadcast_gradient_args_kernel.cc" + "host_kernels/greater_kernel.cc" + "host_kernels/gather_v2_kernel.cc" + "host_kernels/maximum_kernel.cc" + "host_kernels/floormod_kernel.cc" + "host_kernels/floordiv_kernel.cc" + "host_kernels/range_kernel.cc" + "host_kernels/shape_kernel.cc" + "host_kernels/size_kernel.cc" + "host_kernels/shape_n_kernel.cc" + "host_kernels/rank_kernel.cc" + "host_kernels/broadcast_args_kernel.cc" + "host_kernels/fill_kernel.cc" + "host_kernels/empty_kernel.cc" + "host_kernels/expanddims_kernel.cc" + "host_kernels/reshape_kernel.cc" + "host_kernels/squeeze_kernel.cc" + "host_kernels/unsqueeze_kernel.cc" + "host_kernels/kernel_utils.cc" + "host_kernels/cast_kernel.cc" + "host_kernels/transdata_kernel.cc" + "host_kernels/unpack_kernel.cc" + "host_kernels/transpose_kernel.cc" + "host_kernels/permute_kernel.cc" + "host_kernels/pack_kernel.cc" + "host_kernels/concat_v2_kernel.cc" + "host_kernels/concat_offset_kernel.cc" + "host_kernels/strided_slice_kernel.cc" + "host_kernels/ssd_prior_box_kernel.cc" + "host_kernels/add_kernel.cc" + "host_kernels/sub_kernel.cc" + "host_kernels/mul_kernel.cc" + "host_kernels/reduce_prod_kernel.cc" 
+ "host_kernels/rsqrt_kernel.cc" + "host_kernels/slice_kernel.cc" + "host_kernels/slice_d_kernel.cc" + "host_kernels/dynamic_stitch_kernel.cc" + "host_kernels/identity_kernel.cc" + "graph/passes/stop_gradient_pass.cc" + "graph/passes/prevent_gradient_pass.cc" + "graph/passes/identity_pass.cc" + "graph/passes/ref_identity_delete_op_pass.cc" + "graph/passes/placeholder_with_default_pass.cc" + "graph/passes/snapshot_pass.cc" + "graph/passes/guarantee_const_pass.cc" + "graph/passes/var_is_initialized_op_pass.cc" + "graph/passes/parallel_concat_start_op_pass.cc" + "graph/passes/folding_pass.cc" + "graph/passes/cast_translate_pass.cc" + "graph/passes/prune_pass.cc" + "graph/passes/merge_to_stream_merge_pass.cc" + "graph/passes/switch_to_stream_switch_pass.cc" + "graph/passes/attach_stream_label_pass.cc" + "graph/passes/multi_batch_pass.cc" + "graph/passes/multi_batch_clone_pass.cc" + "graph/passes/subexpression_migration_pass.cc" + "graph/passes/subgraph_const_migration_pass.cc" + "graph/passes/unused_args_clean_pass.cc" + "graph/passes/next_iteration_pass.cc" + "graph/passes/control_trigger_pass.cc" + "graph/passes/cond_pass.cc" + "graph/passes/cond_remove_pass.cc" + "graph/passes/for_pass.cc" + "graph/passes/enter_pass.cc" + "graph/passes/assign_pass.cc" + "graph/passes/addn_pass.cc" + "graph/passes/common_subexpression_elimination_pass.cc" + "graph/passes/transop_symmetry_elimination_pass.cc" + "graph/passes/save_pass.cc" + "graph/passes/switch_dead_branch_elimination.cc" + "graph/passes/switch_logic_remove_pass.cc" + "graph/passes/switch_data_edges_bypass.cc" + "graph/passes/merge_pass.cc" + "graph/passes/variable_format_pass.cc" + "graph/passes/variable_op_pass.cc" + "graph/passes/cast_remove_pass.cc" + "graph/passes/transpose_transdata_pass.cc" + "graph/passes/hccl_memcpy_pass.cc" + "graph/passes/flow_ctrl_pass.cc" + "graph/passes/global_step_insert_pass.cc" + "graph/passes/link_gen_mask_nodes_pass.cc" + "graph/passes/replace_with_empty_const_pass.cc" + "graph/passes/hccl_group_pass.cc" + "graph/passes/memcpy_addr_async_pass.cc" + "graph/passes/set_input_output_offset_pass.cc" + "graph/manager/model_manager/event_manager.cc" + "graph/manager/util/rt_context_util.cc" + "graph/manager/util/variable_accelerate_ctrl.cc" + "graph/manager/util/debug.cc" + "graph/load/new_model_manager/model_manager.cc" + "graph/load/new_model_manager/data_inputer.cc" + "graph/load/new_model_manager/davinci_model.cc" + "graph/load/new_model_manager/davinci_model_parser.cc" + "graph/load/new_model_manager/model_utils.cc" + "graph/load/new_model_manager/aipp_utils.cc" + "graph/load/new_model_manager/tbe_handle_store.cc" + "graph/load/new_model_manager/cpu_queue_schedule.cc" + "graph/load/new_model_manager/zero_copy_task.cc" + "graph/load/new_model_manager/zero_copy_offset.cc" + "graph/load/new_model_manager/data_dumper.cc" + "graph/load/new_model_manager/task_info/task_info.cc" + "graph/load/new_model_manager/task_info/event_record_task_info.cc" + "graph/load/new_model_manager/task_info/event_wait_task_info.cc" + "graph/load/new_model_manager/task_info/fusion_start_task_info.cc" + "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" + "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" + "graph/load/new_model_manager/task_info/kernel_task_info.cc" + "graph/load/new_model_manager/task_info/label_set_task_info.cc" + "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" + "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" + 
"graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" + "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" + "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" + "graph/load/new_model_manager/task_info/stream_active_task_info.cc" + "graph/load/new_model_manager/task_info/stream_switch_task_info.cc" + "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" + "graph/load/new_model_manager/task_info/end_graph_task_info.cc" + "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "single_op/task/op_task.cc" + "single_op/task/build_task_utils.cc" + "single_op/task/tbe_task_builder.cc" + "single_op/task/aicpu_task_builder.cc" + "single_op/task/aicpu_kernel_task_builder.cc" + "single_op/single_op.cc" + "single_op/single_op_model.cc" + "single_op/stream_resource.cc" + "single_op/single_op_manager.cc" + "hybrid/hybrid_davinci_model_stub.cc" + "ir_build/ge_ir_build.cc" + "ir_build/atc_ir_common.cc" + "graph/preprocess/insert_op/ge_aipp_op.cc" + "graph/preprocess/insert_op/util_insert_aipp_op.cc" + "hybrid/node_executor/aicpu/aicpu_ext_info.cc" + "graph/build/model_builder.cc" + "graph/build/task_generator.cc" + "graph/build/stream_allocator.cc" + "graph/build/logical_stream_allocator.cc" + "graph/build/stream_graph_optimizer.cc" + "graph/build/run_context.cc" + "graph/build/label_allocator.cc" + "graph/label/label_maker.cc" + "graph/label/if_label_maker.cc" + "graph/label/case_label_maker.cc" + "graph/label/while_label_maker.cc" + "graph/label/partitioned_call_label_maker.cc" + "analyzer/analyzer.cc" +) + +add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS}) + target_compile_definitions(ge_compiler PRIVATE - PROTOBUF_INLINE_NOT_IN_HEADERS=0 - REUSE_MEMORY=1 - FMK_HOST_INFER) + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + REUSE_MEMORY=1 + FMK_SUPPORT_DUMP + FMK_HOST_INFER + COMPILE_OMG_PACKAGE +) + +target_compile_options(ge_compiler PRIVATE + -O2 +) + +target_include_directories(ge_compiler PRIVATE + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/ge/analyzer + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${GE_CODE_DIR}/inc/framework/common + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + #### blue zone + ${ASCEND_DIR}/driver/include + ${ASCEND_DIR}/fwkacllib/include +) + target_link_libraries(ge_compiler - graph - ge_common - ge_memory - ${PROTOBUF_LIBRARY} - ${register} - ${c_sec} - ${slog} - ${mmpa} - ${msprof} - ${runtime} - ${resouce} - ${error_manager} - rt - dl) + $ + ge_memory + -Wl,--no-as-needed + graph + ge_common + protobuf + register + c_sec + error_manager + slog + mmpa + runtime_compile + resource + -Wl,--as-needed + json + -lrt + -ldl +) + + +################################################################## +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc + ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc + ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc + COMMAND echo "Generating stub files." 
+ && ${HI_PYTHON} ${CMAKE_CURRENT_LIST_DIR}/stub/gen_stubapi.py ${GE_CODE_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR} + && mv ge_ir_build.cc stub_ge_ir_build.cc + && mv ge_api.cc stub_ge_api.cc + && mv ge_prof.cc stub_ge_prof.cc + && echo "Generating stub files end." + #WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + #DEPENDS stub/gen_stubapi.py ${TOP_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR} +) + +add_custom_target(ge_stub + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc + ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc + ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc +) + +################################################################## +############ stub/libge_compiler.so ############ +add_library(atc_stub_ge_compiler SHARED + stub_ge_ir_build.cc +) + +add_dependencies(atc_stub_ge_compiler ge_stub) + +target_link_libraries(atc_stub_ge_compiler PRIVATE + $ +) + +set_target_properties(atc_stub_ge_compiler PROPERTIES + OUTPUT_NAME ge_compiler + LIBRARY_OUTPUT_DIRECTORY atc_stub +) + +target_include_directories(atc_stub_ge_compiler PRIVATE + ${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/ge/analyzer + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${GE_CODE_DIR}/inc/framework/common + ${GE_CODE_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + #### yellow zone #### + ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + #### blue zone #### + ${ASCEND_DIR}/driver/include + ${ASCEND_DIR}/fwkacllib/include +) + +############ stub/libge_runner.so ############ +add_library(fwk_stub_ge_runner SHARED + stub_ge_api.cc + stub_ge_prof.cc +) + +add_dependencies(fwk_stub_ge_runner ge_stub) + +target_link_libraries(fwk_stub_ge_runner PRIVATE + $ +) + +set_target_properties(fwk_stub_ge_runner PROPERTIES + OUTPUT_NAME ge_runner + LIBRARY_OUTPUT_DIRECTORY fwk_stub +) + +target_include_directories(fwk_stub_ge_runner PRIVATE + ${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/ge/analyzer + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${GE_CODE_DIR}/inc/framework/common + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + #### yellow zone #### + ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external + #### blue zone #### + ${ASCEND_DIR}/driver/include + ${ASCEND_DIR}/fwkacllib/include +) + +############################################################### +add_custom_target( + engine_conf.json ALL + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/engine_conf.json +) +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/engine_conf.json + COMMAND cp ${CMAKE_CURRENT_LIST_DIR}/engine_manager/engine_conf.json ${CMAKE_CURRENT_BINARY_DIR}/ +) + +############################################################### +add_custom_target( + optimizer_priority.pbtxt ALL + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/optimizer_priority.pbtxt +) +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/optimizer_priority.pbtxt + COMMAND cp ${CMAKE_CURRENT_LIST_DIR}/opskernel_manager/optimizer_priority.pbtxt ${CMAKE_CURRENT_BINARY_DIR}/ +) + +############################################################### + +############ install ############ +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(TARGETS ge_runner ge_compiler OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) + +install(TARGETS atc_stub_ge_compiler fwk_stub_ge_runner OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/stub +) + +install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/engine_conf.json + 
${CMAKE_CURRENT_BINARY_DIR}/optimizer_priority.pbtxt OPTIONAL + DESTINATION ${INSTALL_LIBRARY_DIR} +) diff --git a/ge/README.md b/ge/README.md new file mode 100755 index 00000000..e69de29b diff --git a/ge/analyzer/analyzer.cc b/ge/analyzer/analyzer.cc old mode 100644 new mode 100755 index b7d09bea..9064da28 --- a/ge/analyzer/analyzer.cc +++ b/ge/analyzer/analyzer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,32 +40,34 @@ const std::string kFilePath = "./"; const std::string kAnalyzeFile = "ge_check_op.json"; const std::string kUnknownShape = "unknownshape"; -const std::string kUnsupport = "unsupport"; +const std::string kUnsupport = "unsupport"; const std::string kSessionId = "session_id"; -const std::string kGraphId = "graph_id"; -const std::string kOpInfo = "op_info"; +const std::string kGraphId = "graph_id"; +const std::string kOpInfo = "op_info"; const std::string kErrorType = "error_type"; -const std::string kOpName = "name"; -const std::string kOpType = "type"; -const std::string kReason = "reason"; -const std::string kInput = "input"; -const std::string kOutput = "output"; -const std::string kShape = "shape"; -const std::string kDataType = "data_type"; -const std::string kLayout = "layout"; -const std::string kResult = "result"; -const std::string kOp = "op"; - -std::map errors_map{{PARSER, "paser_error"}, - {INFER_SHAPE, "infer_shape_error"}, - {CHECKSUPPORT, "check_support_error"}, - {GRAPH_OPTIMIZE, "graph_optimize_error"}, - {GRAPH_PARTION, "graph_partion_error"}, - {GRAPH_BUILDER, "graph_builder_error"}}; -} // namespace - -Analyzer *Analyzer::GetInstance() { +const std::string kOpName = "name"; +const std::string kOpType = "type"; +const std::string kReason = "reason"; +const std::string kInput = "input"; +const std::string kOutput = "output"; +const std::string kShape = "shape"; +const std::string kDataType = "data_type"; +const std::string kLayout = "layout"; +const std::string kResult = "result"; +const std::string kOp = "op"; + +std::map errors_map { + {PARSER, "paser_error"}, + {INFER_SHAPE, "infer_shape_error"}, + {CHECKSUPPORT, "check_support_error"}, + {GRAPH_OPTIMIZE, "graph_optimize_error"}, + {GRAPH_PARTION, "graph_partion_error"}, + {GRAPH_BUILDER, "graph_builder_error"} +}; +} + +Analyzer* Analyzer::GetInstance() { static Analyzer instance; return &instance; } @@ -75,7 +77,7 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { std::lock_guard lg(mutex_); auto iter = graph_infos_.find(session_id); if (iter == graph_infos_.end()) { - auto p = new (std::nothrow) GraphInfo(); + auto p = new(std::nothrow) GraphInfo(); GE_CHECK_NOTNULL(p); std::shared_ptr graph_info(p); std::map> graph_map; @@ -86,7 +88,7 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { } else { auto iter1 = (iter->second).find(graph_id); if (iter1 == (iter->second).end()) { - auto p = new (std::nothrow) GraphInfo(); + auto p = new(std::nothrow) GraphInfo(); GE_CHECK_NOTNULL(p); std::shared_ptr graph_info(p); graph_info->session_id = session_id; @@ -100,7 +102,14 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { } ge::Status Analyzer::Initialize() { - ClearHistoryFile(); + // Initialize file + string real_path = RealPath(kFilePath.c_str()); + if (real_path.empty()) { + GELOGE(FAILED, "File path is 
invalid."); + return FAILED; + } + json_file_name_ = real_path + "/" + kAnalyzeFile; + return SUCCESS; } @@ -174,15 +183,8 @@ ge::Status Analyzer::CreateAnalyzerFile() { return SUCCESS; } GELOGD("start to create analyzer file!"); - // Check whether the manifest exists, if not, create it. - string real_path = RealPath(kFilePath.c_str()); - if (real_path.empty()) { - GELOGE(FAILED, "File path is invalid."); - return FAILED; - } + std::lock_guard lg(file_mutex_); - json_file_name_ = real_path + "/" + kAnalyzeFile; - GELOGD("Created analyzer file:[%s]", json_file_name_.c_str()); int fd = open(json_file_name_.c_str(), O_WRONLY | O_CREAT | O_TRUNC, kFileAuthority); if (fd < 0) { GELOGE(INTERNAL_ERROR, "Fail to open the file: %s.", json_file_name_.c_str()); @@ -198,25 +200,27 @@ ge::Status Analyzer::CreateAnalyzerFile() { return SUCCESS; } -ge::Status Analyzer::SaveAnalyzerDataToFile() { +ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_id) { GELOGD("start to save analyze file!"); + + auto graph_info = GetJsonObject(session_id, graph_id); + GE_CHECK_NOTNULL(graph_info); + if (graph_info->op_info.size() == 0) { + GELOGD("session_id:%lu graph_id:%lu does not owner op info, break it!", session_id, graph_id); + return SUCCESS; + } std::lock_guard lg(file_mutex_); - json_file_.open(json_file_name_, std::ios::out); + json_file_.open(json_file_name_, std::ios::app); if (!json_file_.is_open()) { GELOGE(FAILED, "analyzer file does not exist[%s]", json_file_name_.c_str()); return PARAM_INVALID; } - std::lock_guard lk(mutex_); - for (auto &ele : graph_infos_) { - for (auto &ele2 : ele.second) { - json jsn; - GraphInfoToJson(jsn, *(ele2.second)); - json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; - } - } - + json jsn; + GraphInfoToJson(jsn, *graph_info); + json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; json_file_.close(); + return SUCCESS; } @@ -237,13 +241,7 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { return FAILED; } // create json file - status = CreateAnalyzerFile(); - if (status != SUCCESS) { - GELOGE(status, "create analyzer file failed!"); - return status; - } - // save data to file - return SaveAnalyzerDataToFile(); + return CreateAnalyzerFile(); } ge::Status Analyzer::SaveOpInfo(ge::OpDescPtr desc, DataInfo &data_info, @@ -256,18 +254,18 @@ ge::Status Analyzer::SaveOpInfo(ge::OpDescPtr desc, DataInfo &data_info, op_info.error_type = iter->second; op_info.op_name = desc->GetName(); op_info.op_type = desc->GetType(); - op_info.reason = data_info.reason; + op_info.reason = data_info.reason; for (const auto &ptr : desc->GetAllInputsDescPtr()) { TensorInfo tensor_info; - tensor_info.shape = ptr->GetShape().GetDims(); + tensor_info.shape = ptr->GetShape().GetDims(); tensor_info.d_type = ge::TypeUtils::DataTypeToSerialString(ptr->GetDataType()); tensor_info.layout = ge::TypeUtils::FormatToSerialString(ptr->GetFormat()); op_info.input_info.emplace_back(tensor_info); } for (const auto &ptr : desc->GetAllOutputsDescPtr()) { TensorInfo tensor_info; - tensor_info.shape = ptr->GetShape().GetDims(); + tensor_info.shape = ptr->GetShape().GetDims(); tensor_info.d_type = ge::TypeUtils::DataTypeToSerialString(ptr->GetDataType()); tensor_info.layout = ge::TypeUtils::FormatToSerialString(ptr->GetFormat()); op_info.output_info.emplace_back(tensor_info); @@ -277,13 +275,13 @@ ge::Status Analyzer::SaveOpInfo(ge::OpDescPtr desc, DataInfo &data_info, return SUCCESS; } -void Analyzer::TensorInfoToJson(json &j, const TensorInfo &tensor_info) { +void 
Analyzer::TensorInfoToJson(json& j, const TensorInfo &tensor_info) { j[kShape] = tensor_info.shape; j[kDataType] = tensor_info.d_type; j[kLayout] = tensor_info.layout; } -void Analyzer::OpInfoToJson(json &j, const OpInfo &op_info) { +void Analyzer::OpInfoToJson(json& j, const OpInfo &op_info) { j[kErrorType] = op_info.error_type; j[kOpName] = op_info.op_name; j[kOpType] = op_info.op_type; @@ -300,7 +298,7 @@ void Analyzer::OpInfoToJson(json &j, const OpInfo &op_info) { } } -void Analyzer::GraphInfoToJson(json &j, const GraphInfo &graph_info) { +void Analyzer::GraphInfoToJson(json& j, const GraphInfo &graph_info) { GELOGD("start to buff graph info!"); j[kSessionId] = graph_info.session_id; j[kGraphId] = graph_info.graph_id; @@ -312,4 +310,4 @@ void Analyzer::GraphInfoToJson(json &j, const GraphInfo &graph_info) { } j[kOp] = json_op_infos; } -} // namespace ge +} // namespace ge diff --git a/ge/analyzer/analyzer.h b/ge/analyzer/analyzer.h old mode 100644 new mode 100755 index 1afeeca3..fd89b150 --- a/ge/analyzer/analyzer.h +++ b/ge/analyzer/analyzer.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,12 +33,12 @@ namespace ge { namespace analyzer { enum AnalyzeType { - PARSER = 0, - INFER_SHAPE = 1, - CHECKSUPPORT = 2, + PARSER = 0, + INFER_SHAPE = 1, + CHECKSUPPORT = 2, GRAPH_OPTIMIZE = 3, - GRAPH_PARTION = 4, - GRAPH_BUILDER = 5, + GRAPH_PARTION = 4, + GRAPH_BUILDER = 5, }; struct TensorInfo { @@ -66,7 +66,8 @@ struct DataInfo { DataInfo() = default; ~DataInfo() = default; - DataInfo(uint64_t sess, uint64_t graph, AnalyzeType type, ge::NodePtr node, std::string error_info) { + DataInfo(uint64_t sess, uint64_t graph, AnalyzeType type, + ge::NodePtr node, std::string error_info) { session_id = sess; graph_id = graph; analyze_type = type; @@ -79,10 +80,10 @@ struct DataInfo { ge::NodePtr node_ptr{nullptr}; std::string reason; }; -} // namespace analyzer +} class Analyzer { - public: +public: /** * @ingroup ge * @brief: get analyzer instance. 
@@ -156,33 +157,39 @@ class Analyzer { */ ge::Status DoAnalyze(analyzer::DataInfo &data_info); + /** + * @ingroup ge + * @brief: Buff analyzed data and output to json file + * @param [in]: session id , graph id + * @return: 0: SUCCESS other: FAILED + */ + ge::Status SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_id); + Analyzer(const Analyzer &) = delete; - Analyzer &operator=(const Analyzer &) = delete; + Analyzer& operator=(const Analyzer&) = delete; Analyzer(Analyzer &&) = delete; - Analyzer &operator=(Analyzer &&) = delete; - - private: - void TensorInfoToJson(nlohmann::json &j, const analyzer::TensorInfo &tensor_info); - void OpInfoToJson(nlohmann::json &j, const analyzer::OpInfo &op_info); - void GraphInfoToJson(nlohmann::json &j, const analyzer::GraphInfo &graph_info); + Analyzer& operator=(Analyzer &&) = delete; +private: + void TensorInfoToJson(nlohmann::json& j, const analyzer::TensorInfo &tensor_info); + void OpInfoToJson(nlohmann::json& j, const analyzer::OpInfo &op_info); + void GraphInfoToJson(nlohmann::json& j, const analyzer::GraphInfo &graph_info); - ge::Status SaveAnalyzerDataToFile(); ge::Status SaveOpInfo(ge::OpDescPtr desc, analyzer::DataInfo &data_info, - std::shared_ptr graph_info); + std::shared_ptr graph_info); void ClearHistoryFile(); ge::Status CreateAnalyzerFile(); - explicit Analyzer(){}; + explicit Analyzer() {}; ~Analyzer() = default; - private: +private: std::map>> graph_infos_; - std::recursive_mutex mutex_; // protect graph_infos_ - std::mutex file_mutex_; // protect json_file_ + std::recursive_mutex mutex_; // protect graph_infos_ + std::mutex file_mutex_; // protect json_file_ std::ofstream json_file_; std::string json_file_name_; std::atomic_bool is_json_file_create_{false}; }; -} // namespace ge -#endif // DOMI_ANALYZER_ANANLYZER_H_ +} // namespace ge +#endif // DOMI_ANALYZER_ANANLYZER_H_ diff --git a/ge/client/CMakeLists.txt b/ge/client/CMakeLists.txt deleted file mode 100755 index 945d8aa6..00000000 --- a/ge/client/CMakeLists.txt +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -# libge_client.so -# add all proto files, generate corresponding .h and .cc files -set(CMAKE_CXX_FLAGS "-Wno-unused-variable ${CMAKE_CXX_FLAGS}") -file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "../proto/ge_api.proto" - ) - -file(GLOB PROTO_HEADER_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "../proto/ge_ir.proto" - "../proto/task.proto" - "../proto/om.proto" - "../proto/insert_op.proto" - ) - -file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "ge_api.cc" - "ge_prof.cc" - ) - -ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) -ge_protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST}) - -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}/ge) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/common) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/metadef/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/metadef/inc/graph) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) - -############ libge_client.so ################ -add_library(ge_client SHARED ${SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HDRS}) -target_compile_definitions(ge_client PRIVATE - Werror - PROTOBUF_INLINE_NOT_IN_HEADERS=0 - REUSE_MEMORY=1 - PLATFORM_CLOUD) -target_link_libraries(ge_client - graph - ge_compiler - ge_common - ${PROTOBUF_LIBRARY} - ${register} - ${c_sec} - ${slog} - ${mmpa} - ${runtime} - ${msprof} - rt - dl) diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index ad01e48f..68c9fccd 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -380,7 +380,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vectorSessionManagerObj().RunGraphAsync(sessionId_, graph_id, inputs, callback); if (ret != SUCCESS) { diff --git a/ge/client/ge_prof.cc b/ge/client/ge_prof.cc index d4407852..f7fef4e9 100644 --- a/ge/client/ge_prof.cc +++ b/ge/client/ge_prof.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,39 +29,22 @@ using std::vector; namespace { const uint32_t kMaxDeviceNum = 64; -const std::string PROFILING_INIT = "prof_init"; -const std::string PROFILING_FINALIZE = "prof_finalize"; -const std::string PROFILING_START = "prof_start"; -const std::string PROFILING_STOP = "prof_stop"; -const std::string DEVICES_NUMS = "devNums"; -const std::string DEVICE_ID_LIST = "devIdList"; -const std::string AICORE_METRICS = "aicoreMetrics"; +const uint32_t kDeviceListIndex = 3; +const std::string kProfilingInit = "prof_init"; +const std::string kProfilingFinalize = "prof_finalize"; +const std::string kProfilingStart = "prof_start"; +const std::string kProfilingStop = "prof_stop"; +const std::string kDeviceNums = "devNums"; +const std::string kDeviceIdList = "devIdList"; +const std::string kAicoreMetrics = "aicoreMetrics"; const std::map kProfAicoreMetricsToString = { - {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, - {ge::kAicorePipeline, "AICORE_PIPELINE"}, - {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, - {ge::kAicoreMemory, "AICORE_MEMORY"}, - {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, - {ge::kAicoreStall, "AICORE_STALL"}, - {ge::kAicoreMetricsAll, "AICORE_METRICS_ALL"}}; - -const std::map kDataTypeConfigMapping = {{ge::kProfAcl, PROF_ACL_API}, - {ge::kProfTaskTime, PROF_TASK_TIME}, - {ge::kProfAiCoreMetrics, PROF_AICORE_METRICS}, - {ge::kProfAicpuTrace, PROF_AICPU_TRACE}, - {ge::kProfModelExecute, PROF_MODEL_EXECUTE}, - {ge::kProfRuntimeApi, PROF_RUNTIME_API}, - {ge::kProfRuntimeTrace, PROF_RUNTIME_TRACE}, - {ge::kProfScheduleTimeline, PROF_SCHEDULE_TIMELINE}, - {ge::kProfScheduleTrace, PROF_SCHEDULE_TRACE}, - {ge::kProfAiVectorCoreMetrics, PROF_AIVECTORCORE_METRICS}, - {ge::kProfSubtaskTime, PROF_SUBTASK_TIME}, - {ge::kProfTrainingTrace, PROF_TRAINING_TRACE}, - {ge::kProfHcclTrace, PROF_HCCL_TRACE}, - {ge::kProfDataProcess, PROF_DATA_PROCESS}, - {ge::kProfTaskTrace, PROF_TASK_TRACE}, - {ge::kProfModelLoad, PROF_MODEL_LOAD}}; + {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, + {ge::kAicorePipeline, "AICORE_PIPELINE"}, + {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, + {ge::kAicoreMemory, "AICORE_MEMORY"}, + {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, + {ge::kAicoreStall, "AICORE_STALL"}}; } // namespace static bool g_graph_prof_init_ = false; @@ -107,11 +90,11 @@ Status aclgrphProfInit(const char *profiler_path, uint32_t length) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_INIT; - command.module_index = kProfModelLoad | kProfTrainingTrace; + command.cmd_type = kProfilingInit; + command.module_index = PROF_MODEL_LOAD; ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed, config = %s", PROFILING_INIT.c_str(), profiler_path); + GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); return ret; } if (!g_graph_prof_init_) { @@ -143,10 +126,10 @@ Status aclgrphProfFinalize() { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_FINALIZE; + command.cmd_type = kProfilingFinalize; Status ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed.", PROFILING_FINALIZE.c_str()); + GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); return ret; } @@ -164,9 +147,9 @@ Status aclgrphProfFinalize() { bool 
TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector &prof_config_params) { prof_config_params.clear(); - prof_config_params.emplace_back(DEVICES_NUMS); + prof_config_params.emplace_back(kDeviceNums); prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); - prof_config_params.emplace_back(DEVICE_ID_LIST); + prof_config_params.emplace_back(kDeviceIdList); std::string devID = ""; if (profiler_config->config.devNums == 0) { GELOGW("The device num is invalid."); @@ -180,9 +163,9 @@ bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector(profiler_config->config.aicoreMetrics)); + kProfAicoreMetricsToString.find(static_cast(profiler_config->config.aicoreMetrics)); if (iter == kProfAicoreMetricsToString.end()) { GELOGW("The prof aicore metrics is invalid."); return false; @@ -250,13 +233,7 @@ aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t dev } config->config.aicoreMetrics = static_cast(aicore_metrics); - uint64_t data_type = 0; - for (auto &iter : kDataTypeConfigMapping) { - if ((iter.first & data_type_config) == iter.first) { - data_type |= iter.second; - } - } - config->config.dataTypeConfig = data_type; + config->config.dataTypeConfig = data_type_config; GELOGI("Successfully create prof config."); return config; } @@ -309,9 +286,11 @@ Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_START; + command.cmd_type = kProfilingStart; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; + GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), + prof_params[kDeviceListIndex].c_str(), command.module_index); ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); @@ -360,9 +339,11 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_STOP; + command.cmd_type = kProfilingStop; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; + GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), + prof_params[kDeviceListIndex].c_str(), command.module_index); ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); diff --git a/ge/client/proto/ge_api.proto b/ge/client/proto/ge_api.proto new file mode 120000 index 00000000..26d705fe --- /dev/null +++ b/ge/client/proto/ge_api.proto @@ -0,0 +1 @@ +../../proto/ge_api.proto \ No newline at end of file diff --git a/ge/client/proto/ge_ir.proto b/ge/client/proto/ge_ir.proto new file mode 120000 index 00000000..f83e9bda --- /dev/null +++ b/ge/client/proto/ge_ir.proto @@ -0,0 +1 @@ +../../proto/ge_ir.proto \ No newline at end of file diff --git a/ge/client/proto/insert_op.proto b/ge/client/proto/insert_op.proto new file mode 120000 index 00000000..7db5a53b --- /dev/null +++ b/ge/client/proto/insert_op.proto @@ -0,0 +1 @@ +../../proto/insert_op.proto \ No newline at end of file diff --git a/ge/client/proto/om.proto b/ge/client/proto/om.proto new file mode 100755 index 00000000..e15e5f80 --- /dev/null +++ b/ge/client/proto/om.proto @@ -0,0 +1,396 @@ +/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + */ +syntax = "proto3"; + +package domi; + +enum TargetType +{ + MINI = 0; + TINY = 1; + LITE = 2; +} + +// offline model +message ModelDef { + string name = 1; + uint32 version = 2; + + uint64 memory_size = 10; + uint32 stream_num = 11; + uint32 event_num = 12; + uint64 weight_size = 13; + uint32 label_num = 15; + repeated OpDef op = 20; + TargetType target_type = 23; + + map attr = 30; +}; + +// operator define +message OpDef { + string name = 1; + string type = 2; + + uint32 id = 3; + uint32 stream_id = 4; + + repeated string input_name = 5; + + repeated string src_name = 8; + repeated int32 src_index = 9; + repeated int64 input = 10; + repeated int64 output = 11; + repeated TensorDescriptor input_desc = 12; + repeated TensorDescriptor output_desc = 13; + repeated WeightDef weights = 14; + repeated string dst_name = 15; + repeated int32 dst_index = 16; + + repeated int64 workspace = 20; + repeated uint32 workspace_bytes = 21; + + repeated string weight_name = 22; + repeated bool is_input_const = 23; + + map attr = 30; + + QuantizeFactorParams quantize_factor = 31; + + oneof op_params { + // start at 100 here + SendOpParams sender_param = 100; + RecvOpParams receiver_param = 200; + ConvolutionOpParams convolution_param = 300; + PoolingOpParams pooling_param = 400; + EltwiseOpParams eltwise_param = 500; + BatchNormOpParams batchnorm_param = 600; + ScaleOpParams scale_param = 700; + FullConnectionOpParams full_connection_param = 800; + SoftmaxOpParams softmax_param = 900; + ActivationOpParams activation_param = 1000; + ReshapeOpParams reshape_param = 1100; + } +}; + +message SendOpParams { + uint32 event_id = 1; +}; + +message RecvOpParams { + uint32 event_id = 1; +}; + +enum QuantizeScaleType +{ + VECTOR_SCALE = 0; + SCALAR_SCALE = 1; +} + +enum QuantizeScaleMode +{ + NORMAL_MODE = 0; + SQRT_MODE = 1; +} + +enum QuantizeAlgorithm +{ + NON_OFFSET_ALGO = 0; + HALF_OFFSET_ALGO = 1; + ALL_OFFSET_ALGO = 2; +} +message QuantizeFactor +{ + QuantizeScaleMode scale_mode = 1; + bytes scale_value = 2; + int64 scale_offset = 3; + bytes offset_data_value = 4; + int64 offset_data_offset = 5; + bytes offset_weight_value = 6; + int64 offset_weight_offset = 7; + bytes offset_pad_value = 8; + int64 offset_pad_offset = 9; +}; + +message QuantizeCalcFactor +{ + bytes offsetw = 1; + int64 offsetw_offset = 2; + bytes offsetd = 3; + int64 offsetd_offset = 4; + bytes scalereq = 5; + int64 scaledreq_offset = 6; + bytes offsetdnext = 7; + int64 offsetdnext_offset = 8; +} + +message QuantizeFactorParams +{ + QuantizeAlgorithm quantize_algo = 1; + QuantizeScaleType scale_type = 2; + QuantizeFactor quantize_param = 3; + QuantizeFactor dequantize_param = 4; + QuantizeFactor requantize_param = 5; + QuantizeCalcFactor quantizecalc_param = 6; +}; + +message ConvolutionOpParams { + int32 mode = 1; + int32 algo = 2; + int32 pad_mode = 3; + uint32 group = 4; + uint32 num_output = 5; + + repeated uint32 pad = 10; + repeated uint32 stride = 11; + repeated uint32 dilation = 12; + repeated uint32 kernel = 13; + + float alpha = 20; + float beta = 
21; + + WeightDef filter = 40; + WeightDef bias = 41; + + bool relu_flag = 62; + repeated uint32 adj = 70; + repeated uint32 target_shape = 71; + repeated uint32 before_pad = 72; +}; + +message PoolingOpParams { + int32 mode = 1; + int32 nan_opt = 2; + int32 pad_mode = 3; + bool global_pooling = 4; + + repeated uint32 window = 10; + repeated uint32 pad = 11; + repeated uint32 stride = 12; + bool ceil_mode = 13; + int32 data_mode = 14; + + float alpha = 20; + float beta = 21; + repeated uint32 before_pad = 22; +}; + +message EltwiseOpParams { + int32 mode = 1; + repeated float coeff = 2; + float alpha = 3; + float beta = 4; + repeated WeightDef weight = 5; + bool relu_flag = 6; +}; + +message ActivationOpParams { + int32 mode = 1; + float coef = 2; + float alpha = 3; + float beta = 4; +}; + +message BatchNormOpParams { + int32 mode = 1; + + float alpha = 2; + float beta = 3; + double epsilon = 4;//optinal,[default = 1e-5] + bool use_global_stats = 5; //optinal,by default true,testing mode + float moving_average_fraction = 6; //optinal,[default = .999]; + + WeightDef estimated_mean = 7; + WeightDef estimated_variance = 8; + + WeightDef scale = 9; + WeightDef bias = 10; +}; + +message ScaleOpParams { + WeightDef scale = 1; + WeightDef bias = 2; +}; + +message ReshapeOpParams { + float alpha = 1; + float beta = 2; + ShapeDef shape = 3; + int32 axis = 4; + int32 num_axes = 5; + int32 format = 6; +}; + +message SoftmaxOpParams { + int32 algo = 1; + int32 mode = 2; + float alpha = 3; + float beta = 4; +}; + +message FullConnectionOpParams { + WeightDef filter = 1; + WeightDef bias = 2; + uint32 num_output = 3; + bool relu_flag = 12; +}; + +message FlattenOpParams { + float alpha = 1; + float beta = 2; + int32 start_axis = 3; + int32 end_axis = 4; +} + +message AddLimitedOpParams { + float alpha = 1; + float beta = 2; + int32 axis = 3; + bool broadcast = 4; + + repeated WeightDef weight = 10; +}; + +message MulLimitedOpParams { + float alpha = 1; + float beta = 2; + int32 axis = 3; + bool broadcast = 4; + + repeated WeightDef weight = 10; +}; + +message AddOpParams { + float alpha = 1; + float beta = 2; + + repeated WeightDef weight = 10; +}; + +message MulOpParams { + float alpha = 1; + float beta = 2; + + repeated WeightDef weight = 10; +}; + +message SubOpParams { + float alpha = 1; + float beta = 2; + + repeated WeightDef weight = 10; +}; + +message BiasAddOpParams { + float alpha = 1; + float beta = 2; + + WeightDef bias = 10; +}; + +message MatMulOpParams { + float alpha = 1; + float beta = 2; + bool transposeX = 3; + bool transposeW = 4; + + WeightDef filter = 10; + WeightDef bias = 12; +}; + +message RsqrtOpParams { + float alpha = 1; + float beta = 2; +}; + + +message WeightDef { + int32 format = 1; + int32 data_type = 2; + ShapeDef shape = 3; + bytes data = 4; + int64 data_offset = 5; + uint32 cmps_size = 6; + bytes cmps_tab = 7; + int64 cmps_tab_offset = 10; + CompressInfo cmps_info = 8; + AllOffsetQuantizeInfo alloffset_quantize_info = 11; +} + +message ShapeDef { + repeated int64 dim = 1; +} + +enum DeviceType { + NPU = 0; // In default, we will use NPU. 
+ CPU = 1; // CPU +} + +message AllOffsetQuantizeInfo { + float scale = 1; + int32 offset = 2; +} + +message TensorDescriptor { + int32 format = 1; + int32 data_type = 2; + repeated int64 dim = 3; + uint32 size = 4; + bool reuse_input = 5; + bool output_tensor = 7; + DeviceType device_type = 8; + bool input_tensor = 9; + uint32 real_dim_cnt = 10; + uint32 reuse_input_index = 11; + AllOffsetQuantizeInfo alloffset_quantize_info = 12; +} + +message CompressInfo { + int32 blockRow = 1; // block row + int32 blockCol = 2; // block col + int32 fractalK = 3; // fractal K + int32 fractalN = 4; // fractal N + int32 lastFractalK = 5; // K of last fractal + int32 lastFractalN = 6; // N of last fractal + int32 cubeSize = 7; // cube's length + int32 loadDir = 8; // data load direction 0:col load 1:row load +} + +message AttrDef { + message ListValue { + repeated string s = 2; // "list(string)" + repeated int64 i = 3 [packed = true]; // "list(int)" + repeated float f = 4 [packed = true]; // "list(float)" + repeated bool b = 5 [packed = true]; // "list(bool)" + repeated uint32 u = 6 [packed = true]; // "list(uint)" + repeated bytes bt = 7; + } + + oneof value { + string s = 2; // "string" + int64 i = 3; // "int" + float f = 4; // "float" + bool b = 5; // "bool" + uint32 u = 6; // "uint32" + bytes bt = 7; + ListValue list = 1; // any "list(...)" + NamedAttrs func = 10; + } +} + +// A list of attr names and their values. The whole list is attached +// with a string name. E.g., MatMul[T=float]. +message NamedAttrs { + string name = 1; + map<string, AttrDef> attr = 2; +} + diff --git a/ge/client/proto/task.proto b/ge/client/proto/task.proto new file mode 120000 index 00000000..36ae4847 --- /dev/null +++ b/ge/client/proto/task.proto @@ -0,0 +1 @@ +../../proto/task.proto \ No newline at end of file diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 85af6d5b..c8b1934c 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -1,105 +1,167 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
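The om.proto messages above are normally consumed through the protoc-generated C++ classes. A minimal usage sketch follows, assuming protoc output for package domi and a generated header at proto/om.pb.h; the include path and the example attribute name are assumptions for illustration, not taken from this patch.

// Hypothetical usage of the generated om.proto bindings (assumed header path).
#include <iostream>
#include <string>
#include "proto/om.pb.h"  // assumption: conventional protoc output location

int main() {
  domi::ModelDef model;
  model.set_name("demo_model");
  model.set_version(1);

  domi::OpDef *op = model.add_op();
  op->set_name("conv1");
  op->set_type("Convolution");

  // attr is a map<string, AttrDef>; the generated Map supports operator[].
  domi::AttrDef attr;
  attr.set_i(64);
  (*op->mutable_attr())["num_output"] = attr;  // "num_output" is an illustrative key

  std::string bytes;
  model.SerializeToString(&bytes);
  std::cout << "serialized " << bytes.size() << " bytes, ops=" << model.op_size() << std::endl;
  return 0;
}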
-# ============================================================================ +set(PROTO_LIST + "${METADEF_DIR}/proto/om.proto" + "${METADEF_DIR}/proto/ge_ir.proto" + "${METADEF_DIR}/proto/insert_op.proto" + "${METADEF_DIR}/proto/task.proto" + "${METADEF_DIR}/proto/tensorflow/attr_value.proto" + "${METADEF_DIR}/proto/tensorflow/function.proto" + "${METADEF_DIR}/proto/tensorflow/graph.proto" + "${METADEF_DIR}/proto/tensorflow/node_def.proto" + "${METADEF_DIR}/proto/tensorflow/op_def.proto" + "${METADEF_DIR}/proto/tensorflow/resource_handle.proto" + "${METADEF_DIR}/proto/tensorflow/tensor.proto" + "${METADEF_DIR}/proto/tensorflow/tensor_shape.proto" + "${METADEF_DIR}/proto/tensorflow/types.proto" + "${METADEF_DIR}/proto/tensorflow/versions.proto" +) -# libge_common.so -file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "${GE_SOURCE_DIR}/metadef/proto/om.proto" - "${GE_SOURCE_DIR}/metadef/proto/ge_ir.proto" - "${GE_SOURCE_DIR}/metadef/proto/task.proto" - "${GE_SOURCE_DIR}/metadef/proto/insert_op.proto" - ) +protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) -file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "../model/ge_model.cc" - "auth/file_saver.cc" - "context/ctx.cc" - "cust_aicpu_kernel_store.cc" - "debug/memory_dumper.cc" - "dump/dump_properties.cc" - "fmk_error_codes.cc" - "formats/format_transfers/datatype_transfer.cc" - "formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc" - "formats/format_transfers/format_transfer_dhwcn_fracz3D.cc" - "formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc" - "formats/format_transfers/format_transfer_fractal_nz.cc" - "formats/format_transfers/format_transfer_fractal_z.cc" - "formats/format_transfers/format_transfer_fractal_zz.cc" - "formats/format_transfers/format_transfer_fracz_hwcn.cc" - "formats/format_transfers/format_transfer_fracz_nchw.cc" - "formats/format_transfers/format_transfer_fracz_nhwc.cc" - "formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc" - "formats/format_transfers/format_transfer_nc1hwc0_nchw.cc" - "formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc" - "formats/format_transfers/format_transfer_nchw_fz_c04.cc" - "formats/format_transfers/format_transfer_nchw_nc1hwc0.cc" - "formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc" - "formats/format_transfers/format_transfer_transpose.cc" - "formats/formats.cc" - "formats/utils/formats_trans_utils.cc" - "fp16_t.cc" - "ge/datatype_util.cc" - "ge/tbe_plugin_manager.cc" - "ge_format_util.cc" - "helper/model_helper.cc" - "helper/om_file_helper.cc" - "kernel_store.cc" - "math/fp16_math.cc" - "model_parser/base.cc" - "model_saver.cc" - "op/attr_value_util.cc" - "op/ge_op_utils.cc" - "properties_manager.cc" - "tbe_kernel_store.cc" - "thread_pool.cc" - "types.cc" - "util.cc" - ) +set(SRC_LIST + "context/ctx.cc" + "model_saver.cc" + "ge/datatype_util.cc" + "helper/om_file_helper.cc" + "helper/model_helper.cc" + "../model/ge_model.cc" + "auth/file_saver.cc" + "fp16_t.cc" + "math/fp16_math.cc" + "debug/memory_dumper.cc" + "formats/utils/formats_trans_utils.cc" + "dump/dump_properties.cc" + "formats/format_transfers/datatype_transfer.cc" + "formats/format_transfers/format_transfer_transpose.cc" + "formats/format_transfers/format_transfer_nchw_nc1hwc0.cc" + "formats/format_transfers/format_transfer_fractal_z.cc" + "formats/format_transfers/format_transfer_fractal_nz.cc" + "formats/format_transfers/format_transfer_fractal_zz.cc" + "formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc" + 
"formats/format_transfers/format_transfer_nc1hwc0_nchw.cc" + "formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc" + "formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc" + "formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc" + "formats/format_transfers/format_transfer_fracz_nchw.cc" + "formats/format_transfers/format_transfer_fracz_nhwc.cc" + "formats/format_transfers/format_transfer_fracz_hwcn.cc" + "formats/format_transfers/format_transfer_dhwcn_fracz3D.cc" + "formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc" + "formats/format_transfers/format_transfer_nchw_fz_c04.cc" + "formats/formats.cc" + "ge_format_util.cc" + "fmk_error_codes.cc" + "util.cc" + "properties_manager.cc" + "types.cc" + "model_parser/base.cc" + "kernel_store.cc" + "tbe_kernel_store.cc" + "cust_aicpu_kernel_store.cc" + "op/attr_value_util.cc" + "op/ge_op_utils.cc" + "thread_pool.cc" + "ge/tbe_plugin_manager.cc" +) -ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) +############ libge_common.so ############ +add_library(ge_common SHARED ${SRC_LIST} ${PROTO_HDRS}) +target_compile_definitions(ge_common PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + HOST_VISIBILITY + FMK_SUPPORT_DUMP + OS_CENTOS +) -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${CMAKE_CURRENT_LIST_DIR}/op) -include_directories(${GE_SOURCE_DIR}/ge) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/common/util) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/metadef/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/metadef/inc/graph) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) +target_compile_options(ge_common PRIVATE + -fvisibility=hidden + -O2 + -Werror +) -############ libge_common.so ################ -add_library(ge_common SHARED ${SRC_LIST} ${PROTO_HDRS}) -target_compile_definitions(ge_common PUBLIC - PROTOBUF_INLINE_NOT_IN_HEADERS=0 - HOST_VISIBILITY - OS_CENTOS) -target_link_libraries(ge_common - graph - ${PROTOBUF_LIBRARY} - ${register} - ${c_sec} - ${slog} - ${mmpa} - ${resource} - ${error_manager} - rt - dl) +target_include_directories(ge_common PRIVATE + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/ge/common + ${GE_CODE_DIR}/ge/common/op + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_DEPEND_DIR}/inc + ${GE_DEPEND_DIR}/inc/cce + #### blue zone #### + #${GE_DEPEND_DIR}/include +) + +target_link_libraries(ge_common PRIVATE + $ + -Wl,--no-as-needed + graph + protobuf + register + c_sec + error_manager + slog + mmpa + -Wl,--as-needed + json + -lrt + -ldl +) + +############ libge_common.a ############ +add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_HDRS}) +target_compile_definitions(ge_common_static PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + HOST_VISIBILITY + FMK_SUPPORT_DUMP + OS_CENTOS +) + +target_compile_options(ge_common_static PRIVATE + -fvisibility=hidden + -O2 + -Werror +) + 
+target_include_directories(ge_common_static PRIVATE + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/ge/common + ${GE_CODE_DIR}/ge/common/op + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_DEPEND_DIR}/inc + ${GE_DEPEND_DIR}/inc/cce + #### blue zone #### + #${GE_DEPEND_DIR}/include +) + +target_link_libraries(ge_common_static PRIVATE + $ + protobuf + json + c_sec + -lrt + -ldl +) + +############ install ############ +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(TARGETS ge_common OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc old mode 100644 new mode 100755 index 4aaf9c19..60d99c0b --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,9 +55,26 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size == 0 || data == nullptr, return PARAM_INVALID); - + mmSsize_t write_count; + uint32_t size_2g = ((uint32_t) 0x1 << 31); + uint32_t size_1g = ((uint32_t) 0x1 << 30); // Write data - int32_t write_count = mmWrite(fd, const_cast(data), size); + if (size > size_2g) { + auto seek = reinterpret_cast(const_cast(data)); + while (size > size_1g) { + write_count = mmWrite(fd, reinterpret_cast(seek), size_1g); + if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { + GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno)); + return FAILED; + } + size -= size_1g; + seek += size_1g; + } + write_count = mmWrite(fd, reinterpret_cast(seek), size); + } else { + write_count = mmWrite(fd, const_cast(data), size); + } + // -1: Failed to write to file; - 2: Illegal parameter if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { GELOGE(FAILED, "Write data failed. 
mmpa_errorno = %d, %s", write_count, strerror(errno)); @@ -99,10 +116,10 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi ModelPartitionTable &model_partition_table, const std::vector &partition_datas) { - GE_CHK_BOOL_RET_STATUS( - !partition_datas.empty() && model_partition_table.num != 0 && model_partition_table.num == partition_datas.size(), - FAILED, "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", model_partition_table.num, - partition_datas.size()); + GE_CHK_BOOL_RET_STATUS(!partition_datas.empty() && model_partition_table.num != 0 + && model_partition_table.num == partition_datas.size(), FAILED, + "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", + model_partition_table.num, partition_datas.size()); // Open file int32_t fd = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(OpenFile(fd, file_path) != SUCCESS, return FAILED); @@ -110,16 +127,18 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi do { // Write file header GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; break); + WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; + break); // Write model partition table uint32_t table_size = static_cast(SIZE_OF_MODEL_PARTITION_TABLE(model_partition_table)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); + WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); // Write partition data for (const auto &partitionData : partition_datas) { + GELOGI("GC:size[%zu]", partitionData.size); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; - break); + WriteData(static_cast(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; + break); } } while (0); // Close file @@ -132,9 +151,9 @@ Status FileSaver::SaveToBuffWithFileHeader(const ModelFileHeader &file_header, const std::vector &partitionDatas, ge::ModelBufferData &model) { GE_CHK_BOOL_RET_STATUS( - !partitionDatas.empty() && model_partition_table.num != 0 && model_partition_table.num == partitionDatas.size(), - FAILED, "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", model_partition_table.num, - partitionDatas.size()); + !partitionDatas.empty() && model_partition_table.num != 0 && model_partition_table.num == partitionDatas.size(), + FAILED, "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", + model_partition_table.num, partitionDatas.size()); uint32_t model_header_size = sizeof(ModelFileHeader); uint32_t table_size = static_cast(SIZE_OF_MODEL_PARTITION_TABLE(model_partition_table)); uint32_t total_size = model_header_size + table_size; diff --git a/ge/common/auth/file_saver.h b/ge/common/auth/file_saver.h index d415746d..79e2126e 100644 --- a/ge/common/auth/file_saver.h +++ b/ge/common/auth/file_saver.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
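The WriteData change above splits any buffer larger than 2 GiB into 1 GiB calls, apparently because a single write call cannot safely take the full size. A rough equivalent of that loop, sketched with plain POSIX write() instead of the mmpa mmWrite wrapper (an assumption made only to keep the example self-contained); unlike the patch, this sketch also advances past short writes.

// Chunked-write sketch; kChunk mirrors size_1g from the patch.
#include <unistd.h>
#include <cstdint>
#include <cstddef>

bool WriteAll(int fd, const void *data, uint64_t size) {
  const uint64_t kChunk = 1ULL << 30;  // 1 GiB per call
  const uint8_t *p = static_cast<const uint8_t *>(data);
  while (size > 0) {
    uint64_t n = size < kChunk ? size : kChunk;
    ssize_t written = write(fd, p, static_cast<size_t>(n));
    if (written < 0) {
      return false;  // caller inspects errno, as the patched code does via GELOGE
    }
    p += written;
    size -= static_cast<uint64_t>(written);
  }
  return true;
}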
@@ -74,8 +74,10 @@ class FileSaver { ModelPartitionTable &model_partition_table, const std::vector &partition_datas); - static Status SaveToBuffWithFileHeader(const ModelFileHeader &file_header, ModelPartitionTable &model_partition_table, - const std::vector &partitionDatas, ge::ModelBufferData &model); + static Status SaveToBuffWithFileHeader(const ModelFileHeader &file_header, + ModelPartitionTable &model_partition_table, + const std::vector &partitionDatas, + ge::ModelBufferData& model); static Status SaveToFile(const string &file_path, const void *data, int len); diff --git a/ge/common/base64.h b/ge/common/base64.h index 26819c88..fb6c1870 100644 --- a/ge/common/base64.h +++ b/ge/common/base64.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,23 +25,24 @@ namespace ge { namespace { -const char *kBase64Chars = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; +const char* kBase64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; const char kEqualSymbol = '='; const size_t kBase64CharsNum = 64; const size_t kThreeByteOneGroup = 3; const size_t kFourByteOneGroup = 4; -} // namespace +} namespace base64 { -static inline bool IsBase64Char(const char &c) { return (isalnum(c) || (c == '+') || (c == '/')); } +static inline bool IsBase64Char(const char &c) { + return (isalnum(c) || (c == '+') || (c == '/')); +} static std::string EncodeToBase64(const std::string &raw_data) { size_t encode_length = raw_data.size() / kThreeByteOneGroup * kFourByteOneGroup; encode_length += raw_data.size() % kThreeByteOneGroup == 0 ? 
0 : kFourByteOneGroup; - size_t raw_data_index = 0; + size_t raw_data_index = 0 ; size_t encode_data_index = 0; std::string encode_data; encode_data.resize(encode_length); @@ -79,7 +80,8 @@ static std::string EncodeToBase64(const std::string &raw_data) { #pragma GCC diagnostic ignored "-Wunused-function" static Status DecodeFromBase64(const std::string &base64_data, std::string &decode_data) { if (base64_data.size() % kFourByteOneGroup != 0) { - GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", base64_data.size()); + GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", + base64_data.size()); return PARAM_INVALID; } decode_data.clear(); @@ -92,7 +94,8 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += 4) { for (size_t i = 0; i < kFourByteOneGroup; ++i) { - if (base64_data[input_data_index + i] == kEqualSymbol && input_data_index >= base64_data_len - 4 && i > 1) { + if (base64_data[input_data_index + i] == kEqualSymbol && + input_data_index >= base64_data_len - 4 && i > 1) { byte_4[i] = kBase64CharsNum; } else if (IsBase64Char(base64_data[input_data_index + i])) { byte_4[i] = FindCharInBase64Chars(base64_data[input_data_index + i]); @@ -102,18 +105,18 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco } } decode_data += static_cast((byte_4[0] << 2u) + ((byte_4[1] & 0x30) >> 4u)); - if (byte_4[2] >= kBase64CharsNum) { + if (byte_4[2] >= kBase64CharsNum){ break; } else if (byte_4[3] >= kBase64CharsNum) { - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); + decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); break; } - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); - decode_data += static_cast(((byte_4[2] & 0x03) << 6u) + byte_4[3]); + decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); + decode_data += static_cast(((byte_4[2] & 0x03) << 6u) + byte_4[3]); } return SUCCESS; } #pragma GCC diagnostic pop -} // namespace base64 +} } // namespace ge #endif // GE_COMMON_BASE64_H_ \ No newline at end of file diff --git a/ge/common/context/ctx.cc b/ge/common/context/ctx.cc old mode 100644 new mode 100755 index f6ae364d..9fe2f8c7 --- a/ge/common/context/ctx.cc +++ b/ge/common/context/ctx.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/convert/pb2json.cc b/ge/common/convert/pb2json.cc deleted file mode 100644 index 0a5d24ee..00000000 --- a/ge/common/convert/pb2json.cc +++ /dev/null @@ -1,248 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
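The base64 helpers above pack every 3 input bytes into 4 output characters (kThreeByteOneGroup / kFourByteOneGroup) and pad the tail group with '='. A self-contained sketch of the encoding side; only the alphabet is taken from the patch, everything else is illustrative.

// Minimal base64 encoder illustrating the 3-byte -> 4-character grouping.
#include <cstdint>
#include <iostream>
#include <string>

static const char kAlphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

std::string EncodeBase64(const std::string &in) {
  std::string out;
  size_t i = 0;
  for (; i + 3 <= in.size(); i += 3) {  // full 3-byte groups
    uint32_t v = (static_cast<uint8_t>(in[i]) << 16) |
                 (static_cast<uint8_t>(in[i + 1]) << 8) |
                  static_cast<uint8_t>(in[i + 2]);
    out += kAlphabet[(v >> 18) & 0x3F];
    out += kAlphabet[(v >> 12) & 0x3F];
    out += kAlphabet[(v >> 6) & 0x3F];
    out += kAlphabet[v & 0x3F];
  }
  size_t rest = in.size() - i;  // 0, 1 or 2 trailing bytes
  if (rest > 0) {
    uint32_t v = static_cast<uint8_t>(in[i]) << 16;
    if (rest == 2) v |= static_cast<uint8_t>(in[i + 1]) << 8;
    out += kAlphabet[(v >> 18) & 0x3F];
    out += kAlphabet[(v >> 12) & 0x3F];
    out += (rest == 2) ? kAlphabet[(v >> 6) & 0x3F] : '=';
    out += '=';
  }
  return out;
}

int main() {
  std::cout << EncodeBase64("GE") << std::endl;  // prints "R0U="
  return 0;
}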
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// File: pb2json.h -// Description: This imply file for protobuf message and json interconversion - -#include "common/convert/pb2json.h" -#include -#include -#include "securec.h" -#include "framework/common/fmk_types.h" -#include "framework/common/debug/ge_log.h" - -using std::set; -using std::string; - -namespace ge { -namespace { -const int kSignificantDigits = 10; -} -// JSON parses non utf8 character throwing exceptions, so some fields need to be shielded through black fields -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void Pb2Json::Message2Json(const ProtobufMsg &message, - const set &black_fields, Json &json, - bool enum2str) { - auto descriptor = message.GetDescriptor(); - auto reflection = message.GetReflection(); - if (descriptor == nullptr || reflection == nullptr) { - return; - } - - auto count = descriptor->field_count(); - - for (auto i = 0; i < count; ++i) { - const auto field = descriptor->field(i); - if (field == nullptr) { - return; - } - - // Do not display weight data - if (black_fields.find(field->name()) != black_fields.end()) { - continue; - } - - if (field->is_repeated()) { - if (reflection->FieldSize(message, field) > 0) { - RepeatedMessage2Json(message, field, reflection, black_fields, json[field->name()], enum2str); - } - continue; - } - - if (!reflection->HasField(message, field)) { - continue; - } - - OneField2Json(message, field, reflection, black_fields, json, enum2str); - } -} - -void Pb2Json::OneField2Json(const ProtobufMsg &message, const ProtobufFieldDescriptor *field, - const ProtobufReflection *reflection, const set &black_fields, Json &json, - bool enum2str) { - switch (field->type()) { - case ProtobufFieldDescriptor::TYPE_MESSAGE: { - const ProtobufMsg &tmp_message = reflection->GetMessage(message, field); - if (0 != tmp_message.ByteSize()) { - Message2Json(tmp_message, black_fields, json[field->name()], enum2str); - } - break; - } - - case ProtobufFieldDescriptor::TYPE_BOOL: - json[field->name()] = reflection->GetBool(message, field); - break; - - case ProtobufFieldDescriptor::TYPE_ENUM: { - auto *enum_value_desc = reflection->GetEnum(message, field); - Enum2Json(enum_value_desc, field, enum2str, json); - break; - } - - case ProtobufFieldDescriptor::TYPE_INT32: - case ProtobufFieldDescriptor::TYPE_SINT32: - case ProtobufFieldDescriptor::TYPE_SFIXED32: - json[field->name()] = reflection->GetInt32(message, field); - break; - - case ProtobufFieldDescriptor::TYPE_UINT32: - case ProtobufFieldDescriptor::TYPE_FIXED32: - json[field->name()] = reflection->GetUInt32(message, field); - break; - - case ProtobufFieldDescriptor::TYPE_INT64: - case ProtobufFieldDescriptor::TYPE_SINT64: - case ProtobufFieldDescriptor::TYPE_SFIXED64: - json[field->name()] = reflection->GetInt64(message, field); - break; - - case ProtobufFieldDescriptor::TYPE_UINT64: - case ProtobufFieldDescriptor::TYPE_FIXED64: - json[field->name()] = reflection->GetUInt64(message, field); - break; - - case ProtobufFieldDescriptor::TYPE_FLOAT: - char str[kSignificantDigits]; - if (sprintf_s(str, kSignificantDigits, "%g", reflection->GetFloat(message, field)) != -1) { - json[field->name()] = str; - } else { - json[field->name()] = reflection->GetFloat(message, field); - } - - break; - - case ProtobufFieldDescriptor::TYPE_STRING: - json[field->name()] = reflection->GetString(message, field); - break; - - case ProtobufFieldDescriptor::TYPE_BYTES: { - string field_name = field->name(); 
- string type_bytes = reflection->GetString(message, field); - json[field_name] = TypeBytes2String(field_name, type_bytes); - break; - } - - default: - break; - } -} - -string Pb2Json::TypeBytes2String(string &field_name, string &type_bytes) { - if (field_name != "offset") { - return type_bytes; - } - string result = ""; - for (char temp_value : type_bytes) { - uint8_t *value = 0; - value = reinterpret_cast(&temp_value); - char str[kSignificantDigits]; - if (sprintf_s(str, kSignificantDigits, "%d", *value) == -1) { - GELOGW("Convert bytes to string fail, filed name:%s", field_name.c_str()); - continue; - } - result += str; - } - return result; -} - -void Pb2Json::RepeatedMessage2Json(const ProtobufMsg &message, const ProtobufFieldDescriptor *field, - const ProtobufReflection *reflection, const set &black_fields, Json &json, - bool enum2str) { - if ((field == nullptr) || (reflection == nullptr)) { - Message2Json(message, black_fields, json, enum2str); - return; - } - - for (auto i = 0; i < reflection->FieldSize(message, field); ++i) { - Json tmp_json; - switch (field->type()) { - case ProtobufFieldDescriptor::TYPE_MESSAGE: { - const ProtobufMsg &tmp_message = reflection->GetRepeatedMessage(message, field, i); - if (0 != tmp_message.ByteSize()) { - Message2Json(tmp_message, black_fields, tmp_json, enum2str); - } - } break; - - case ProtobufFieldDescriptor::TYPE_BOOL: - tmp_json = reflection->GetRepeatedBool(message, field, i); - break; - - case ProtobufFieldDescriptor::TYPE_ENUM: { - auto *enum_value_desc = reflection->GetRepeatedEnum(message, field, i); - RepeatedEnum2Json(enum_value_desc, enum2str, tmp_json); - } break; - - case ProtobufFieldDescriptor::TYPE_INT32: - case ProtobufFieldDescriptor::TYPE_SINT32: - case ProtobufFieldDescriptor::TYPE_SFIXED32: - tmp_json = reflection->GetRepeatedInt32(message, field, i); - break; - - case ProtobufFieldDescriptor::TYPE_UINT32: - case ProtobufFieldDescriptor::TYPE_FIXED32: - tmp_json = reflection->GetRepeatedUInt32(message, field, i); - break; - - case ProtobufFieldDescriptor::TYPE_INT64: - case ProtobufFieldDescriptor::TYPE_SINT64: - case ProtobufFieldDescriptor::TYPE_SFIXED64: - tmp_json = reflection->GetRepeatedInt64(message, field, i); - break; - - case ProtobufFieldDescriptor::TYPE_UINT64: - case ProtobufFieldDescriptor::TYPE_FIXED64: - tmp_json = reflection->GetRepeatedUInt64(message, field, i); - break; - - case ProtobufFieldDescriptor::TYPE_FLOAT: - tmp_json = reflection->GetRepeatedFloat(message, field, i); - break; - - case ProtobufFieldDescriptor::TYPE_STRING: - case ProtobufFieldDescriptor::TYPE_BYTES: - tmp_json = reflection->GetRepeatedString(message, field, i); - break; - - default: - break; - } - json += tmp_json; - } -} - -void Pb2Json::Enum2Json(const ProtobufEnumValueDescriptor *enum_value_desc, const ProtobufFieldDescriptor *field, - bool enum2str, Json &json) { - if (enum_value_desc != nullptr) { - if (field == nullptr) { - return; - } - if (enum2str) { - json[field->name()] = enum_value_desc->name(); - } else { - json[field->name()] = enum_value_desc->number(); - } - } -} - -void Pb2Json::RepeatedEnum2Json(const ProtobufEnumValueDescriptor *enum_value_desc, bool enum2str, Json &json) { - if (enum_value_desc != nullptr) { - if (enum2str) { - json = enum_value_desc->name(); - } else { - json = enum_value_desc->number(); - } - } -} -} // namespace ge diff --git a/ge/common/convert/pb2json.h b/ge/common/convert/pb2json.h deleted file mode 100644 index 88ded50e..00000000 --- a/ge/common/convert/pb2json.h +++ /dev/null @@ -1,68 
+0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// File: pb2json.h -// Description: This header file for protobuf message and json interconversion - -#ifndef GE_COMMON_CONVERT_PB2JSON_H_ -#define GE_COMMON_CONVERT_PB2JSON_H_ -#include -#include -#include -#include -#include "google/protobuf/descriptor.h" -#include "google/protobuf/message.h" -#include "nlohmann/json.hpp" - -namespace ge { -using Json = nlohmann::json; -using ProtobufMsg = ::google::protobuf::Message; -using ProtobufReflection = ::google::protobuf::Reflection; -using ProtobufFieldDescriptor = ::google::protobuf::FieldDescriptor; -using ProtobufDescriptor = ::google::protobuf::Descriptor; -using ProtobufEnumValueDescriptor = ::google::protobuf::EnumValueDescriptor; - -class Pb2Json { - public: - /** - * @ingroup domi_omg - * @brief Transfer protobuf object to JSON object - * @param [out] json Converted JSON object - * @return void success - * @author - */ - static void Message2Json(const ProtobufMsg &message, const std::set &black_fields, Json &json, - bool enum2str = false); - - protected: - static void RepeatedMessage2Json(const ProtobufMsg &message, const ProtobufFieldDescriptor *field, - const ProtobufReflection *reflection, const std::set &black_fields, - Json &json, bool enum2str); - - static void Enum2Json(const ProtobufEnumValueDescriptor *enum_value_desc, const ProtobufFieldDescriptor *field, - bool enum2str, Json &json); - - static void RepeatedEnum2Json(const ProtobufEnumValueDescriptor *enum_value_desc, bool enum2str, Json &json); - - static void OneField2Json(const ProtobufMsg &message, const ProtobufFieldDescriptor *field, - const ProtobufReflection *reflection, const std::set &black_fields, Json &json, - bool enum2str); - - static std::string TypeBytes2String(std::string &field_name, std::string &type_bytes); -}; -} // namespace ge - -#endif // GE_COMMON_CONVERT_PB2JSON_H_ diff --git a/ge/common/cust_aicpu_kernel_store.cc b/ge/common/cust_aicpu_kernel_store.cc old mode 100644 new mode 100755 index 46eb484b..86881b0e --- a/ge/common/cust_aicpu_kernel_store.cc +++ b/ge/common/cust_aicpu_kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
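The deleted Pb2Json helper walked an arbitrary protobuf message via reflection and emitted nlohmann::json. A compressed sketch of the same idea, restricted to a few scalar field types; it uses only the public protobuf reflection API and is not the removed implementation.

// Reflection-based message-to-JSON sketch (scalar fields only, repeated fields skipped).
#include <google/protobuf/descriptor.h>
#include <google/protobuf/message.h>
#include <nlohmann/json.hpp>

nlohmann::json MessageToJson(const google::protobuf::Message &msg) {
  nlohmann::json out;
  const auto *desc = msg.GetDescriptor();
  const auto *refl = msg.GetReflection();
  for (int i = 0; i < desc->field_count(); ++i) {
    const auto *field = desc->field(i);
    if (field->is_repeated()) {
      continue;  // repeated and nested handling omitted in this sketch
    }
    switch (field->cpp_type()) {
      case google::protobuf::FieldDescriptor::CPPTYPE_STRING:
        out[field->name()] = refl->GetString(msg, field);
        break;
      case google::protobuf::FieldDescriptor::CPPTYPE_INT32:
        out[field->name()] = refl->GetInt32(msg, field);
        break;
      case google::protobuf::FieldDescriptor::CPPTYPE_UINT32:
        out[field->name()] = refl->GetUInt32(msg, field);
        break;
      default:
        break;  // other types omitted here
    }
  }
  return out;
}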
@@ -20,7 +20,9 @@ namespace ge { CustAICPUKernelStore::CustAICPUKernelStore() {} -void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel) { AddKernel(kernel); } +void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel) { + AddKernel(kernel); +} void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr &op_desc) const { GELOGI("LoadCustAICPUKernelBinToOpDesc in"); diff --git a/ge/common/cust_aicpu_kernel_store.h b/ge/common/cust_aicpu_kernel_store.h old mode 100644 new mode 100755 index 6dff0435..033a636b --- a/ge/common/cust_aicpu_kernel_store.h +++ b/ge/common/cust_aicpu_kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/debug/memory_dumper.cc b/ge/common/debug/memory_dumper.cc index 1a7d9db8..d2b8d674 100644 --- a/ge/common/debug/memory_dumper.cc +++ b/ge/common/debug/memory_dumper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/debug/memory_dumper.h b/ge/common/debug/memory_dumper.h old mode 100644 new mode 100755 index 4995f5f7..a71f86f4 --- a/ge/common/debug/memory_dumper.h +++ b/ge/common/debug/memory_dumper.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc index fbf9afe7..17019c5a 100644 --- a/ge/common/dump/dump_manager.cc +++ b/ge/common/dump/dump_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_manager.h b/ge/common/dump/dump_manager.h index dbc89cc8..53a643f9 100644 --- a/ge/common/dump/dump_manager.h +++ b/ge/common/dump/dump_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc old mode 100644 new mode 100755 index 31a88023..ca2dec98 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -252,4 +252,4 @@ Status DumpOp::LaunchDumpOp() { } return SUCCESS; } -} // namespace ge +} // namesapce ge diff --git a/ge/common/dump/dump_op.h b/ge/common/dump/dump_op.h old mode 100644 new mode 100755 index b3042245..d59962e6 --- a/ge/common/dump/dump_op.h +++ b/ge/common/dump/dump_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/dump/dump_properties.cc b/ge/common/dump/dump_properties.cc index b6247c6e..a4540367 100644 --- a/ge/common/dump/dump_properties.cc +++ b/ge/common/dump/dump_properties.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on"; const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); -} // namespace +} namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { CopyFrom(other); } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( - const DumpProperties &other) { + const DumpProperties &other) { CopyFrom(other); return *this; } @@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti // The following is the new dump scenario of the fusion operator FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( - const std::string &model, const std::set &layers) { + const std::string &model, const std::set &layers) { for (const std::string &layer : layers) { GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); } @@ -136,7 +136,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpPrope } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpProperties::GetPropertyValue( - const std::string &model) const { + const std::string &model) const { auto iter = model_dump_properties_map_.find(model); if (iter != model_dump_properties_map_.end()) { return iter->second; @@ -145,7 +145,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpPrope } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( - const std::string &model, const std::string &om_name, const std::string &op_name) const { + const std::string &model, const std::string &om_name, const std::string &op_name) const { // if dump all if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { return true; @@ -201,7 +201,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( - const std::string &dump_op_switch) { + const std::string dump_op_switch) { dump_op_switch_ = dump_op_switch; } @@ -266,4 +266,4 @@ void DumpProperties::SetDumpDebugOptions() { GELOGI("ge.exec.enableDumpDebug is false or is not set."); } } -} // namespace ge +} // namespace diff --git a/ge/common/dump/dump_properties.h b/ge/common/dump/dump_properties.h index 7909d5a5..682d2d08 100644 --- a/ge/common/dump/dump_properties.h +++ b/ge/common/dump/dump_properties.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
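DumpProperties above keeps a per-model set of layer names (AddPropertyValue / GetPropertyValue / IsLayerNeedDump). A minimal sketch of that bookkeeping; the class and method names here are illustrative, not the GE API.

// Per-model dump-layer lookup sketch.
#include <iostream>
#include <map>
#include <set>
#include <string>

class DumpConfig {
 public:
  void AddLayers(const std::string &model, const std::set<std::string> &layers) {
    layers_by_model_[model].insert(layers.begin(), layers.end());
  }
  bool NeedDump(const std::string &model, const std::string &layer) const {
    auto it = layers_by_model_.find(model);
    if (it == layers_by_model_.end()) {
      return false;
    }
    // An empty set is treated as "dump every layer of this model".
    return it->second.empty() || it->second.count(layer) > 0;
  }

 private:
  std::map<std::string, std::set<std::string>> layers_by_model_;
};

int main() {
  DumpConfig cfg;
  cfg.AddLayers("resnet50", {"conv1", "fc1000"});
  std::cout << cfg.NeedDump("resnet50", "conv1") << " "
            << cfg.NeedDump("resnet50", "pool5") << std::endl;  // prints "1 0"
  return 0;
}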
@@ -65,7 +65,7 @@ class DumpProperties { const std::string &GetDumpStatus() const; - void SetDumpOpSwitch(const std::string &dump_op_switch); + void SetDumpOpSwitch(const std::string dump_op_switch); const std::string &GetDumpOpSwitch() const; @@ -77,9 +77,9 @@ class DumpProperties { uint32_t GetOpDebugMode() const { return op_debug_mode_; } - const std::string &GetEnableDump() const { return enable_dump_; } + const std::string &GetEnableDump() const {return enable_dump_;} - const std::string &GetEnableDumpDebug() const { return enable_dump_debug_; } + const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;} private: void CopyFrom(const DumpProperties &other); @@ -99,6 +99,6 @@ class DumpProperties { bool is_op_debug_ = false; uint32_t op_debug_mode_ = 0; }; -} // namespace ge +} -#endif // GE_COMMON_DUMP_DUMP_PROPERTIES_H_ \ No newline at end of file +#endif //GE_COMMON_DUMP_DUMP_PROPERTIES_H_ \ No newline at end of file diff --git a/ge/common/dump/dump_server.cc b/ge/common/dump/dump_server.cc new file mode 100644 index 00000000..a3dc5804 --- /dev/null +++ b/ge/common/dump/dump_server.cc @@ -0,0 +1,21 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "adx_datadump_server.h" + +int AdxDataDumpServerUnInit() { return 0; } + +int AdxDataDumpServerInit() { return 0; } diff --git a/ge/common/fmk_error_codes.cc b/ge/common/fmk_error_codes.cc old mode 100644 new mode 100755 index 3ad8503a..ddb8089d --- a/ge/common/fmk_error_codes.cc +++ b/ge/common/fmk_error_codes.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/datatype_transfer.cc b/ge/common/formats/format_transfers/datatype_transfer.cc index a603b2f4..725eed98 100644 --- a/ge/common/formats/format_transfers/datatype_transfer.cc +++ b/ge/common/formats/format_transfers/datatype_transfer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -89,8 +89,8 @@ Status TransDataSrc2Fp16(const CastArgs &args, uint8_t *dst, const size_t data_s } Status CastKernel(const CastArgs &args, uint8_t *dst, const size_t data_size, const DataTypeTransMode trans_mode) { - static std::map> transfer_handle = - { + static std::map> + transfer_handle = { {kTransferWithDatatypeFloatToFloat16, TransDataSrc2Fp16}, {kTransferWithDatatypeFloatToInt32, TransDataSrc2Dst}, {kTransferWithDatatypeFloat16ToFloat, TransDataSrc2Dst}, @@ -107,7 +107,7 @@ Status CastKernel(const CastArgs &args, uint8_t *dst, const size_t data_size, co {kTransferWithDatatypeInt32ToInt64, TransDataSrc2Dst}, {kTransferWithDatatypeInt32ToDouble, TransDataSrc2Dst}, {kTransferWithDatatypeDoubleToInt32, TransDataSrc2Dst}, - }; + }; auto it = transfer_handle.find(trans_mode); if (it == transfer_handle.end()) { return UNSUPPORTED; diff --git a/ge/common/formats/format_transfers/datatype_transfer.h b/ge/common/formats/format_transfers/datatype_transfer.h old mode 100644 new mode 100755 index 4d93fd6c..22313e90 --- a/ge/common/formats/format_transfers/datatype_transfer.h +++ b/ge/common/formats/format_transfers/datatype_transfer.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc index 40dc749d..12d13e44 100644 --- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc +++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -105,8 +105,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size auto dst_offset = dst_idx * size; // The memcpy_s/memset_s argument `dstMax` must be less than 2G auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h index d2156018..8ff704eb 100644 --- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h +++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
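The recurring protected_size expression in these format transfers clamps the remaining destination space to SECUREC_MEM_MAX_LEN, since the securec memcpy_s/memset_s calls reject a dstMax of 2 GiB or more. A sketch of just that clamp; the constant value used below is an assumption for illustration.

// Clamp remaining destination space before a secure copy.
#include <cstdint>

constexpr int64_t kSecurecMemMaxLen = 0x7fffffff;  // assumed upper bound, just under 2 GiB

int64_t ProtectedSize(int64_t total_size, int64_t dst_offset) {
  int64_t remaining = total_size - dst_offset;
  return remaining < kSecurecMemMaxLen ? remaining : kSecurecMemMaxLen;
}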
diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc index 76d8696a..4060a3b2 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h" #include diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h old mode 100644 new mode 100755 index 41581dec..6a31a746 --- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h +++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWCN_FRACTAL_Z_3D_H_ #define GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWCN_FRACTAL_Z_3D_H_ diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc index 9de2e3a0..457469c7 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h" #include diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h old mode 100644 new mode 100755 index 1c4986b8..728cfbdc --- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h +++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWNC_FRACTAL_Z_3D_TRANSPOSE_H_ #define GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWNC_FRACTAL_Z_3D_TRANSPOSE_H_ diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc old mode 100644 new mode 100755 index 65798f29..cb4de6b5 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -154,9 +154,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con for (int64_t w1_idx = 0; w1_idx < num_w1; w1_idx++) { auto dst_offset = (h1h0_head + w1_idx * h1h0w0) * size; auto src_offset = (src_h_head + w1_idx * w0) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? + dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -169,9 +168,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con auto src_w_idx = w1_head + w0_idx; auto dst_offset = (h1h0_head + num_w1 * h1h0w0 + w0_idx) * size; auto src_offset = (src_h_head + src_w_idx) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? + dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -227,9 +225,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con for (int64_t w1_idx = 0; w1_idx < num_w1; w1_idx++) { auto src_offset = (h1h0_head + w1_idx * h1h0w0) * size; auto dst_offset = (dst_h_head + w1_idx * w0) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? + dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -242,9 +239,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con auto dst_w_idx = w1_head + w0_idx; auto src_offset = (h1h0_head + num_w1 * h1h0w0 + w0_idx) * size; auto dst_offset = (dst_h_head + dst_w_idx) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? 
+ dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.h b/ge/common/formats/format_transfers/format_transfer_fractal_nz.h old mode 100644 new mode 100755 index 49e82884..68abdbc8 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc index f2ec29da..0e941486 100644 --- a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -124,11 +124,11 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { std::shared_ptr dst(new (std::nothrow) uint8_t[dst_size], std::default_delete()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - dst == nullptr, - GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); - return OUT_OF_MEMORY;); + dst == nullptr, + GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", + TypeUtils::FormatToSerialString(args.src_format).c_str(), + TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); + return OUT_OF_MEMORY;); for (int64_t vfi = 0; vfi < vf_cnt; vfi++) { // vertical fractal matrix base index @@ -152,8 +152,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { auto idx = gfi * fractal_ele_cnt + col * c0 + row; auto offset = idx * size; auto protected_size = dst_size - offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - offset + : static_cast(SECUREC_MEM_MAX_LEN); errno_t ret = EOK; if (need_pad_zero) { ret = memset_s(dst.get() + offset, static_cast(protected_size), 0, static_cast(size)); @@ -209,11 +209,11 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { std::shared_ptr dst(new (std::nothrow) uint8_t[dst_size], std::default_delete()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - dst == nullptr, - GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); - return OUT_OF_MEMORY;); + dst == nullptr, + GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", + TypeUtils::FormatToSerialString(args.src_format).c_str(), + TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); + return OUT_OF_MEMORY;); for (int64_t c1i = 0; c1i < c1; c1i++) { for (int64_t hi = 0; hi < h; hi++) { @@ -223,8 +223,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { int64_t dst_idx = c1i * hwn1n0c0 + hi * wn1n0c0 + wi * n1n0c0 + n1n0i * c0 + c0i; int64_t dst_offset = dst_idx * data_size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto pad_zero = ((c1i * c0 + c0i) >= c) || (n1n0i >= n); errno_t ret = EOK; if (pad_zero) { @@ -284,11 +284,11 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { std::shared_ptr dst(new (std::nothrow) uint8_t[dst_size], std::default_delete()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - dst == nullptr, - GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); - return OUT_OF_MEMORY;); + dst == nullptr, + GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", + TypeUtils::FormatToSerialString(args.src_format).c_str(), + TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); + return OUT_OF_MEMORY;); for (int64_t c1i = 0; c1i < c1; c1i++) { for (int64_t hi = 0; hi < h; hi++) { @@ -298,8 +298,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { int64_t dst_idx = c1i * hwn1n0c0 + hi * wn1n0c0 + wi * n1n0c0 + n1n0i * c0 + c0i; int64_t dst_offset = dst_idx * data_size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto pad_zero = ((c1i * c0 + c0i) >= c) || (n1n0i >= n); errno_t ret = EOK; if (pad_zero) { diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.h b/ge/common/formats/format_transfers/format_transfer_fractal_z.h old mode 100644 new mode 100755 index 5ae83303..d640eb60 --- a/ge/common/formats/format_transfers/format_transfer_fractal_z.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc old mode 100644 new mode 100755 index d5507765..009bce2b --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,8 +158,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + w1_idx * w0) * size; auto dst_offset = (h0_head + w1_idx * h0w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -174,8 +174,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + src_w_idx) * size; auto dst_offset = (w0_head + w0_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -236,8 +236,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto src_offset = (h0_head + w1_idx * h0w0) * size; auto dst_offset = (dst_h_head + w1_idx * w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -252,8 +252,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto dst_w_idx = w1_head + w0_idx; auto dst_offset = (dst_h_head + dst_w_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.h b/ge/common/formats/format_transfers/format_transfer_fractal_zz.h old mode 100644 new mode 100755 index 93f40920..c1898e5b --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc old mode 100644 new mode 100755 index b0eebcfa..2076f6f9 --- a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,9 +104,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t src_idx = c1_idx * hwncc0 + h_idx * wncc0 + w_idx * ncc0 + nc_idx * c0 + c0_idx; auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; - auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? + total_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -140,7 +139,7 @@ Status FormatTransferFracZHwcn::TransFormat(const TransArgs &args, TransResult & } GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, - ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); + ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); return PARAM_INVALID; } GELOGD("Begin to trans format from FracZ to HWCN, src shape %s, data type %s, dst shape %s, memory size %ld", diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h index a7efbfcb..4cc393d3 100644 --- a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc old mode 100644 new mode 100755 index 9f8d9e39..042559ca --- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,9 +104,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t src_idx = c1_idx * hwncc0 + h_idx * wncc0 + w_idx * ncc0 + nc_idx * c0 + c0_idx; auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; - auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? 
+ total_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -140,7 +139,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult & } GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, - ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); + ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); return PARAM_INVALID; } GELOGD("Begin to trans format from FracZ to NCHW, src shape %s, data type %s, dst shape %s, memory size %ld", diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h old mode 100644 new mode 100755 index af2cedd0..9b22a7e0 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc old mode 100644 new mode 100755 index 9a1e5f3b..98bd1807 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,9 +104,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size int64_t src_idx = c1_idx * hwncc0 + h_idx * wncc0 + w_idx * ncc0 + nc_idx * c0 + c0_idx; auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; - auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? 
+ total_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -139,7 +138,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult & } GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, - ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); + ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); return PARAM_INVALID; } GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld", diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h old mode 100644 new mode 100755 index 41654304..efeb2506 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc old mode 100644 new mode 100755 index 7101256a..d2f8cf30 --- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -122,8 +122,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t dst_idx = c0_idx + co_head_addr; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); int64_t c_idx = c0_idx + c1_idx * c0; int64_t src_idx = h_idx * wcn + w_idx * cn + c_idx * n + n_idx; auto src_offset = src_idx * size; @@ -141,7 +141,7 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in } } else { auto ret = - memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); + memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); if (ret != EOK) { GELOGE(INTERNAL_ERROR, "Failed to set to 0 to C1HWNCoC0[%ld, %ld, %ld, %ld, %ld, %ld] offset %ld, " diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h index 81d7358e..079cb449 100644 --- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h +++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc old mode 100644 new mode 100755 index 57ab1266..31744d86 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,8 +102,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h old mode 100644 new mode 100755 index 6d599933..453c843e --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc old mode 100644 new mode 100755 index e68e54de..ee3f9917 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,8 +102,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h old mode 100644 new mode 100755 index 8ff60bb1..8b456019 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
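For context on the NC1HWC0 transfers touched above: they move data between 4-D NCHW/NHWC layouts and the 5-D NC1HWC0 layout, in which the channel dimension is split into C1 blocks of C0 elements (C0 is typically 16, matching the cube size), and block positions past the real channel count are zero-padded. A rough sketch of the forward index mapping under those assumptions; the function name is illustrative, not the GE code:

    #include <cstdint>

    // Offset of NCHW element (n, c, h, w) inside an NC1HWC0 buffer, where the
    // channel is split as c = c1 * c0_block + c0 and c1_total = ceil(C / c0_block).
    // Positions with c >= C exist in the buffer and are filled with zeros.
    int64_t Nc1hwc0Offset(int64_t n, int64_t c, int64_t h, int64_t w,
                          int64_t C, int64_t H, int64_t W, int64_t c0_block = 16) {
      int64_t c1_total = (C + c0_block - 1) / c0_block;
      int64_t c1 = c / c0_block;
      int64_t c0 = c % c0_block;
      return ((((n * c1_total + c1) * H + h) * W + w) * c0_block) + c0;
    }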
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc index 2039da47..6f065fc5 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h old mode 100644 new mode 100755 index 4a0fce95..d9a3490c --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc old mode 100644 new mode 100755 index b4e92cbc..ebc15da7 --- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -115,8 +115,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t dst_index = c0_idx + w_head_addr; int64_t dst_offset = dst_index * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); int64_t cIdx = c0_idx + c1_idx * c0; int64_t srcIdx = n_idx * chw + cIdx * hw + h_idx * w + w_idx; auto src_offset = srcIdx * size; @@ -133,7 +133,7 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in } } else { auto ret = - memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); + memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); if (ret != EOK) { GELOGE(INTERNAL_ERROR, "Failed to set to 0 to " @@ -173,10 +173,10 @@ Status FormatTransferNchwNc1hwc0::TransFormat(const TransArgs &args, TransResult return PARAM_INVALID; } GELOGD( - "Begin to trans format from NCHW to NC1HWC0, src shape %s, data type " - "%s, dst shape %s memory size %ld", - ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), - ShapeToString(args.dst_shape).c_str(), total_size); + "Begin to trans format from NCHW to NC1HWC0, src shape %s, data type " + "%s, dst shape %s memory size %ld", + ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), + ShapeToString(args.dst_shape).c_str(), total_size); if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h index c6269579..dd31574d 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h +++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc old mode 100644 new mode 100755 index a5be94ff..3ae7a924 --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -119,8 +119,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t dst_idx = c0_idx + w_head_addr; int64_t dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); int64_t c_idx = c0_idx + c1_idx * c0; int64_t src_idx = n_idx * hwc + h_idx * wc + w_idx * c + c_idx; auto src_offset = src_idx * size; diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h old mode 100644 new mode 100755 index fb190f54..47c0d50e --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc old mode 100644 new mode 100755 index 3be4d67d..19f54040 --- a/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,22 +28,22 @@ namespace ge { namespace formats { namespace { std::map>> perm_args{ - {FORMAT_NCHW, - {{FORMAT_NHWC, std::vector({0, 2, 3, 1})}, - {FORMAT_HWCN, std::vector({2, 3, 1, 0})}, - {FORMAT_CHWN, std::vector({1, 2, 3, 0})}}}, - {FORMAT_NHWC, - {{FORMAT_NCHW, std::vector({0, 3, 1, 2})}, - {FORMAT_CHWN, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 3, 0})}}}, - {FORMAT_HWCN, - {{FORMAT_NCHW, std::vector({3, 2, 0, 1})}, - {FORMAT_NHWC, std::vector({3, 0, 1, 2})}, - {FORMAT_CHWN, std::vector({2, 0, 1, 3})}}}, - {FORMAT_CHWN, - {{FORMAT_NCHW, std::vector({3, 0, 1, 2})}, - {FORMAT_NHWC, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 0, 3})}}}, + {FORMAT_NCHW, + {{FORMAT_NHWC, std::vector({0, 2, 3, 1})}, + {FORMAT_HWCN, std::vector({2, 3, 1, 0})}, + {FORMAT_CHWN, std::vector({1, 2, 3, 0})}}}, + {FORMAT_NHWC, + {{FORMAT_NCHW, std::vector({0, 3, 1, 2})}, + {FORMAT_CHWN, std::vector({3, 1, 2, 0})}, + {FORMAT_HWCN, std::vector({1, 2, 3, 0})}}}, + {FORMAT_HWCN, + {{FORMAT_NCHW, std::vector({3, 2, 0, 1})}, + {FORMAT_NHWC, std::vector({3, 0, 1, 2})}, + {FORMAT_CHWN, std::vector({2, 0, 1, 3})}}}, + {FORMAT_CHWN, + {{FORMAT_NCHW, std::vector({3, 0, 1, 2})}, + {FORMAT_NHWC, std::vector({3, 1, 2, 0})}, + {FORMAT_HWCN, std::vector({1, 2, 0, 3})}}}, }; bool IsShapeArgValid(const std::vector &src_shape, const std::vector &perm_arg) { @@ -163,8 +163,8 @@ Status Transpose(const uint8_t *src, const std::vector &src_shape, Data auto src_offset = GenOffset(src_heads, dst_indexes) * data_size; auto dst_offset_bytes = dst_index * data_size; auto protected_size = dst_size - dst_offset_bytes < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset_bytes - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset_bytes + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset_bytes, static_cast(protected_size), src + src_offset, static_cast(data_size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.h b/ge/common/formats/format_transfers/format_transfer_transpose.h old mode 100644 new mode 100755 index 0e84ef8c..7fa19ff0 --- a/ge/common/formats/format_transfers/format_transfer_transpose.h +++ b/ge/common/formats/format_transfers/format_transfer_transpose.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ Status TransposeWithShapeCheck(const uint8_t *src, const std::vector &s Status GetPermByForamt(Format src_format, Format dst_format, std::vector &perm); + class FormatTransferTranspose : public FormatTransfer { public: Status TransFormat(const TransArgs &args, TransResult &result) override; diff --git a/ge/common/formats/formats.cc b/ge/common/formats/formats.cc old mode 100644 new mode 100755 index d01d055b..697e16ad --- a/ge/common/formats/formats.cc +++ b/ge/common/formats/formats.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,7 +51,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransFormat(const TransArg GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_format, const std::vector &src_shape, - DataType data_type, Format dst_format, + DataType data_type, + Format dst_format, std::vector &dst_shape) { formats::TransArgs args; args.src_format = src_format; diff --git a/ge/common/formats/formats.h b/ge/common/formats/formats.h index b58c67aa..52ae84ad 100644 --- a/ge/common/formats/formats.h +++ b/ge/common/formats/formats.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,8 +36,8 @@ namespace formats { */ Status TransFormat(const TransArgs &args, TransResult &result); -Status TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, - std::vector &dst_shape); +Status TransShape(Format src_format, const std::vector &src_shape, DataType data_type, + Format dst_format, std::vector &dst_shape); Status TransDataType(const CastArgs &args, TransResult &result); diff --git a/ge/common/formats/utils/formats_definitions.h b/ge/common/formats/utils/formats_definitions.h old mode 100644 new mode 100755 index 2faa60e1..7f873f1b --- a/ge/common/formats/utils/formats_definitions.h +++ b/ge/common/formats/utils/formats_definitions.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,13 +23,38 @@ static const int kCubeSize = 16; static const int kNiSize = 16; static const int64_t kShapeItemNumMAX = 1024UL * 1024UL * 1024UL * 1024UL; -enum NchwDimIndex { kNchwN, kNchwC, kNchwH, kNchwW, kNchwDimsNum }; +enum NchwDimIndex { + kNchwN, + kNchwC, + kNchwH, + kNchwW, + kNchwDimsNum +}; -enum NhwcDimIndex { kNhwcN, kNhwcH, kNhwcW, kNhwcC, kNhwcDimsNum }; +enum NhwcDimIndex { + kNhwcN, + kNhwcH, + kNhwcW, + kNhwcC, + kNhwcDimsNum +}; -enum HwcnDimIndex { kHwcnH, kHwcnW, kHwcnC, kHwcnN, kHwcnDimsNum }; +enum HwcnDimIndex { + kHwcnH, + kHwcnW, + kHwcnC, + kHwcnN, + kHwcnDimsNum +}; -enum Nc1hwc0DimIndex { kNc1hwc0N, kNc1hwc0C1, kNc1hwc0H, kNc1hwc0W, kNc1hwc0C0, kNc1hwc0DimsNum }; +enum Nc1hwc0DimIndex { + kNc1hwc0N, + kNc1hwc0C1, + kNc1hwc0H, + kNc1hwc0W, + kNc1hwc0C0, + kNc1hwc0DimsNum +}; enum C1hwncoc0DimIndex { kC1hwncoc0C1, @@ -41,11 +66,31 @@ enum C1hwncoc0DimIndex { kC1hwncoc0DimsNum }; -enum FracZDimIndex { kFracZHWC1, kFracZN0, kFracZNi, kFracZC0, kFracZDimsNum }; +enum FracZDimIndex { + kFracZHWC1, + kFracZN0, + kFracZNi, + kFracZC0, + kFracZDimsNum +}; -enum DhwcnDimIndex { kDhwcnD, kDhwcnH, kDhwcnW, kDhwcnC, kDhwcnN, kDhwcnDimsNum }; +enum DhwcnDimIndex { + kDhwcnD, + kDhwcnH, + kDhwcnW, + kDhwcnC, + kDhwcnN, + kDhwcnDimsNum +}; -enum DhwncDimIndex { kDhwncD, kDhwncH, kDhwncW, kDhwncN, kDhwncC, kDhwncDimsNum }; +enum DhwncDimIndex { + kDhwncD, + kDhwncH, + kDhwncW, + kDhwncN, + kDhwncC, + kDhwncDimsNum +}; } // namespace formats } // namespace ge #endif // GE_COMMON_FORMATS_UTILS_FORMATS_DEFINITIONS_H_ diff --git a/ge/common/formats/utils/formats_trans_utils.cc b/ge/common/formats/utils/formats_trans_utils.cc old mode 100644 new mode 100755 index 23da0f74..e184a866 --- a/ge/common/formats/utils/formats_trans_utils.cc +++ b/ge/common/formats/utils/formats_trans_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/utils/formats_trans_utils.h b/ge/common/formats/utils/formats_trans_utils.h old mode 100644 new mode 100755 index 8b6f0604..a480b814 --- a/ge/common/formats/utils/formats_trans_utils.h +++ b/ge/common/formats/utils/formats_trans_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/fp16_t.cc b/ge/common/fp16_t.cc old mode 100644 new mode 100755 index 7b111e63..2f94323d --- a/ge/common/fp16_t.cc +++ b/ge/common/fp16_t.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/fp16_t.h b/ge/common/fp16_t.h old mode 100644 new mode 100755 index 0fda2cd2..0c5cd17b --- a/ge/common/fp16_t.h +++ b/ge/common/fp16_t.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
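The permutation tables re-indented in format_transfer_transpose.cc above drive a generic transpose: the permutation satisfies dst_shape[i] = src_shape[perm[i]] (for example {0, 2, 3, 1} turns NCHW into NHWC), and each destination index is mapped back to a source offset through the source strides. A compact sketch of that mapping; it is a simplified stand-in, not the GE Transpose/GenOffset implementation:

    #include <cstdint>
    #include <vector>

    // Dense transpose by permutation: dst_shape[i] == src_shape[perm[i]].
    template <typename T>
    std::vector<T> TransposeByPerm(const std::vector<T> &src,
                                   const std::vector<int64_t> &src_shape,
                                   const std::vector<int64_t> &perm) {
      const size_t dims = src_shape.size();
      std::vector<int64_t> src_stride(dims, 1);  // row-major source strides
      for (size_t i = dims - 1; i > 0; --i) {
        src_stride[i - 1] = src_stride[i] * src_shape[i];
      }
      std::vector<int64_t> dst_shape(dims);
      int64_t total = 1;
      for (size_t i = 0; i < dims; ++i) {
        dst_shape[i] = src_shape[perm[i]];
        total *= dst_shape[i];
      }
      std::vector<T> dst(static_cast<size_t>(total));
      std::vector<int64_t> dst_idx(dims, 0);
      for (int64_t linear = 0; linear < total; ++linear) {
        // Dimension i of the destination walks dimension perm[i] of the source.
        int64_t src_offset = 0;
        for (size_t i = 0; i < dims; ++i) {
          src_offset += dst_idx[i] * src_stride[perm[i]];
        }
        dst[static_cast<size_t>(linear)] = src[static_cast<size_t>(src_offset)];
        // Advance the destination index like an odometer (row-major order).
        for (size_t i = dims; i-- > 0;) {
          if (++dst_idx[i] < dst_shape[i]) break;
          dst_idx[i] = 0;
        }
      }
      return dst;
    }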
diff --git a/ge/common/ge/datatype_util.cc b/ge/common/ge/datatype_util.cc old mode 100644 new mode 100755 index f2ff12cb..d99f13c1 --- a/ge/common/ge/datatype_util.cc +++ b/ge/common/ge/datatype_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,46 +22,46 @@ namespace { const std::vector kEmptyDatatypeVector; std::map> g_translatable_data_type = { - // key:src datatype, value:dst datatype - {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, - {ge::DT_BOOL, {ge::DT_INT32}}, - {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, - {ge::DT_INT64, {ge::DT_INT32}}}; + // key:src datatype, value:dst datatype + {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, + {ge::DT_BOOL, {ge::DT_INT32}}, + {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, + {ge::DT_INT64, {ge::DT_INT32}}}; std::map> g_reverse_translatable_data_type = { - // key:dst datatype,value:src datatype - {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, - {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, - {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; + // key:dst datatype,value:src datatype + {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, + {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, + {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; static const std::map g_dump_data_type_map = { - // key:ge datatype,value:proto datatype - {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, - {ge::DT_FLOAT, ge::proto::DT_FLOAT}, - {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, - {ge::DT_INT8, ge::proto::DT_INT8}, - {ge::DT_UINT8, ge::proto::DT_UINT8}, - {ge::DT_INT16, ge::proto::DT_INT16}, - {ge::DT_UINT16, ge::proto::DT_UINT16}, - {ge::DT_INT32, ge::proto::DT_INT32}, - {ge::DT_INT64, ge::proto::DT_INT64}, - {ge::DT_UINT32, ge::proto::DT_UINT32}, - {ge::DT_UINT64, ge::proto::DT_UINT64}, - {ge::DT_BOOL, ge::proto::DT_BOOL}, - {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, - {ge::DT_DUAL, ge::proto::DT_DUAL}, - {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, - {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, - {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, - {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, - {ge::DT_QINT8, ge::proto::DT_QINT8}, - {ge::DT_QINT16, ge::proto::DT_QINT16}, - {ge::DT_QINT32, ge::proto::DT_QINT32}, - {ge::DT_QUINT8, ge::proto::DT_QUINT8}, - {ge::DT_QUINT16, ge::proto::DT_QUINT16}, - {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, - {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, - {ge::DT_STRING, ge::proto::DT_STRING}, + // key:ge datatype,value:proto datatype + {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, + {ge::DT_FLOAT, ge::proto::DT_FLOAT}, + {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, + {ge::DT_INT8, ge::proto::DT_INT8}, + {ge::DT_UINT8, ge::proto::DT_UINT8}, + {ge::DT_INT16, ge::proto::DT_INT16}, + {ge::DT_UINT16, ge::proto::DT_UINT16}, + {ge::DT_INT32, ge::proto::DT_INT32}, + {ge::DT_INT64, ge::proto::DT_INT64}, + {ge::DT_UINT32, ge::proto::DT_UINT32}, + {ge::DT_UINT64, ge::proto::DT_UINT64}, + {ge::DT_BOOL, ge::proto::DT_BOOL}, + {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, + {ge::DT_DUAL, ge::proto::DT_DUAL}, + {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, + {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, + {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, + {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, + {ge::DT_QINT8, ge::proto::DT_QINT8}, + {ge::DT_QINT16, ge::proto::DT_QINT16}, + {ge::DT_QINT32, ge::proto::DT_QINT32}, + 
{ge::DT_QUINT8, ge::proto::DT_QUINT8}, + {ge::DT_QUINT16, ge::proto::DT_QUINT16}, + {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, + {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, + {ge::DT_STRING, ge::proto::DT_STRING}, }; } // namespace diff --git a/ge/common/ge/datatype_util.h b/ge/common/ge/datatype_util.h index 480b35e7..e42b25a7 100644 --- a/ge/common/ge/datatype_util.h +++ b/ge/common/ge/datatype_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,10 +37,10 @@ static const int32_t kGeSizeUint16 = sizeof(uint16_t); static const int32_t kGeSizeUint32 = sizeof(uint32_t); static std::map CONST_OPDATA_TYPE_SIZE_MAP = { - {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, - {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, - {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, - {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; + {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, + {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, + {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, + {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY DataTypeUtil { public: diff --git a/ge/common/ge/ge_util.h b/ge/common/ge/ge_util.h index c6319bd3..52e7c370 100644 --- a/ge/common/ge/ge_util.h +++ b/ge/common/ge/ge_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/op_tiling_manager.cc b/ge/common/ge/op_tiling_manager.cc index ec43ab2e..251634e2 100644 --- a/ge/common/ge/op_tiling_manager.cc +++ b/ge/common/ge/op_tiling_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,8 +45,8 @@ std::string OpTilingManager::GetPath() { if (opp_path_env != nullptr) { char resolved_path[PATH_MAX]; if (realpath(opp_path_env, resolved_path) == NULL) { - ErrorManager::GetInstance().ATCReportErrMessage("E19024", {"env", "value", "situation"}, - {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19024", {"env", "value", "situation"}, {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env); return std::string(); } diff --git a/ge/common/ge/op_tiling_manager.h b/ge/common/ge/op_tiling_manager.h index 320e1411..d4e7f34e 100644 --- a/ge/common/ge/op_tiling_manager.h +++ b/ge/common/ge/op_tiling_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,3 +36,4 @@ class OpTilingManager { } // namespace ge #endif // GE_COMMON_GE_OP_TILING_MANAGER_H_ + diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc index c56b2a2a..57d51223 100644 --- a/ge/common/ge/plugin_manager.cc +++ b/ge/common/ge/plugin_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,15 +93,13 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec std::vector path_vec; SplitPath(path, path_vec); for (const auto &single_path : path_vec) { - GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, - GELOGE(GE_PLGMGR_PATH_INVALID, "The shared library file path is too long!"); + GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, + "The shared library file path is too long!"); continue); // load break when number of loaded so reach maximum if (num_of_loaded_so >= kMaxNumOfSo) { - GELOGW( - "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," - " and only the first %d shared libraries will be loaded.", - kMaxNumOfSo); + GELOGW("The number of dynamic libraries loaded exceeds the kMaxNumOfSo," + " and only the first %d shared libraries will be loaded.", kMaxNumOfSo); break; } @@ -182,9 +180,9 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded // load continue if the total size of so reaches maximum when it is loaded if (size_of_loaded_so + file_size > kMaxSizeOfLoadedSo) { GELOGW( - "%s is skipped because the size of loaded share library reaches maximum if it is loaded! " - "(size: %ldB, size of loaded share library: %ldB, maximum: %dB)", - file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo); + "%s is skipped because the size of loaded share library reaches maximum if it is loaded! 
" + "(size: %ldB, size of loaded share library: %ldB, maximum: %dB)", + file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo); return FAILED; } @@ -231,10 +229,8 @@ Status PluginManager::Load(const string &path, const vector &func_check_ // load break when number of loaded so reach maximum if (num_of_loaded_so >= kMaxNumOfSo) { - GELOGW( - "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," - " and only the first %d shared libraries will be loaded.", - kMaxNumOfSo); + GELOGW("The number of dynamic libraries loaded exceeds the kMaxNumOfSo," + " and only the first %d shared libraries will be loaded.", kMaxNumOfSo); break; } diff --git a/ge/common/ge/plugin_manager.h b/ge/common/ge/plugin_manager.h old mode 100644 new mode 100755 index b35a631a..903367a3 --- a/ge/common/ge/plugin_manager.h +++ b/ge/common/ge/plugin_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc old mode 100644 new mode 100755 index 8a594cb9..92da8e14 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -94,13 +94,6 @@ void TBEPluginManager::ProcessSoFullName(vector &file_list, string &caff full_name.compare(full_name.size() - caffe_parser_so_suff.size(), caffe_parser_so_suff.size(), caffe_parser_so_suff) == 0) { caffe_parser_path = full_name; - } else if ((full_name.size() >= aicpu_so_suff.size() && - full_name.compare(full_name.size() - aicpu_so_suff.size(), aicpu_so_suff.size(), aicpu_so_suff) == 0) || - (full_name.size() >= aicpu_host_so_suff.size() && - full_name.compare(full_name.size() - aicpu_host_so_suff.size(), aicpu_host_so_suff.size(), - aicpu_host_so_suff) == 0)) { - // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage. 
- domi::GetContext().aicpu_op_run_paths.push_back(full_name); } else { // Save parser so path into file_list vector file_list.push_back(full_name); @@ -193,8 +186,8 @@ void TBEPluginManager::LoadCustomOpLib() { } } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo( - const std::map &options) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY +void TBEPluginManager::LoadPluginSo(const std::map &options) { vector file_list; string caffe_parser_path; std::string plugin_path; @@ -230,39 +223,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPlug } } -Status TBEPluginManager::CheckCustomAiCpuOpLib() { - std::vector vec_op_type; - - domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); - for (size_t i = 0; i < vec_op_type.size(); i++) { - bool aicpu_so_exist = false; - std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; - for (size_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { - string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; - if (bin_file_path.size() >= ai_cpu_so_name.size() && - bin_file_path.compare(bin_file_path.size() - ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == - 0) { - aicpu_so_exist = true; - break; - } - } - if (!aicpu_so_exist) { - GELOGE(FAILED, "Can't find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); - return FAILED; - } - } - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::InitPreparation( - const std::map &options) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY +void TBEPluginManager::InitPreparation(const std::map &options) { options_.insert(options.begin(), options.end()); // Load TBE plugin TBEPluginManager::Instance().LoadCustomOpLib(); - Status ret = CheckCustomAiCpuOpLib(); - if (ret != SUCCESS) { - GELOGE(ret, "Check custom aicpu run so failed!"); - return; - } } } // namespace ge diff --git a/ge/common/ge/tbe_plugin_manager.h b/ge/common/ge/tbe_plugin_manager.h old mode 100644 new mode 100755 index 2a55e450..41db8ef9 --- a/ge/common/ge/tbe_plugin_manager.h +++ b/ge/common/ge/tbe_plugin_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,23 +32,23 @@ namespace ge { using SoHandlesVec = std::vector; -using std::function; -using std::map; -using std::string; using std::vector; +using std::string; +using std::map; +using std::function; class TBEPluginManager { public: Status Finalize(); // Get TBEPluginManager singleton instance - static TBEPluginManager &Instance(); + static TBEPluginManager& Instance(); static string GetPath(); static void InitPreparation(const std::map &options); - void LoadPluginSo(const std::map &options); + void LoadPluginSo(const std::map< string, string> &options); private: TBEPluginManager() = default; @@ -62,7 +62,6 @@ class TBEPluginManager { static void GetPluginSoFileList(const string &path, vector &file_list, string &caffe_parser_path); static void GetCustomOpPath(std::string &customop_path); void LoadCustomOpLib(); - static Status CheckCustomAiCpuOpLib(); SoHandlesVec handles_vec_; static std::map options_; diff --git a/ge/common/ge_common.mk b/ge/common/ge_common.mk old mode 100644 new mode 100755 diff --git a/ge/common/ge_format_util.cc b/ge/common/ge_format_util.cc old mode 100644 new mode 100755 diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc old mode 100644 new mode 100755 index b1a71b0a..d1e76447 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -1007,10 +1007,9 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { return PARAM_INVALID; } // Offset is needed by SaveVarVddr instead of logic address - ret = - VarManager::Instance(session_id_) - ->SaveVarAddr(iter.first, tensor_addr_mgr.tensor_desc, - reinterpret_cast(reinterpret_cast(offset)), tensor_addr_mgr.memory_type); + ret = VarManager::Instance(session_id_)->SaveVarAddr(iter.first, tensor_addr_mgr.tensor_desc, + reinterpret_cast(reinterpret_cast(offset)), + tensor_addr_mgr.memory_type); if (ret != SUCCESS) { GELOGW("Fail to recover VarAddr or TensorDesc of var[%s].", iter.first.c_str()); return ret; diff --git a/ge/common/helper/model_cache_helper.h b/ge/common/helper/model_cache_helper.h old mode 100644 new mode 100755 index 7524b224..68381e96 --- a/ge/common/helper/model_cache_helper.h +++ b/ge/common/helper/model_cache_helper.h @@ -42,7 +42,7 @@ class ModelCacheHelper { ModelCacheHelper(uint64_t session_id, uint32_t graph_id, ComputeGraphPtr &compute_graph); ~ModelCacheHelper(); - Status SaveCacheInfoToCache() const; + Status SaveCacheInfoToCache () const; Status SaveVarManagerToCache(bool before_build) const; Status SaveOmModelToCache(const GeModelPtr &ge_model) const; bool IsModelCacheHit() const; @@ -97,7 +97,7 @@ class ModelCacheHelper { std::vector> &var_addr_mgr_vector, std::unordered_set &var_offset_set); static Status ParseCurVarTensorDescMapFromJson( - const Json &json, std::unordered_map &cur_var_tensor_desc_map); + const Json &json, std::unordered_map &cur_var_tensor_desc_map); static Status ParseTransRoadsFromJson(const Json &json, std::unordered_map> &trans_roads); static Status ParseChangedGraphIdFromJson(const Json &json, diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 15683257..bb4502c7 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,13 +27,14 @@ #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" -using domi::ModelTaskDef; using std::string; +using domi::ModelTaskDef; namespace { const int64_t kOriginalOmPartitionNum = 1; } + namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelHelper::~ModelHelper() { (void)ReleaseLocalModelData(); } @@ -55,7 +56,7 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil item = "aicpu kernels"; } ErrorManager::GetInstance().ATCReportErrMessage("E19023", {"size", "item", "maxsize"}, - {std::to_string(size), item, std::to_string(UINT32_MAX)}); + {std::to_string(size), item, std::to_string(UINT32_MAX)}); } return PARAM_INVALID; } @@ -77,7 +78,7 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, - ModelBufferData &model) { + ModelBufferData& model) { if (output_file.empty()) { GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); return FAILED; @@ -109,17 +110,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod GELOGI("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); // weight is not necessary if (ge_model_weight.GetSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, - ge_model_weight.GetData(), ge_model_weight.GetSize()), - "Add weight partition failed"); + GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, + ModelPartitionType::WEIGHTS_DATA, + ge_model_weight.GetData(), + ge_model_weight.GetSize()), "Add weight partition failed"); } TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); GELOGI("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); if (tbe_kernel_store.DataSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, tbe_kernel_store.Data(), - tbe_kernel_store.DataSize()), - "Add tbe kernel partition failed"); + GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, + ModelPartitionType::TBE_KERNELS, + tbe_kernel_store.Data(), + tbe_kernel_store.DataSize()), "Add tbe kernel partition failed"); } // no need to check value, DATA->NetOutput @@ -128,8 +131,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); GELOGI("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); if (cust_aicpu_kernel_store.DataSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::CUST_AICPU_KERNELS, - cust_aicpu_kernel_store.Data(), cust_aicpu_kernel_store.DataSize()), + GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, + ModelPartitionType::CUST_AICPU_KERNELS, + cust_aicpu_kernel_store.Data(), + cust_aicpu_kernel_store.DataSize()), "Add cust aicpu kernel partition failed"); } @@ -454,8 +459,8 @@ Status ModelHelper::ReleaseLocalModelData() noexcept { return result; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName(const string &file_name, - string &base_name) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName( + const string &file_name, string &base_name) { GELOGD("Get base_name from file, file_name:%s", file_name.c_str()); GE_CHK_BOOL_EXEC_WARN(!file_name.empty(), return FAILED, 
"File path may not valid, check params --output"); size_t start_position = 0; @@ -470,8 +475,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseName return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status -ModelHelper::GetModelNameFromMergedGraphName(const string &graph_name, string &model_name) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetModelNameFromMergedGraphName( + const string &graph_name, string &model_name) { GELOGD("Get model_name from graph_name, graph_name:%s", graph_name.c_str()); // this can only be used after merged graph(graph name will be append with "_x", x is index); GE_CHK_BOOL_EXEC_WARN(!graph_name.empty(), return FAILED, "File path may not valid, check params --output"); diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index ca506731..39cd7ad7 100644 --- a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,8 +89,8 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { // Model length too small if (model.model_len < (sizeof(ModelFileHeader) + sizeof(ModelPartitionTable))) { GELOGE(PARAM_INVALID, - "Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].", - model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable)); + "Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].", + model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable)); return PARAM_INVALID; } @@ -101,7 +101,7 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { (MODEL_FILE_MAGIC_NUM != model_header->magic)) { GELOGE(PARAM_INVALID, "Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " - "MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", + "MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", model_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, model_header->magic); return PARAM_INVALID; } diff --git a/ge/common/kernel_store.cc b/ge/common/kernel_store.cc old mode 100644 new mode 100755 index e465d184..e339b30c --- a/ge/common/kernel_store.cc +++ b/ge/common/kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,8 +51,8 @@ bool KernelStore::Build() { kernel_head.name_len = static_cast(kernel->GetName().length()); kernel_head.bin_len = static_cast(kernel->GetBinDataSize()); - GELOGI("get kernel bin name %s, addr %p, size %u", kernel->GetName().c_str(), kernel->GetBinData(), - kernel->GetBinDataSize()); + GELOGI("get kernel bin name %s, addr %p, size %u", + kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize()); mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head)); GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false); next_buffer += sizeof(kernel_head); diff --git a/ge/common/kernel_store.h b/ge/common/kernel_store.h old mode 100644 new mode 100755 index d73f26c5..b3f4a62e --- a/ge/common/kernel_store.h +++ b/ge/common/kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/fp16_math.cc b/ge/common/math/fp16_math.cc old mode 100644 new mode 100755 index 56183ced..e465c953 --- a/ge/common/math/fp16_math.cc +++ b/ge/common/math/fp16_math.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/fp16_math.h b/ge/common/math/fp16_math.h old mode 100644 new mode 100755 index c3a4eb28..48559eb3 --- a/ge/common/math/fp16_math.h +++ b/ge/common/math/fp16_math.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/math_util.h b/ge/common/math/math_util.h old mode 100644 new mode 100755 index e5a53d16..3255e3c1 --- a/ge/common/math/math_util.h +++ b/ge/common/math/math_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -612,268 +612,295 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) { return SUCCESS; } -#define FMK_INT_ADDCHECK(a, b) \ - if (ge::CheckIntAddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_ADDCHECK(a, b) \ + if (ge::CheckIntAddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT8_ADDCHECK(a, b) \ - if (ge::CheckInt8AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int8 %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT8_ADDCHECK(a, b) \ + if (ge::CheckInt8AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int8 %d and %d addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT16_ADDCHECK(a, b) \ - if (ge::CheckInt16AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int16 %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT16_ADDCHECK(a, b) \ + if (ge::CheckInt16AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int16 %d and %d addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_ADDCHECK(a, b) \ - if (ge::CheckInt32AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int32 %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT32_ADDCHECK(a, b) \ + if (ge::CheckInt32AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int32 %d and %d addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_ADDCHECK(a, b) \ - if (ge::CheckInt64AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int64 %ld and %ld addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT64_ADDCHECK(a, b) \ + if (ge::CheckInt64AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int64 %ld and %ld addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT8_ADDCHECK(a, b) \ - if (ge::CheckUint8AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint8 %u and %u addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT8_ADDCHECK(a, b) \ + if (ge::CheckUint8AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint8 %u and %u addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT16_ADDCHECK(a, b) \ - if (ge::CheckUint16AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("UINT16 %u and %u addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT16_ADDCHECK(a, b) \ + if (ge::CheckUint16AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("UINT16 %u and %u addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT32_ADDCHECK(a, b) \ - if (ge::CheckUint32AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint32 %u and %u addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT32_ADDCHECK(a, b) \ + if (ge::CheckUint32AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint32 %u and %u addition can result in overflow!", 
static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT64_ADDCHECK(a, b) \ - if (ge::CheckUint64AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint64 %lu and %lu addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT64_ADDCHECK(a, b) \ + if (ge::CheckUint64AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint64 %lu and %lu addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_ADDCHECK(a, b) \ - if (ge::CheckFp16AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Fp16 %f and %f addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_ADDCHECK(a, b) \ + if (ge::CheckFp16AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Fp16 %f and %f addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_ADDCHECK(a, b) \ - if (ge::CheckFloatAddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Float %f and %f addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_ADDCHECK(a, b) \ + if (ge::CheckFloatAddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Float %f and %f addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_ADDCHECK(a, b) \ - if (ge::CheckDoubleAddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Double %lf and %lf addition can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_ADDCHECK(a, b) \ + if (ge::CheckDoubleAddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Double %lf and %lf addition can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT_SUBCHECK(a, b) \ - if (ge::CheckIntSubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_SUBCHECK(a, b) \ + if (ge::CheckIntSubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT8_SUBCHECK(a, b) \ - if (ge::CheckInt8SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int8 %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT8_SUBCHECK(a, b) \ + if (ge::CheckInt8SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int8 %d and %d subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT16_SUBCHECK(a, b) \ - if (ge::CheckInt16SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int16 %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT16_SUBCHECK(a, b) \ + if (ge::CheckInt16SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int16 %d and %d subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_SUBCHECK(a, b) \ - if (ge::CheckInt32SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int32 %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT32_SUBCHECK(a, b) \ + if (ge::CheckInt32SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int32 %d and %d subtraction can result in overflow!", 
static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_SUBCHECK(a, b) \ - if (ge::CheckInt64SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int64 %ld and %ld subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT64_SUBCHECK(a, b) \ + if (ge::CheckInt64SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int64 %ld and %ld subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT8_SUBCHECK(a, b) \ - if (ge::CheckUint8SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint8 %u and %u subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT8_SUBCHECK(a, b) \ + if (ge::CheckUint8SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint8 %u and %u subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT16_SUBCHECK(a, b) \ - if (ge::CheckUint16SubOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT16_SUBCHECK(a, b) \ + if (ge::CheckUint16SubOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint16 %u and %u subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT32_SUBCHECK(a, b) \ - if (ge::CheckUint32SubOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT32_SUBCHECK(a, b) \ + if (ge::CheckUint32SubOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint32 %u and %u subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT64_SUBCHECK(a, b) \ - if (ge::CheckUint64SubOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT64_SUBCHECK(a, b) \ + if (ge::CheckUint64SubOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint64 %lu and %lu subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_SUBCHECK(a, b) \ - if (ge::CheckFp16SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Fp16 %f and %f subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_SUBCHECK(a, b) \ + if (ge::CheckFp16SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Fp16 %f and %f subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_SUBCHECK(a, b) \ - if (ge::CheckFloatSubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Float %f and %f subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_SUBCHECK(a, b) \ + if (ge::CheckFloatSubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Float %f and %f subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_SUBCHECK(a, b) \ - if (ge::CheckDoubleSubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Double %lf and %lf subtraction can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_SUBCHECK(a, b) \ + if (ge::CheckDoubleSubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Double %lf and %lf subtraction can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT_MULCHECK(a, b) \ - if (ge::CheckIntMulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d multiplication can 
result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_MULCHECK(a, b) \ + if (ge::CheckIntMulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT8_MULCHECK(a, b) \ - if (ge::CheckInt8MulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int8 %d and %d multiplication can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT8_MULCHECK(a, b) \ + if (ge::CheckInt8MulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int8 %d and %d multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT16_MULCHECK(a, b) \ - if (ge::CheckInt16MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT16_MULCHECK(a, b) \ + if (ge::CheckInt16MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int16 %d and %d multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_MULCHECK(a, b) \ - if (ge::CheckInt32MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT32_MULCHECK(a, b) \ + if (ge::CheckInt32MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int32 %d and %d multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_MULCHECK(a, b) \ - if (ge::Int64MulCheckOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT64_MULCHECK(a, b) \ + if (ge::Int64MulCheckOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int64 %ld and %ld multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT8_MULCHECK(a, b) \ - if (ge::CheckUint8MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT8_MULCHECK(a, b) \ + if (ge::CheckUint8MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint8 %u and %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT16_MULCHECK(a, b) \ - if (ge::CheckUint16MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT16_MULCHECK(a, b) \ + if (ge::CheckUint16MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint16 %u and %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT32_MULCHECK(a, b) \ - if (ge::CheckUint32MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT32_MULCHECK(a, b) \ + if (ge::CheckUint32MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint32 %u and %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT64_MULCHECK(a, b) \ - if (ge::CheckUint64MulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint64 %lu and %lu multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT64_MULCHECK(a, b) \ + if (ge::CheckUint64MulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint64 %lu and %lu multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_MULCHECK(a, b) \ - if (ge::CheckFp16MulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Fp16 %f 
and %f multiplication can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_MULCHECK(a, b) \ + if (ge::CheckFp16MulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Fp16 %f and %f multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_MULCHECK(a, b) \ - if (ge::CheckFloatMulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Float %f and %f multiplication can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_MULCHECK(a, b) \ + if (ge::CheckFloatMulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Float %f and %f multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_MULCHECK(a, b) \ - if (ge::CheckDoubleMulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Double %lf and %lf multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_MULCHECK(a, b) \ + if (ge::CheckDoubleMulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Double %lf and %lf multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT_DIVCHECK(a, b) \ - if (CheckIntDivOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d division can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_DIVCHECK(a, b) \ + if (CheckIntDivOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d division can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_DIVCHECK(a, b) \ - if (CheckInt32DivOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int32 %d and %d division can result in overflow!", static_cast(a), static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT32_DIVCHECK(a, b) \ + if (CheckInt32DivOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int32 %d and %d division can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_UINT32_MULCHECK(a, b) \ - if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT64_UINT32_MULCHECK(a, b) \ + if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_ZEROCHECK(a) \ - if (fabs(a) < DBL_EPSILON || a < 0) { \ - GELOGW("Fp16 %f can not less than or equal to zero! ", a); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_ZEROCHECK(a) \ + if (fabs(a) < DBL_EPSILON || a < 0) { \ + GELOGW("Fp16 %f can not less than or equal to zero! ", a); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_ZEROCHECK(a) \ - if (fabs(a) < FLT_EPSILON || a < 0) { \ - GELOGW("Float %f can not less than or equal to zero! ", a); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_ZEROCHECK(a) \ + if (fabs(a) < FLT_EPSILON || a < 0) { \ + GELOGW("Float %f can not less than or equal to zero! ", a); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_ZEROCHECK(a) \ - if (fabs(a) < DBL_EPSILON || a < 0) { \ - GELOGW("Double %lf can not less than or equal to zero! ", a); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_ZEROCHECK(a) \ + if (fabs(a) < DBL_EPSILON || a < 0) { \ + GELOGW("Double %lf can not less than or equal to zero! 
", a); \ + return INTERNAL_ERROR; \ } } // namespace ge #endif // GE_COMMON_MATH_MATH_UTIL_H_ diff --git a/ge/common/math_util.h b/ge/common/math_util.h old mode 100644 new mode 100755 index a12be9e0..913a1572 --- a/ge/common/math_util.h +++ b/ge/common/math_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,13 +28,13 @@ namespace ge { /** - * @ingroup domi_calibration - * @brief Initializes an input array to a specified value - * @param [in] n array initialization length - * @param [in] alpha initialization value - * @param [out] output array to be initialized - * @return Status - */ +* @ingroup domi_calibration +* @brief Initializes an input array to a specified value +* @param [in] n array initialization length +* @param [in] alpha initialization value +* @param [out] output array to be initialized +* @return Status +*/ template Status NnSet(const int32_t n, const Dtype alpha, Dtype *output) { GE_CHECK_NOTNULL(output); diff --git a/ge/common/model_parser/base.cc b/ge/common/model_parser/base.cc index 3b6b9407..bc38cea8 100644 --- a/ge/common/model_parser/base.cc +++ b/ge/common/model_parser/base.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/model_parser/base.h b/ge/common/model_parser/base.h old mode 100644 new mode 100755 index 22d58ace..75db8b11 --- a/ge/common/model_parser/base.h +++ b/ge/common/model_parser/base.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/model_saver.cc b/ge/common/model_saver.cc old mode 100644 new mode 100755 index 821fde60..fb1cd0a7 --- a/ge/common/model_saver.cc +++ b/ge/common/model_saver.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,7 +69,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi // Write data to file mmSsize_t mmpa_ret = mmWrite(fd, const_cast((const void *)model_char), len); if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { - ErrorManager::GetInstance().ATCReportErrMessage("E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose GELOGE(FAILED, "Write to file failed. 
errno = %d, %s", mmpa_ret, strerror(errno)); ret = FAILED; diff --git a/ge/common/model_saver.h b/ge/common/model_saver.h index 411d5e35..6da0a78c 100644 --- a/ge/common/model_saver.h +++ b/ge/common/model_saver.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/module.mk b/ge/common/module.mk old mode 100644 new mode 100755 diff --git a/ge/common/op/attr_value_util.cc b/ge/common/op/attr_value_util.cc index 5d74aa1d..4315a25d 100644 --- a/ge/common/op/attr_value_util.cc +++ b/ge/common/op/attr_value_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include "framework/common/op/attr_value_util.h" #include "framework/common/debug/log.h" #include "framework/common/util.h" +#include "register/register_types.h" namespace ge { #define DEFINE_SET_ATTR_VALUE_ONE(ARG_TYPE, FIELD) \ @@ -83,27 +84,30 @@ DEFINE_SET_ATTR_VALUE_LIST(const std::string &, s); ADD_TO_ATTR_MAP(map_key, value, attr) \ } \ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttr(KEY_TYPE map_key, VALUE_TYPE value, \ - AttrDefMap *attr_map){ \ - ADD_TO_ATTR_MAP(map_key, value, attr_map)} FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void \ - AddModelAttr(KEY_TYPE map_key, VALUE_TYPE value, ModelDef *model_def) { \ + AttrDefMap *attr_map) { \ + ADD_TO_ATTR_MAP(map_key, value, attr_map) \ + } \ + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddModelAttr(KEY_TYPE map_key, VALUE_TYPE value, \ + ModelDef *model_def) { \ GE_CHECK_NOTNULL_JUST_RETURN(model_def); \ auto attr = model_def->mutable_attr(); \ ADD_TO_ATTR_MAP(map_key, value, attr) \ } -#define DEFINE_ADD_ATTR_VALUE_LIST(KEY_TYPE, VALUE_TYPE) \ - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, \ - OpDef *op_def) { \ - GE_CHECK_NOTNULL_JUST_RETURN(op_def); \ - auto attr = op_def->mutable_attr(); \ - ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ - } \ - FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, AttrDefMap *attr_map){ \ - ADD_TO_ATTR_MAP_LIST(map_key, value, attr_map)} FMK_FUNC_DEV_VISIBILITY void \ - AddModelAttrList(KEY_TYPE map_key, VALUE_TYPE value, ModelDef *model_def) { \ - GE_CHECK_NOTNULL_JUST_RETURN(model_def); \ - auto attr = model_def->mutable_attr(); \ - ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ +#define DEFINE_ADD_ATTR_VALUE_LIST(KEY_TYPE, VALUE_TYPE) \ + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, \ + OpDef *op_def) { \ + GE_CHECK_NOTNULL_JUST_RETURN(op_def); \ + auto attr = op_def->mutable_attr(); \ + ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ + } \ + FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, AttrDefMap *attr_map) { \ + ADD_TO_ATTR_MAP_LIST(map_key, value, attr_map) \ + } \ + FMK_FUNC_DEV_VISIBILITY void AddModelAttrList(KEY_TYPE map_key, VALUE_TYPE value, ModelDef *model_def) { \ + GE_CHECK_NOTNULL_JUST_RETURN(model_def); \ + auto attr = model_def->mutable_attr(); \ + ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ } DEFINE_ADD_ATTR_VALUE(const std::string &, const std::string &); @@ -153,16 +157,16 
@@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttr(const std::strin return false; \ } -#define DEFINE_GET_ATTR_CONST_POINT_REF(ARG_TYPE_KEY, ARG_TYPE_VALUE, FIELD) \ - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool GetAttrDefValue( \ - ARG_TYPE_KEY map_key, const ARG_TYPE_VALUE *&value, const AttrDefMap &attr) { \ - auto it = attr.find(map_key); \ - if (it == attr.end()) { \ - return false; \ - } \ - \ - value = &(it->second.FIELD()); \ - return true; \ +#define DEFINE_GET_ATTR_CONST_POINT_REF(ARG_TYPE_KEY, ARG_TYPE_VALUE, FIELD) \ + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool GetAttrDefValue( \ + ARG_TYPE_KEY map_key, const ARG_TYPE_VALUE *&value, const AttrDefMap &attr) { \ + auto it = attr.find(map_key); \ + if (it == attr.end()) { \ + return false; \ + } \ + \ + value = &(it->second.FIELD()); \ + return true; \ } #define DEFINE_GET_BYTES_ATTR_VALUE(ARG_TYPE_KEY, ARG_TYPE_VALUE) \ diff --git a/ge/common/op/ge_op_utils.cc b/ge/common/op/ge_op_utils.cc index 1dc268b2..579190d6 100644 --- a/ge/common/op/ge_op_utils.cc +++ b/ge/common/op/ge_op_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/op/attr_value_util.h" #include "framework/common/util.h" +#include "framework/common/types.h" #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/op_desc_utils.h" diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index d301f647..9a2b24a0 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
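The math_util.h hunks earlier in this section only reflow the FMK_*CHECK macros; the Check*Overflow helpers they wrap are defined elsewhere in the same header. As a hedged sketch of what such a pre-addition guard typically looks like (the status enum and function bodies here are placeholders, not the repository's definitions):

#include <cstdint>
#include <limits>

// Placeholder status values standing in for ge::SUCCESS / ge::INTERNAL_ERROR.
enum Status { SUCCESS = 0, INTERNAL_ERROR = 1 };

// Guard an int32_t addition before it happens: if b would push a past the
// representable range, report failure instead of overflowing.
inline Status CheckInt32AddOverflow(int32_t a, int32_t b) {
  if ((b > 0 && a > std::numeric_limits<int32_t>::max() - b) ||
      (b < 0 && a < std::numeric_limits<int32_t>::min() - b)) {
    return INTERNAL_ERROR;
  }
  return SUCCESS;
}

// Usage mirroring the FMK_INT32_ADDCHECK pattern: check first, add only on success.
inline Status SafeAdd(int32_t a, int32_t b, int32_t &sum) {
  if (CheckInt32AddOverflow(a, b) != SUCCESS) {
    return INTERNAL_ERROR;
  }
  sum = a + b;
  return SUCCESS;
}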
@@ -55,19 +55,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In bool convert_2_phy_device_id) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); - // profiling need phy device id - if (!convert_2_phy_device_id) { - device_id_.push_back(options.device_id); - } else { - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED; - } - device_id_.push_back(phy_device_id); - } - job_id_ = options.job_id; Status ret; @@ -76,6 +63,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In ret = InitFromAclCfg(recv_profiling_config_); } else { ret = InitFromOptions(options); + if (ret == SUCCESS && is_load_profiling_) { + // profiling need phy device id + if (!convert_2_phy_device_id) { + device_id_.push_back(options.device_id); + } else { + uint32_t phy_device_id = 0; + rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); + return FAILED; + } + device_id_.push_back(phy_device_id); + } + } } if (ret != SUCCESS) { GELOGE(ret, "Failed to init profiling."); @@ -107,7 +108,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromAclCfg( - const std::string &config) { + const std::string &config) { #ifdef DAVINCI_SUPPORT_PROFILING try { is_load_profiling_ = false; @@ -163,7 +164,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( - const Json &features) { + const Json &features) { #ifdef DAVINCI_SUPPORT_PROFILING try { for (size_t i = 0; i < features.size(); ++i) { @@ -362,20 +363,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf } uint64_t module = GetProfilingModule(); int32_t device_num = static_cast(device_id_.size()); - uint32_t *device_id_ptr = new (std::nothrow) uint32_t[device_num]; + auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Stop profiling device id ptr is null."); + GELOGE(FAILED, "Stop profiling: device id ptr is null."); return; } for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_id_[i]); } - rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr); + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); } - delete[] device_id_ptr; - device_id_ptr = nullptr; for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { int result = ProfMgrStop(prof_handle_vec_[i]); @@ -391,7 +390,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( - const std::vector &task_desc_info, const int32_t &device_id) { + const std::vector &task_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); if (reporter == nullptr) { @@ -406,12 +405,11 @@ 
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin uint32_t block_dim = task.block_dim; uint32_t task_id = task.task_id; uint32_t stream_id = task.stream_id; - data = model_name.append(" ").append(op_name).append(" ").append(std::to_string(block_dim) - .append(" ") - .append(std::to_string(task_id)) - .append(" ") - .append(std::to_string(stream_id)) - .append("\n")); + data = model_name.append(" ") + .append(op_name).append(" ") + .append(std::to_string(block_dim).append(" ") + .append(std::to_string(task_id)).append(" ") + .append(std::to_string(stream_id)).append("\n")); Msprof::Engine::ReporterData reporter_data{}; reporter_data.deviceId = device_id; @@ -435,7 +433,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( - const std::vector &compute_graph_desc_info, const int32_t &device_id) { + const std::vector &compute_graph_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;); @@ -443,19 +441,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin std::string data; for (const auto &graph : compute_graph_desc_info) { data.append("model_name:") - .append(graph.model_name) - .append(" op_name:") - .append(graph.op_name) - .append(" op_type:") - .append(graph.op_type); + .append(graph.model_name) + .append(" op_name:") + .append(graph.op_name) + .append(" op_type:") + .append(graph.op_type); for (size_t i = 0; i < graph.input_format.size(); ++i) { data.append(" input_id:") - .append(std::to_string(i)) - .append(" input_format:") - .append(std::to_string(graph.input_format.at(i))) - .append(" input_data_type:") - .append(std::to_string(graph.input_data_type.at(i))) - .append(" input_shape:\""); + .append(std::to_string(i)) + .append(" input_format:") + .append(std::to_string(graph.input_format.at(i))) + .append(" input_data_type:") + .append(std::to_string(graph.input_data_type.at(i))) + .append(" input_shape:\""); size_t input_shape_len = graph.input_shape.at(i).size(); if (input_shape_len == 0) { data.append(""); @@ -473,12 +471,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin for (size_t i = 0; i < graph.output_format.size(); ++i) { data.append(" output_id:") - .append(std::to_string(i)) - .append(" output_format:") - .append(std::to_string(graph.output_format.at(i))) - .append(" output_data_type:") - .append(std::to_string(graph.output_data_type.at(i))) - .append(" output_shape:\""); + .append(std::to_string(i)) + .append(" output_format:") + .append(std::to_string(graph.output_format.at(i))) + .append(" output_data_type:") + .append(std::to_string(graph.output_data_type.at(i))) + .append(" output_shape:\""); size_t output_shape_len = graph.output_shape.at(i).size(); if (output_shape_len == 0) { data.append(""); @@ -504,8 +502,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Report( - const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, - Msprof::Engine::ReporterData &reporter_data) { + const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, + Msprof::Engine::ReporterData &reporter_data) { #ifdef DAVINCI_SUPPORT_PROFILING 
size_t index = data.size() / kReportMaxLen; if (index >= 1) { @@ -547,7 +545,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( - const std::vector &task_desc_info, const std::vector &compute_graph_desc_info) { + const std::vector &task_desc_info, const std::vector &compute_graph_desc_info) { #ifdef DAVINCI_SUPPORT_PROFILING int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); @@ -580,15 +578,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfilingConfig( - const std::string &profiling_cfg) { + const std::string &profiling_cfg) { recv_profiling_config_ = profiling_cfg; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() { - uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK | - PROF_SCHEDULE_TIMELINE_MASK | PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK | - PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK | PROF_AICORE_METRICS_MASK | - PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK; + uint64_t module = PROF_MODEL_EXECUTE_MASK | + PROF_RUNTIME_API_MASK | + PROF_RUNTIME_TRACE_MASK | + PROF_SCHEDULE_TIMELINE_MASK | + PROF_SCHEDULE_TRACE_MASK | + PROF_TASK_TIME_MASK | + PROF_SUBTASK_TIME_MASK | + PROF_AICPU_TRACE_MASK | + PROF_AICORE_METRICS_MASK | + PROF_AIVECTORCORE_METRICS_MASK | + PROF_MODEL_LOAD_MASK; return module; } @@ -704,8 +709,8 @@ Status ProfilingManager::ProfParseDeviceId(const std::map &config_para, int32_t &device_num, - vector &device_list) { +Status ProfilingManager::ProfParseParam(const std::map &config_para, + int32_t &device_num, vector &device_list) { #ifdef DAVINCI_SUPPORT_PROFILING // device num auto iter = config_para.find(kConfigNumsdev); @@ -739,8 +744,8 @@ Status ProfilingManager::ProfParseParam(const std::map return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status -ProfilingManager::ProfStartProfiling(uint64_t module, const std::map &config_para) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfStartProfiling( + uint64_t module, const std::map &config_para) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); int32_t device_num = 0; @@ -749,23 +754,21 @@ ProfilingManager::ProfStartProfiling(uint64_t module, const std::map(new (std::nothrow) uint32_t[device_num]); + if (device_id_ptr == nullptr) { + GELOGE(FAILED, "Prof start: device id ptr is null."); return FAILED; } for (int32_t i = 0; i < device_num; i++) { - device_id[i] = static_cast(device_list[i]); + device_id_ptr[i] = static_cast(device_list[i]); } GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); - rtError_t rt_ret = rtProfilerStart(module, device_num, device_id); + rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { - delete[] device_id; GELOGE(FAILED, "Runtime profiler config proc failed."); return FAILED; } - delete[] device_id; - device_id = nullptr; if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { @@ -783,8 +786,8 @@ ProfilingManager::ProfStartProfiling(uint64_t module, const std::map &config_para) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY 
Status ProfilingManager::ProfStopProfiling(uint64_t module, + const std::map &config_para) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); int32_t device_num = 0; @@ -793,23 +796,20 @@ ProfilingManager::ProfStopProfiling(uint64_t module, const std::map(new (std::nothrow) uint32_t[device_num]); + if (device_id_ptr == nullptr) { + GELOGE(FAILED, "Prof stop: device id ptr is null."); return FAILED; } for (int32_t i = 0; i < device_num; i++) { - device_id[i] = static_cast(device_list[i]); + device_id_ptr[i] = static_cast(device_list[i]); } GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); - rtError_t rt_ret = rtProfilerStop(module, device_num, device_id); + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { - delete[] device_id; GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); return FAILED; } - delete[] device_id; - device_id = nullptr; uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { @@ -829,8 +829,8 @@ ProfilingManager::ProfStopProfiling(uint64_t module, const std::map &device_list) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::UpdateDeviceIdModuleMap(string prof_type, + uint64_t module, const vector &device_list) { #ifdef DAVINCI_SUPPORT_PROFILING if (prof_type == kProfStart) { for (uint32_t i = 0; i < device_list.size(); i++) { @@ -868,14 +868,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin } GELOGI("Current logic_device_id:%d", logic_device_id); - uint32_t phy_device_id = 0; - rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); - } - GELOGI("Current phy_device_id:%d", phy_device_id); bool execute_model_prof_on = false; - auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); + auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (iter != device_id_.end()) { execute_model_prof_on = true; } diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h old mode 100644 new mode 100755 index f4249451..a8f16deb --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
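The profiling_manager.cc hunks above replace the manually new[]/delete[]-managed device-id arrays around rtProfilerStart and rtProfilerStop with std::unique_ptr, so every early return releases the scratch buffer automatically. A self-contained sketch of that pattern, with a stub standing in for the real runtime call:

#include <cstdint>
#include <memory>
#include <new>
#include <vector>

// Stand-in for the C-style runtime call; the real rtProfilerStart takes a raw
// pointer and is declared by the Ascend runtime headers.
static int32_t StubProfilerStart(uint64_t module, int32_t num, uint32_t *ids) {
  (void)module;
  (void)num;
  (void)ids;
  return 0;
}

// The device-id array is owned by std::unique_ptr<uint32_t[]>, so no matching
// delete[] is needed on any return path.
bool StartProfiling(uint64_t module, const std::vector<int32_t> &device_list) {
  const int32_t device_num = static_cast<int32_t>(device_list.size());
  std::unique_ptr<uint32_t[]> device_ids(new (std::nothrow) uint32_t[device_num]);
  if (device_ids == nullptr) {
    return false;  // allocation failed
  }
  for (int32_t i = 0; i < device_num; ++i) {
    device_ids[i] = static_cast<uint32_t>(device_list[i]);
  }
  // Hand the raw pointer to the C-style API via get(); ownership stays here.
  return StubProfilerStart(module, device_num, device_ids.get()) == 0;
}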
@@ -36,7 +36,7 @@ using std::vector; using Json = nlohmann::json; namespace { -const std::string GE_PROFILING_MODULE = "Framework"; + const std::string GE_PROFILING_MODULE = "Framework"; } // namespace namespace ge { // register Plugin @@ -83,7 +83,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingTrainingTraceOn() const { return is_training_trace_; } bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; - bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } void ReportProfilingData(const std::vector &task_desc_info, const std::vector &compute_graph_desc_info); @@ -93,14 +93,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { void ProfilingGraphDescInfo(const std::vector &compute_graph_desc_info, const int32_t &device_id); void SetProfilingConfig(const string &profiling_cfg); - vector GetProfilingDeviceId() const { return device_id_; } + vector GetProfilingDeviceId() const { return device_id_; } void PluginUnInit(const std::string &module) const; - private: ge::Status ParseFeaturesFromAclCfg(const Json &feature); ge::Status ProfParseParam(const std::map &config_para, int32_t &device_num, vector &device_list); - ge::Status ProfParseDeviceId(const std::map &config_para, vector &device_list); + ge::Status ProfParseDeviceId(const std::map &config_para, + vector &device_list); uint64_t GetProfilingModule(); void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector &device_list); bool is_load_profiling_ = false; @@ -121,7 +121,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { string system_trace_conf_; string task_trace_conf_; const ProfilingEngineImpl engine_; - map device_id_module_map_; // key: device_id, value: profiling on module + map device_id_module_map_; // key: device_id, value: profiling on module std::mutex mutex_; }; } // namespace ge diff --git a/ge/common/properties_manager.cc b/ge/common/properties_manager.cc index a4879460..3ca5bd27 100644 --- a/ge/common/properties_manager.cc +++ b/ge/common/properties_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,7 +135,7 @@ std::string PropertiesManager::Trim(const std::string &str) { // Get property value, if not found, return "" FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string PropertiesManager::GetPropertyValue( - const std::string &map_key) { + const std::string &map_key) { std::lock_guard lock(mutex_); auto iter = properties_map_.find(map_key); if (properties_map_.end() != iter) { @@ -166,14 +166,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::SetProp } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &PropertiesManager::GetDumpProperties( - uint64_t session_id) { + uint64_t session_id) { std::lock_guard lock(mutex_); // If session_id is not found in dump_properties_map_, operator[] will insert one. 
return dump_properties_map_[session_id]; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::AddDumpProperties( - uint64_t session_id, const DumpProperties &dump_properties) { + uint64_t session_id, const DumpProperties &dump_properties) { std::lock_guard lock(mutex_); dump_properties_map_.emplace(session_id, dump_properties); } diff --git a/ge/common/properties_manager.h b/ge/common/properties_manager.h index 9ba7f88e..634113a8 100644 --- a/ge/common/properties_manager.h +++ b/ge/common/properties_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/proto/ge_ir.proto b/ge/common/proto/ge_ir.proto new file mode 120000 index 00000000..f60a0f89 --- /dev/null +++ b/ge/common/proto/ge_ir.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/ge_ir.proto \ No newline at end of file diff --git a/ge/common/proto/insert_op.proto b/ge/common/proto/insert_op.proto new file mode 120000 index 00000000..7db5a53b --- /dev/null +++ b/ge/common/proto/insert_op.proto @@ -0,0 +1 @@ +../../proto/insert_op.proto \ No newline at end of file diff --git a/ge/common/proto/om.proto b/ge/common/proto/om.proto new file mode 120000 index 00000000..91c581bb --- /dev/null +++ b/ge/common/proto/om.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/om.proto \ No newline at end of file diff --git a/ge/common/proto/op_mapping_info.proto b/ge/common/proto/op_mapping_info.proto new file mode 120000 index 00000000..9e26bcda --- /dev/null +++ b/ge/common/proto/op_mapping_info.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/op_mapping_info.proto \ No newline at end of file diff --git a/ge/common/proto/task.proto b/ge/common/proto/task.proto new file mode 120000 index 00000000..36ae4847 --- /dev/null +++ b/ge/common/proto/task.proto @@ -0,0 +1 @@ +../../proto/task.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/attr_value.proto b/ge/common/proto/tensorflow/attr_value.proto new file mode 120000 index 00000000..e19c0ff1 --- /dev/null +++ b/ge/common/proto/tensorflow/attr_value.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/attr_value.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/function.proto b/ge/common/proto/tensorflow/function.proto new file mode 120000 index 00000000..c87e106d --- /dev/null +++ b/ge/common/proto/tensorflow/function.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/function.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/graph.proto b/ge/common/proto/tensorflow/graph.proto new file mode 120000 index 00000000..5be9cbe4 --- /dev/null +++ b/ge/common/proto/tensorflow/graph.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/graph.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/node_def.proto b/ge/common/proto/tensorflow/node_def.proto new file mode 120000 index 00000000..74d23d14 --- /dev/null +++ b/ge/common/proto/tensorflow/node_def.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/node_def.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/op_def.proto b/ge/common/proto/tensorflow/op_def.proto new file mode 120000 index 00000000..4a674add --- /dev/null +++ b/ge/common/proto/tensorflow/op_def.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/op_def.proto 
\ No newline at end of file diff --git a/ge/common/proto/tensorflow/resource_handle.proto b/ge/common/proto/tensorflow/resource_handle.proto new file mode 120000 index 00000000..740d9729 --- /dev/null +++ b/ge/common/proto/tensorflow/resource_handle.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/resource_handle.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/tensor.proto b/ge/common/proto/tensorflow/tensor.proto new file mode 120000 index 00000000..45814795 --- /dev/null +++ b/ge/common/proto/tensorflow/tensor.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/tensor.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/tensor_shape.proto b/ge/common/proto/tensorflow/tensor_shape.proto new file mode 120000 index 00000000..fdf88677 --- /dev/null +++ b/ge/common/proto/tensorflow/tensor_shape.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/tensor_shape.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/types.proto b/ge/common/proto/tensorflow/types.proto new file mode 120000 index 00000000..eecf0952 --- /dev/null +++ b/ge/common/proto/tensorflow/types.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/types.proto \ No newline at end of file diff --git a/ge/common/proto/tensorflow/versions.proto b/ge/common/proto/tensorflow/versions.proto new file mode 120000 index 00000000..f6130cd3 --- /dev/null +++ b/ge/common/proto/tensorflow/versions.proto @@ -0,0 +1 @@ +../../../../../inc/register/proto/tensorflow/versions.proto \ No newline at end of file diff --git a/ge/common/singleton.h b/ge/common/singleton.h old mode 100644 new mode 100755 index 1a347bfe..314e824e --- a/ge/common/singleton.h +++ b/ge/common/singleton.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_COMMON_SINGLETON_H_ #define GE_COMMON_SINGLETON_H_ diff --git a/ge/common/tbe_kernel_store.cc b/ge/common/tbe_kernel_store.cc old mode 100644 new mode 100755 index 9acead2d..2fb9a04a --- a/ge/common/tbe_kernel_store.cc +++ b/ge/common/tbe_kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,9 @@ namespace ge { TBEKernelStore::TBEKernelStore() {} -void TBEKernelStore::AddTBEKernel(const TBEKernelPtr &kernel) { AddKernel(kernel); } +void TBEKernelStore::AddTBEKernel(const TBEKernelPtr &kernel) { + AddKernel(kernel); +} void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr &op_desc) const { if (op_desc != nullptr) { diff --git a/ge/common/tbe_kernel_store.h b/ge/common/tbe_kernel_store.h old mode 100644 new mode 100755 index ab1ab9b4..6304af50 --- a/ge/common/tbe_kernel_store.h +++ b/ge/common/tbe_kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
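The properties_manager.cc hunks above touch GetDumpProperties() and AddDumpProperties(), whose inline comment notes that operator[] inserts a missing session entry. A compact sketch of that session-keyed map pattern, with DumpProperties reduced to a placeholder struct and both accessors taking the same lock:

#include <cstdint>
#include <map>
#include <mutex>
#include <string>

// Placeholder for the real DumpProperties class, just enough to compile.
struct DumpProperties {
  std::string dump_path;
};

class PropertiesSketch {
 public:
  DumpProperties &GetDumpProperties(uint64_t session_id) {
    std::lock_guard<std::mutex> lock(mutex_);
    return dump_properties_map_[session_id];  // default-inserts if absent
  }

  void AddDumpProperties(uint64_t session_id, const DumpProperties &props) {
    std::lock_guard<std::mutex> lock(mutex_);
    dump_properties_map_.emplace(session_id, props);  // no-op if key exists
  }

 private:
  std::mutex mutex_;
  std::map<uint64_t, DumpProperties> dump_properties_map_;
};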
diff --git a/ge/common/thread_pool.cc b/ge/common/thread_pool.cc index 700892f2..dead0127 100644 --- a/ge/common/thread_pool.cc +++ b/ge/common/thread_pool.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/thread_pool.h b/ge/common/thread_pool.h old mode 100644 new mode 100755 index 92157275..e173618f --- a/ge/common/thread_pool.h +++ b/ge/common/thread_pool.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/types.cc b/ge/common/types.cc old mode 100644 new mode 100755 index de293d34..1b96b094 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,9 +49,9 @@ const std::string RTS_PROFILE_PATH = "RTS_PATH"; const std::string PROFILE_STOP_KEY = "stop"; const std::string PROFILE_STOP_VALUE = "enable"; const std::map PROFILE_COMPONENT_MAP{ - {"ome", OME_PROFILE}, - {"cce", CCE_PROFILE}, - {"runtime", RTS_PROFILE}, + {"ome", OME_PROFILE}, + {"cce", CCE_PROFILE}, + {"runtime", RTS_PROFILE}, }; const std::string PROFILE_CONFIG = "config"; @@ -794,7 +794,7 @@ const uint32_t XRGB_CHN_NUM = 4; /// const bool DEFAULT_GLOBAL_POOLING = false; -const uint32_t MODEL_VERSION = 0x10000000; ///< Model version 1.0/// +const uint32_t MODEL_VERSION = 0x10000000; ///< Model version 1.0/// // Eltwise's input size const int ELTWISE_MIN_INPUT_SIZE = 2; diff --git a/ge/common/util.cc b/ge/common/util.cc index cbd2ee71..e41f3dbd 100644 --- a/ge/common/util.cc +++ b/ge/common/util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,11 +54,10 @@ const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M /// The maximum length of the file. -/// Based on the security coding specification and the current actual (protobuf) model size, it is determined as 2G-1 -const int kMaxFileSizeLimit = INT_MAX; +const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now const int kMaxBuffSize = 256; const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; -constexpr uint32_t MAX_CONFIG_FILE_BYTE = 10 * 1024 * 1024; +constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; } // namespace namespace ge { @@ -118,19 +117,20 @@ long GetFileLength(const std::string &input_file) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str()); unsigned long long file_length = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, - ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); - return -1, "Open file[%s] failed. 
%s", input_file.c_str(), strerror(errno)); + mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, + ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); + return -1, "Open file[%s] failed. %s", input_file.c_str(), strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E19015", {"filepath"}, {input_file}); return -1, "File[%s] size is 0, not valid.", input_file.c_str()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage( - "E19016", {"filepath", "filesize", "maxlen"}, - {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); - return -1, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, kMaxFileSizeLimit); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > kMaxFileSizeLimit, + ErrorManager::GetInstance().ATCReportErrMessage( + "E19016", {"filepath", "filesize", "maxlen"}, + {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); + return -1, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, + kMaxFileSizeLimit); return static_cast(file_length); } @@ -186,7 +186,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co std::streamsize size = file.tellg(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((size <= 0), file.close(); return false, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > kMaxFileSizeLimit, file.close(); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast(kMaxFileSizeLimit), file.close(); return false, "file size %ld is out of limit: %d.", size, kMaxFileSizeLimit); file.seekg(0, std::ios::beg); // [no need to check value] @@ -263,7 +263,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromText(const ch std::string real_path = RealPath(file); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), ErrorManager::GetInstance().ATCReportErrMessage( - "E19000", {"path", "errmsg"}, {file, strerror(errno)}); + "E19000", {"path", "errmsg"}, {file, strerror(errno)}); return false, "Path[%s]'s realpath is empty, errmsg[%s]", file, strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid."); @@ -299,12 +299,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha google::protobuf::io::IstreamInputStream input(&fs); bool ret = google::protobuf::TextFormat::Parse(&input, message); GE_IF_BOOL_EXEC( - !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); + !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); return ret; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestap() { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() { struct timeval tv {}; int ret = gettimeofday(&tv, nullptr); GE_LOGE_IF(ret != 0, "Func gettimeofday may failed: ret=%d", ret); @@ -348,9 +348,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - strlen(path) >= PATH_MAX, - ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); - return 
"", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); + strlen(path) >= PATH_MAX, + ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); + return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); // Nullptr is returned when the path does not exist or there is no permission // Return absolute path when path is accessible @@ -386,10 +386,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(real_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, real_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); + !ValidateStr(real_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, real_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); // The absolute path points to a file that is not readable if (access(real_path.c_str(), R_OK) != 0) { @@ -411,9 +411,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - strlen(file_path.c_str()) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage( - "E19002", {"filepath", "size"}, {file_path, std::to_string(PATH_MAX)}); - return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); + strlen(file_path.c_str()) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage( + "E19002", {"filepath", "size"}, {file_path, std::to_string(PATH_MAX)}); + return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); // A regular matching expression to verify the validity of the input file path // Path section: Support upper and lower case letters, numbers dots(.) 
chinese and underscores @@ -421,10 +421,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(file_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); + !ValidateStr(file_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, file_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); std::string real_path = RealPath(file_path.c_str()); // Can get absolute path (file exists) @@ -512,9 +512,9 @@ FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { stat.st_mode); return false; } - if (stat.st_size > MAX_CONFIG_FILE_BYTE) { + if (stat.st_size > kMaxConfigFileByte) { GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", - resolved_file_path.c_str(), stat.st_size, MAX_CONFIG_FILE_BYTE); + resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); return false; } return true; diff --git a/ge/engine_manager/dnnengine_manager.cc b/ge/engine_manager/dnnengine_manager.cc index 3389e1b9..f8d58208 100644 --- a/ge/engine_manager/dnnengine_manager.cc +++ b/ge/engine_manager/dnnengine_manager.cc @@ -216,19 +216,19 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { if (kernel_info_store != kernel_map.end()) { std::string unsupported_reason; // It will be replaced by engine' checksupport - uint64_t start_time = GetCurrentTimestap(); + uint64_t start_time = GetCurrentTimestamp(); if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) { - checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; + checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; op_desc->SetOpEngineName(it.engine); op_desc->SetOpKernelLibName(kernel_name); // set attrs for taking information when load txt to graph object - (void)AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine); - (void)AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); + (void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine); + (void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(), it.engine.c_str(), op_desc->GetName().c_str()); return it.engine; } else { - checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; + checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; bool is_custom_op = false; if ((ge::AttrUtils::GetBool(op_desc, kCustomOpFlag, is_custom_op)) && is_custom_op) { ErrorManager::GetInstance().ATCReportErrMessage("E13001", {"kernelname", "optype", "opname"}, @@ -237,9 +237,8 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { "The custom operator registered by the user does not support the logic function delivered by this " "network. 
Check support failed, kernel_name is %s, op type is %s, op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); - std::string error_info = - "The custom operator registered by the user does not support the logic function" - "delivered by this network"; + std::string error_info = "The custom operator registered by the user does not support the logic function" + "delivered by this network"; return ""; } unsupported_reasons.emplace(kernel_name, unsupported_reason); @@ -251,9 +250,9 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { } } else { GELOGW( - "DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s," - "op type is %s, op name is %s", - kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); + "DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s," + "op type is %s, op name is %s", + kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); } } @@ -261,19 +260,19 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { string reason; for (const auto &it : unsupported_reasons) { reason += it.first + ":" + it.second + ";"; - ErrorManager::GetInstance().ATCReportErrMessage("E13002", {"optype", "opskernel", "reason"}, - {op_desc->GetType(), it.first, it.second}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E13002", {"optype", "opskernel", "reason"}, {op_desc->GetType(), it.first, it.second}); GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "GetDNNEngineName:Op type %s of ops kernel %s is unsupported, reason:%s", op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str()); } - analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), analyzer::CHECKSUPPORT, - node_ptr, reason}; + analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), + analyzer::CHECKSUPPORT, node_ptr, reason}; // do not change original process (void)Analyzer::GetInstance()->DoAnalyze(analyze_info); - ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"}, - {op_desc->GetName(), op_desc->GetType()}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()}); GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "Can't find any supported ops kernel and engine of %s, type is %s", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ""; @@ -285,13 +284,13 @@ std::string DNNEngineManager::GetHostCpuEngineName(const std::vector &op if ((it.engine == kHostCpuEngineName) && (it.opKernelLib == kHostCpuOpKernelLibName)) { op_desc->SetOpEngineName(kHostCpuEngineName); op_desc->SetOpKernelLibName(kHostCpuOpKernelLibName); - GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s", kHostCpuOpKernelLibName, - kHostCpuEngineName, op_desc->GetName().c_str()); + GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s", + kHostCpuOpKernelLibName, kHostCpuEngineName, op_desc->GetName().c_str()); return kHostCpuEngineName; } } - GELOGE(FAILED, "DNNEngineManager: HostCpuEngine not support [%s, %s].", op_desc->GetName().c_str(), - op_desc->GetType().c_str()); + GELOGE(FAILED, "DNNEngineManager: HostCpuEngine not support [%s, %s].", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ""; } diff --git a/ge/engine_manager/dnnengine_manager.h b/ge/engine_manager/dnnengine_manager.h old mode 100644 new mode 100755 diff --git 
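The dnnengine_manager.cc hunks are mostly re-wrapping, but the renamed GetCurrentTimestamp() calls make the selection loop visible: every candidate ops kernel store gets a timed CheckSupported() call, the per-kernel cost is accumulated, and the first supporting engine wins. A stripped-down sketch of that loop; FakeKernelStore, NowMicros and PickEngine are stand-ins, not the real GE classes:

```cpp
#include <chrono>
#include <cstdint>
#include <map>
#include <string>

// Illustrative kernel store: "supports" only one op type.
struct FakeKernelStore {
  bool CheckSupported(const std::string &op_type, std::string &reason) const {
    if (op_type == "Add") {
      return true;
    }
    reason = "op type not registered";
    return false;
  }
};

static uint64_t NowMicros() {  // analogous to GetCurrentTimestamp()
  return static_cast<uint64_t>(
      std::chrono::duration_cast<std::chrono::microseconds>(
          std::chrono::steady_clock::now().time_since_epoch()).count());
}

std::string PickEngine(const std::map<std::string, FakeKernelStore> &stores,
                       const std::string &op_type,
                       std::map<std::string, uint64_t> &checksupport_cost) {
  for (const auto &entry : stores) {
    std::string reason;
    const uint64_t start = NowMicros();
    const bool supported = entry.second.CheckSupported(op_type, reason);
    checksupport_cost[entry.first] += NowMicros() - start;  // cost bookkeeping as above
    if (supported) {
      return entry.first;  // real code also stamps engine/kernel-lib names on the OpDesc
    }
    // real code collects `reason` per kernel for the E13002 report
  }
  return "";  // nothing supported; real code raises E13003 and runs the analyzer
}
```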
a/ge/engine_manager/engine_conf.json b/ge/engine_manager/engine_conf.json index 82360562..ad43c9ab 100755 --- a/ge/engine_manager/engine_conf.json +++ b/ge/engine_manager/engine_conf.json @@ -41,6 +41,13 @@ "skip_assign_stream": false, "attach": true }, + { + "id": "DNN_VM_AICPU_ASCEND", + "name": "AICPU_ASCEND", + "independent": false, + "skip_assign_stream": false, + "attach": true + }, { "id": "DNN_HCCL", "name": "HCCL", diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 7358585a..d818497c 100755 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -1,125 +1,111 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ +set(PROTO_LIST + "${METADEF_DIR}/proto/om.proto" + "${METADEF_DIR}/proto/ge_ir.proto" + "${METADEF_DIR}/proto/insert_op.proto" + "${METADEF_DIR}/proto/task.proto" + "${METADEF_DIR}/proto/op_mapping_info.proto" + "${METADEF_DIR}/proto/dump_task.proto" +) -# libge_executor.so -# add all proto files, generate corresponding .h and .cc files -# add src files -file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "../../proto/task.proto" - "../../proto/om.proto" - "../../proto/insert_op.proto" - "../../proto/op_mapping_info.proto" - "../../proto/ge_ir.proto" - "../../proto/dump_task.proto" - ) +protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) -file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "ge_executor.cc" - "../common/dump/dump_properties.cc" - "../common/dump/dump_manager.cc" - "../common/dump/dump_op.cc" - "../common/ge/op_tiling_manager.cc" - "../common/ge/plugin_manager.cc" - "../common/profiling/profiling_manager.cc" - "../graph/execute/graph_execute.cc" - "../graph/load/graph_loader.cc" - "../graph/load/new_model_manager/aipp_utils.cc" - "../graph/load/new_model_manager/cpu_queue_schedule.cc" - "../graph/load/new_model_manager/data_dumper.cc" - "../graph/load/new_model_manager/data_inputer.cc" - "../graph/load/new_model_manager/davinci_model.cc" - "../graph/load/new_model_manager/davinci_model_parser.cc" - "../graph/load/new_model_manager/model_manager.cc" - "../graph/load/new_model_manager/model_utils.cc" - "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" - "../graph/load/new_model_manager/task_info/event_record_task_info.cc" - "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" - "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/kernel_task_info.cc" - "../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" - "../graph/load/new_model_manager/task_info/label_set_task_info.cc" - "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" - "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" - 
"../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" - "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" - "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" - "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" - "../graph/load/new_model_manager/task_info/task_info.cc" - "../graph/load/new_model_manager/tbe_handle_store.cc" - "../graph/load/new_model_manager/zero_copy_offset.cc" - "../graph/load/new_model_manager/zero_copy_task.cc" - "../graph/manager/graph_caching_allocator.cc" - "../graph/manager/graph_manager_utils.cc" - "../graph/manager/graph_mem_allocator.cc" - "../graph/manager/graph_var_manager.cc" - "../graph/manager/rdma_pool_allocator.cc" - "../graph/manager/trans_var_data_utils.cc" - "../graph/manager/util/debug.cc" - "../hybrid/hybrid_davinci_model_stub.cc" - "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" - "../model/ge_model.cc" - "../model/ge_root_model.cc" - "../omm/csa_interact.cc" - "../single_op/single_op.cc" - "../single_op/single_op_manager.cc" - "../single_op/single_op_model.cc" - "../single_op/stream_resource.cc" - "../single_op/task/aicpu_task_builder.cc" - "../single_op/task/build_task_utils.cc" - "../single_op/task/op_task.cc" - "../single_op/task/tbe_task_builder.cc" - ) +set(SRC_LIST + "ge_executor.cc" + "../common/profiling/profiling_manager.cc" + "../common/ge/plugin_manager.cc" + "../common/ge/op_tiling_manager.cc" + "../common/dump/dump_properties.cc" + "../common/dump/dump_manager.cc" + "../common/dump/dump_op.cc" + "../graph/load/graph_loader.cc" + "../graph/execute/graph_execute.cc" + "../omm/csa_interact.cc" + "../graph/manager/graph_manager_utils.cc" + "../graph/manager/graph_var_manager.cc" + "../graph/manager/graph_mem_allocator.cc" + "../graph/manager/graph_caching_allocator.cc" + "../graph/manager/trans_var_data_utils.cc" + "../graph/manager/util/debug.cc" + "../graph/manager/rdma_pool_allocator.cc" + "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" + "../model/ge_model.cc" + "../model/ge_root_model.cc" + "../graph/load/new_model_manager/davinci_model.cc" + "../graph/load/new_model_manager/davinci_model_parser.cc" + "../graph/load/new_model_manager/model_manager.cc" + "../graph/load/new_model_manager/tbe_handle_store.cc" + "../graph/load/new_model_manager/cpu_queue_schedule.cc" + "../graph/load/new_model_manager/model_utils.cc" + "../graph/load/new_model_manager/aipp_utils.cc" + "../graph/load/new_model_manager/data_inputer.cc" + "../graph/load/new_model_manager/data_dumper.cc" + "../graph/load/new_model_manager/zero_copy_task.cc" + "../graph/load/new_model_manager/zero_copy_offset.cc" + "../graph/load/new_model_manager/task_info/task_info.cc" + "../graph/load/new_model_manager/task_info/event_record_task_info.cc" + "../graph/load/new_model_manager/task_info/event_wait_task_info.cc" + "../graph/load/new_model_manager/task_info/fusion_start_task_info.cc" + "../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc" + "../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc" + "../graph/load/new_model_manager/task_info/kernel_task_info.cc" + "../graph/load/new_model_manager/task_info/label_set_task_info.cc" + "../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc" + 
"../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc" + "../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc" + "../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc" + "../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc" + "../graph/load/new_model_manager/task_info/stream_active_task_info.cc" + "../graph/load/new_model_manager/task_info/stream_switch_task_info.cc" + "../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc" + "../graph/load/new_model_manager/task_info/end_graph_task_info.cc" + "../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" + "../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" + "../single_op/single_op_manager.cc" + "../single_op/single_op_model.cc" + "../single_op/single_op.cc" + "../single_op/stream_resource.cc" + "../single_op/task/op_task.cc" + "../single_op/task/build_task_utils.cc" + "../single_op/task/tbe_task_builder.cc" + "../single_op/task/aicpu_task_builder.cc" + "../single_op/task/aicpu_kernel_task_builder.cc" + "../hybrid/hybrid_davinci_model_stub.cc" +) -ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) +######## libge_executor.a ######## +add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS}) -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}/src/ge) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/graph) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) +target_compile_options(ge_executor PRIVATE + -Werror + -O2 +) -######## libge_executor.so ######## -add_library(ge_executor SHARED ${SRC_LIST} ${PROTO_HDRS}) target_compile_definitions(ge_executor PRIVATE - Werror - PROTOBUF_INLINE_NOT_IN_HEADERS=0 - DAVINCI_SUPPORT_PROFILING - FMK_HOST_INFER) -target_link_libraries(ge_executor - ge_common - graph - ${PROTOBUF_LIBRARY} - ${register} - ${c_sec} - ${runtime} - ${slog} - ${mmpa} - ${msprof} - ${error_manager} - rt - dl) + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + DAVINCI_SUPPORT_PROFILING +) +target_include_directories(ge_executor PRIVATE + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/cce +) + +target_link_libraries(ge_executor PRIVATE + $ + json + protobuf + c_sec + -lrt + -ldl +) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc old mode 100644 new mode 100755 index bf1e250b..6c22c38a --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -183,7 +183,8 @@ bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width, return false; } -bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, const vector> &batch_info) { +bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, + const vector> &batch_info) { if (batch_info.empty()) { GELOGE(ge::FAILED, "Dynamic batch info is empty."); return false; @@ -192,8 +193,8 @@ bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, const bool find_match = false; for (auto resolution : batch_info) { if (cur_dynamic_dims.size() != resolution.size()) { - GELOGE(ge::FAILED, "Cur dynamic dims param num is %zu, current resolution size is %zu.", cur_dynamic_dims.size(), - resolution.size()); + GELOGE(ge::FAILED, "Cur dynamic dims param num is %zu, current resolution size is %zu.", + cur_dynamic_dims.size(), resolution.size()); return false; } bool flag = true; @@ -282,11 +283,14 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad return PARAM_INVALID; } - uint64_t size = sizeof(uint64_t); + uint64_t size = sizeof(uint32_t); if (length < size) { GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size); return PARAM_INVALID; } + if (length >= sizeof(uint64_t)) { + size = sizeof(uint64_t); + } // Verify whether the input dynamic batch matches the model gear std::vector> batch_info; @@ -324,12 +328,15 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad return PARAM_INVALID; } - uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint64_t); + uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint32_t); if (length < dynamic_input_size) { GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); return PARAM_INVALID; } - + uint64_t size = sizeof(uint32_t); + if (length >= kDynamicImageSizeInputSize * sizeof(uint64_t)) { + size = sizeof(uint64_t); + } // Verify whether the input dynamic resolution matches the model gear std::vector> batch_info; std::vector batch_num{image_height, image_width}; @@ -350,18 +357,19 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad GELOGE(ret, "Set dynamic size failed"); return ret; } - // Memcpy dynamic resolution height from host to device + + // Memcpy dynamic resolution height from host to device rtError_t rt_ret = - rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); + rtMemcpy(dynamic_input_addr, size, &image_height, size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "memcpy dynamic resolution input data failed! 
ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - uint64_t remain_size = length - sizeof(uint64_t); + uint64_t remain_size = length - size; // Memcpy dynamic resolution width from host to device - if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + sizeof(uint64_t)), - remain_size, &image_width, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { + if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + size), + remain_size, &image_width, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); return FAILED; } @@ -401,16 +409,19 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u } size_t dynamic_dim_num = cur_dynamic_dims.size(); - uint64_t dynamic_input_size = static_cast(dynamic_dim_num * sizeof(uint64_t)); + uint64_t dynamic_input_size = static_cast(dynamic_dim_num * sizeof(uint32_t)); if (length < dynamic_input_size) { GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); return FAILED; } + uint64_t size = sizeof(uint32_t); + if (length >= dynamic_dim_num * sizeof(uint64_t)) { + size = sizeof(uint64_t); + } for (uint32_t i = 0; i < dynamic_dim_num; ++i) { // Memcpy dynamic dim[i] from host to device - if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + sizeof(uint64_t) * i), - length - sizeof(uint64_t) * i, &cur_dynamic_dims[i], sizeof(uint64_t), - RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { + if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + size * i), + length - size * i, &cur_dynamic_dims[i], size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); return FAILED; } @@ -445,17 +456,17 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector & } } } - if (dynamic_dims.size() != all_data_dims.size()) { - GELOGE(FAILED, "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", dynamic_dims.size(), - all_data_dims.size()); + if (dynamic_dims.size() != all_data_dims.size()){ + GELOGE(FAILED, "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", + dynamic_dims.size(), all_data_dims.size()); return FAILED; } for (std::size_t i = 0; i < all_data_dims.size(); ++i) { if (all_data_dims[i] < 0) { cur_dynamic_dims.push_back(dynamic_dims[i]); } else if (static_cast(all_data_dims[i]) != dynamic_dims[i]) { - GELOGE(PARAM_INVALID, "Static dims should be same, index: %zu value: %d should be %d", i, dynamic_dims[i], - all_data_dims[i]); + GELOGE(PARAM_INVALID, "Static dims should be same, index: %zu value: %d should be %d", + i, dynamic_dims[i], all_data_dims[i]); return PARAM_INVALID; } } @@ -492,9 +503,9 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add uint64_t real_aippParms_size = sizeof(kAippDynamicPara) - sizeof(kAippDynamicBatchPara); uint64_t struct_len = batch_num * sizeof(kAippDynamicBatchPara) + real_aippParms_size; GELOGI( - "Get acl input dynamic aipp data, model_id is %u, length is %lu," - "batch num is %lu, struct_len is %lu", - model_id, length, batch_num, struct_len); + "Get acl input dynamic aipp data, model_id is %u, length is %lu," + "batch num is %lu, struct_len is %lu", + model_id, length, batch_num, struct_len); if (struct_len > length) { GELOGE(PARAM_INVALID, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); return PARAM_INVALID; @@ -745,6 +756,22 @@ Status 
GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo GELOGI("GetAIPPInfo succ."); return SUCCESS; } + +Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + GELOGI("Begin to get aipp type."); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + return GE_EXEC_NOT_INIT; + } + Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); + if (ret != SUCCESS) { + GELOGW("Get aipp type is not success."); + return ret; + } + GELOGI("Get aipp type success."); + return SUCCESS; +} + Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { GELOGI("Begin to get dynamic batch output shape info"); if (!isInit_) { @@ -1097,7 +1124,7 @@ Status GeExecutor::SetDump(const DumpConfig &dump_config) { GELOGE(ret, "Set dump conf failed"); return ret; } - GELOGI("Set dump config succ."); + GELOGI("Set dump config successfully"); return SUCCESS; } } // namespace ge diff --git a/ge/executor/module.mk b/ge/executor/module.mk old mode 100644 new mode 100755 diff --git a/ge/executor/proto/dump_task.proto b/ge/executor/proto/dump_task.proto new file mode 120000 index 00000000..2ee8c3dc --- /dev/null +++ b/ge/executor/proto/dump_task.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/dump_task.proto \ No newline at end of file diff --git a/ge/executor/proto/ge_ir.proto b/ge/executor/proto/ge_ir.proto new file mode 120000 index 00000000..f60a0f89 --- /dev/null +++ b/ge/executor/proto/ge_ir.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/ge_ir.proto \ No newline at end of file diff --git a/ge/executor/proto/insert_op.proto b/ge/executor/proto/insert_op.proto new file mode 120000 index 00000000..27b233e5 --- /dev/null +++ b/ge/executor/proto/insert_op.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/insert_op.proto \ No newline at end of file diff --git a/ge/executor/proto/om.proto b/ge/executor/proto/om.proto new file mode 120000 index 00000000..91c581bb --- /dev/null +++ b/ge/executor/proto/om.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/om.proto \ No newline at end of file diff --git a/ge/executor/proto/op_mapping_info.proto b/ge/executor/proto/op_mapping_info.proto new file mode 120000 index 00000000..9e26bcda --- /dev/null +++ b/ge/executor/proto/op_mapping_info.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/op_mapping_info.proto \ No newline at end of file diff --git a/ge/executor/proto/task.proto b/ge/executor/proto/task.proto new file mode 120000 index 00000000..36ae4847 --- /dev/null +++ b/ge/executor/proto/task.proto @@ -0,0 +1 @@ +../../proto/task.proto \ No newline at end of file diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk old mode 100644 new mode 100755 index f83e590a..36bcb603 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -29,6 +29,7 @@ COMMON_LOCAL_SRC_FILES := \ common/dump/dump_properties.cc \ common/dump/dump_manager.cc \ common/dump/dump_op.cc \ + common/dump/dump_server.cc \ common/helper/model_cache_helper.cc \ ge_local_engine/engine/host_cpu_engine.cc \ @@ -59,6 +60,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ generator/ge_generator.cc \ generator/generator_api.cc \ graph/manager/graph_var_manager.cc \ + graph/manager/host_mem_manager.cc \ graph/manager/rdma_pool_allocator.cc \ graph/manager/graph_mem_allocator.cc \ graph/manager/graph_caching_allocator.cc \ @@ -177,6 +179,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/multi_batch_pass.cc \ graph/passes/multi_batch_clone_pass.cc \ graph/passes/subexpression_migration_pass.cc \ + 
graph/passes/subgraph_const_migration_pass.cc \ graph/passes/unused_args_clean_pass.cc \ graph/passes/next_iteration_pass.cc \ graph/passes/control_trigger_pass.cc \ @@ -371,7 +374,6 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES) LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ - libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -436,7 +438,6 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) LOCAL_STATIC_LIBRARIES := libge_memory \ - libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index e43dcbaa..7223c697 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -1,53 +1,215 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -# libge_local_engine.so -# add all proto files, generate corresponding .h and .cc files -file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "${GE_SOURCE_DIR}/metadef/proto/task.proto" - ) - -file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "engine/ge_local_engine.cc" - "ops_kernel_store/*.cc" - "ops_kernel_store/op/*.cc" - ) - -ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) - -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}/ge) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/metadef/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc/graph) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) - -######### libge_local_engine.so ############# +set(PROTO_LIST + "${METADEF_DIR}/proto/task.proto" +) + +set(SRC_LIST + "engine/ge_local_engine.cc" + "ops_kernel_store/ge_local_ops_kernel_info.cc" + "ops_kernel_store/op/op_factory.cc" + "ops_kernel_store/op/op.cc" + "ops_kernel_store/op/ge_deleted_op.cc" + "ops_kernel_store/op/no_op.cc" +) + +set(OPS_KERNEL_SRC_LIST + "ops_kernel_store/ge_local_ops_kernel_builder.cc" + "ops_kernel_store/op/op_factory.cc" + "ops_kernel_store/op/op.cc" + "ops_kernel_store/op/ge_deleted_op.cc" + "ops_kernel_store/op/no_op.cc" +) + +protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) + +############ libge_local_engine.so ############ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) -target_compile_definitions(ge_local_engine PRIVATE Werror) -target_link_libraries(ge_local_engine - graph - ${PROTOBUF_LIBRARY} - ${register} - ${c_sec} - ${slog} - ${runtime}) + +target_compile_options(ge_local_engine PRIVATE + -Werror +) + 
+target_include_directories(ge_local_engine PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(ge_local_engine PRIVATE + $ + -Wl,--no-as-needed + graph + protobuf + register + c_sec + slog + runtime + -Wl,--as-needed +) + +######### atclib/libge_local_engine.so ############# +add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(atc_ge_local_engine PRIVATE + -Werror +) + +target_compile_definitions(atc_ge_local_engine PRIVATE + COMPILE_OMG_PACKAGE +) + +target_include_directories(atc_ge_local_engine PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(atc_ge_local_engine PRIVATE + $ + -Wl,--no-as-needed + graph + protobuf + register + c_sec + slog + runtime_compile + -Wl,--as-needed +) + +set_target_properties(atc_ge_local_engine PROPERTIES + OUTPUT_NAME ge_local_engine + LIBRARY_OUTPUT_DIRECTORY atclib +) + +############ libge_local_opskernel_builder.so ############ +add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(ge_local_opskernel_builder PRIVATE + -Werror +) + +target_include_directories(ge_local_opskernel_builder PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(ge_local_opskernel_builder PRIVATE + $ + -Wl,--no-as-needed + protobuf + c_sec + slog + register + graph + -Wl,--as-needed +) + +############ atclib/libge_local_opskernel_builder.so ############ +add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(atc_ge_local_opskernel_builder PRIVATE + -Werror +) + +target_include_directories(atc_ge_local_opskernel_builder PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(atc_ge_local_opskernel_builder PRIVATE + $ + -Wl,--no-as-needed + protobuf + c_sec + slog + register + graph + -Wl,--as-needed +) + +set_target_properties(atc_ge_local_opskernel_builder PROPERTIES + OUTPUT_NAME ge_local_opskernel_builder + LIBRARY_OUTPUT_DIRECTORY atclib +) + +############ libge_local_opskernel_builder.a ############ +add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(ge_local_opskernel_builder_static PRIVATE + -Werror +) + +target_include_directories(ge_local_opskernel_builder_static 
PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(ge_local_opskernel_builder_static PRIVATE + $ + protobuf + c_sec +) + +############ install ############ +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(TARGETS ge_local_engine ge_local_opskernel_builder OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) + +install(TARGETS atc_ge_local_engine atc_ge_local_opskernel_builder OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/atclib +) diff --git a/ge/ge_local_engine/common/constant/constant.h b/ge/ge_local_engine/common/constant/constant.h index c517d267..42084f2a 100644 --- a/ge/ge_local_engine/common/constant/constant.h +++ b/ge/ge_local_engine/common/constant/constant.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/engine/ge_local_engine.cc b/ge/ge_local_engine/engine/ge_local_engine.cc old mode 100644 new mode 100755 index 9525e81b..58f24d45 --- a/ge/ge_local_engine/engine/ge_local_engine.cc +++ b/ge/ge_local_engine/engine/ge_local_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/engine/ge_local_engine.h b/ge/ge_local_engine/engine/ge_local_engine.h index e5f9a24d..65dfe65b 100644 --- a/ge/ge_local_engine/engine/ge_local_engine.h +++ b/ge/ge_local_engine/engine/ge_local_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc old mode 100644 new mode 100755 index fc46385b..bee9db76 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "host_cpu_engine.h" #include #include "graph/common/omg_util.h" @@ -28,70 +27,69 @@ #include "common/math/math_util.h" namespace { -#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ - case (DTYPE): { \ - GeTensorPtr ge_tensor = nullptr; \ - if (need_create_flag) { \ - int64_t num_size = out_desc.GetShape().IsScalar() ? 
1 : out_desc.GetShape().GetShapeSize(); \ - if (out_desc.GetShape().IsUnknownShape()) { \ - std::vector> range; \ - if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { \ - GELOGE(INTERNAL_ERROR, "Get shape range failed, node:%s", op_desc->GetName().c_str()); \ - return INTERNAL_ERROR; \ - } \ - int64_t max_range_size = 1; \ - for (const auto &item : range) { \ - FMK_INT64_MULCHECK(max_range_size, item.second); \ - max_range_size *= item.second; \ - } \ - num_size = max_range_size; \ - } \ - if (num_size < 0) { \ - GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed, num=%lld", op_desc->GetName().c_str(), i, \ - num_size); \ - return INTERNAL_ERROR; \ - } \ - auto data_num = static_cast(num_size); \ - GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ - std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ - if (buf == nullptr) { \ - GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ - static_cast(sizeof(TYPE) * data_num)); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor = MakeShared(out_desc); \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, \ - data_num * sizeof(TYPE)); \ - if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ - GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ - ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ - outputs.emplace_back(ge_tensor); \ - } else { \ - ge_tensor = outputs[i]; \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ - reinterpret_cast(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ - } \ - auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ - auto tensor_name = op_desc->GetOutputNameByIndex(i); \ - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ - op_desc->GetName().c_str(), i); \ - GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ - op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ - named_outputs.emplace(tensor_name, tensor); \ - break; \ +#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ + case (DTYPE): { \ + GeTensorPtr ge_tensor = nullptr; \ + if (need_create_flag) { \ + int64_t num_size = out_desc.GetShape().IsScalar() ? 
1 : out_desc.GetShape().GetShapeSize(); \ + if (out_desc.GetShape().IsUnknownShape()) { \ + std::vector> range; \ + if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { \ + GELOGE(INTERNAL_ERROR, "Get shape range failed, node:%s", op_desc->GetName().c_str()); \ + return INTERNAL_ERROR; \ + } \ + int64_t max_range_size = 1; \ + for (const auto &item : range) { \ + FMK_INT64_MULCHECK(max_range_size, item.second); \ + max_range_size *= item.second; \ + } \ + num_size = max_range_size; \ + } \ + if (num_size < 0) { \ + GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed, num=%lld", \ + op_desc->GetName().c_str(), i, num_size); \ + return INTERNAL_ERROR; \ + } \ + auto data_num = static_cast(num_size); \ + GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ + std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ + if (buf == nullptr) { \ + GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ + static_cast(sizeof(TYPE) * data_num)); \ + return MEMALLOC_FAILED; \ + } \ + ge_tensor = MakeShared(out_desc); \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ + if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ + GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ + return MEMALLOC_FAILED; \ + } \ + ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ + ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ + outputs.emplace_back(ge_tensor); \ + } else { \ + ge_tensor = outputs[i]; \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ + reinterpret_cast(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ + } \ + auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ + auto tensor_name = op_desc->GetOutputNameByIndex(i); \ + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ + op_desc->GetName().c_str(), i); \ + GELOGD("Successfully inserted output tensor. 
node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ + op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ + named_outputs.emplace(tensor_name, tensor); \ + break; \ } -} // namespace +} namespace ge { namespace { const char *kEnvKeyOppPath = "ASCEND_OPP_PATH"; const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu"; -} // namespace +} void HostCpuEngine::CloseSo() { for (auto handle : lib_handles_) { @@ -105,22 +103,24 @@ void HostCpuEngine::CloseSo() { ge::Status HostCpuEngine::Initialize() { std::lock_guard lock(mu_); if (initialized_) { - GELOGI("HostCpuEngine is already initialized"); - return SUCCESS; + GELOGI("HostCpuEngine is already initialized"); + return SUCCESS; } std::string lib_dir; GE_CHK_STATUS_RET_NOLOG(GetLibPath(lib_dir)); std::vector so_paths; if (ListSoFiles(lib_dir, so_paths) == SUCCESS) { - (void)LoadLibs(so_paths); + (void) LoadLibs(so_paths); } initialized_ = true; return SUCCESS; } -void HostCpuEngine::Finalize() { GELOGI("start HostCpuEngine::Finalize"); } +void HostCpuEngine::Finalize() { + GELOGI("start HostCpuEngine::Finalize"); +} bool HostCpuEngine::CheckSupported(const string &op_type) { return OpKernelRegistry::GetInstance().IsRegistered(op_type); @@ -142,11 +142,14 @@ Status HostCpuEngine::FindOpKernel(const ge::NodePtr &node, std::unique_ptr &inputs, +Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc, + const vector &inputs, map &named_inputs) { auto num_inputs = op_desc->GetInputsSize(); if (num_inputs != inputs.size()) { - GELOGE(PARAM_INVALID, "Mismatching input sizes. op_desc has %zu input(s), but given %zu", num_inputs, + GELOGE(PARAM_INVALID, + "Mismatching input sizes. op_desc has %zu input(s), but given %zu", + num_inputs, inputs.size()); return PARAM_INVALID; } @@ -156,21 +159,22 @@ Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc, const vec GE_CHECK_NOTNULL(ge_tensor); auto tensor = TensorAdapter::AsTensor(*ge_tensor); auto tensor_name = op_desc->GetInputNameByIndex(i); - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get input name. node = %s, index = %zu", - op_desc->GetName().c_str(), i); - GELOGD("Successfully inserted input tensor. node = %s, index = %zu, input name = %s", op_desc->GetName().c_str(), i, - tensor_name.c_str()); + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), + "Failed to get input name. node = %s, index = %zu", op_desc->GetName().c_str(), i); + GELOGD("Successfully inserted input tensor. 
node = %s, index = %zu, input name = %s", + op_desc->GetName().c_str(), i, tensor_name.c_str()); named_inputs.emplace(tensor_name, tensor); } return SUCCESS; } -Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector &outputs, +Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, + vector &outputs, map &named_outputs) { if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { - GELOGW("size of ouputs not match, size of outputs = %zu, exactly output_num=%zu.", outputs.size(), - op_desc->GetOutputsSize()); + GELOGW("size of ouputs not match, size of outputs = %zu, exactly output_num=%zu.", + outputs.size(), op_desc->GetOutputsSize()); outputs.clear(); } bool need_create_flag = (outputs.size() != op_desc->GetOutputsSize()); @@ -199,7 +203,8 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector &named_inputs, map &named_outputs) { GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str()); @@ -259,7 +264,9 @@ ge::Status HostCpuEngine::GetLibPath(std::string &lib_path) { return SUCCESS; } -static int RegularFileFilterFn(const mmDirent *entry) { return entry->d_type == DT_REG; } +static int RegularFileFilterFn(const mmDirent *entry) { + return entry->d_type == DT_REG; +} Status HostCpuEngine::ListSoFiles(const std::string &base_dir, std::vector &names) { std::string real_path = base_dir; @@ -312,7 +319,7 @@ Status HostCpuEngine::LoadLib(const std::string &lib_path) { return INTERNAL_ERROR; } - auto initialize = (Status(*)(const HostCpuContext &))dlsym(handle, "Initialize"); + auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize"); if (initialize != nullptr) { GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str()); if (initialize(HostCpuContext()) != SUCCESS) { @@ -335,4 +342,4 @@ Status HostCpuEngine::GetRealPath(std::string &path) { path = real_path; return SUCCESS; } -} // namespace ge +} // namespace ge diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h index 1987138d..cc6b578c 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/ge/ge_local_engine/engine/host_cpu_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
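host_cpu_engine.cc keeps its plugin model: list the .so files under the opp path, dlopen each one, and call an optional exported Initialize symbol (the hunk only re-spells the cast on the dlsym result). A hedged sketch of that pattern with plain POSIX calls; GE's Status and HostCpuContext are replaced by an int return, and the handle bookkeeping done by lib_handles_/CloseSo is omitted:

```cpp
#include <dlfcn.h>
#include <cstdio>

static bool LoadHostCpuLib(const char *path) {
  void *handle = dlopen(path, RTLD_NOW);
  if (handle == nullptr) {
    std::fprintf(stderr, "dlopen %s failed: %s\n", path, dlerror());
    return false;
  }
  using InitFn = int (*)();
  auto init = reinterpret_cast<InitFn>(dlsym(handle, "Initialize"));
  if (init != nullptr && init() != 0) {  // Initialize is optional, as in the hunk
    std::fprintf(stderr, "Initialize failed for %s\n", path);
    dlclose(handle);
    return false;
  }
  return true;  // real code keeps the handle open for later CloseSo()
}
```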
*/ - #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ @@ -21,7 +20,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" #include "graph/operator.h" -#include "inc/register/register.h" +#include "register/register.h" namespace ge { class HostCpuEngine { diff --git a/ge/ge_local_engine/module.mk b/ge/ge_local_engine/module.mk old mode 100644 new mode 100755 index ee6b15c1..574f08b8 --- a/ge/ge_local_engine/module.mk +++ b/ge/ge_local_engine/module.mk @@ -8,6 +8,12 @@ local_lib_src_files := engine/ge_local_engine.cc \ ops_kernel_store/op/ge_deleted_op.cc \ ops_kernel_store/op/no_op.cc \ +ops_kernel_builder_src_files := ops_kernel_store/ge_local_ops_kernel_builder.cc \ + ops_kernel_store/op/op_factory.cc \ + ops_kernel_store/op/op.cc \ + ops_kernel_store/op/ge_deleted_op.cc \ + ops_kernel_store/op/no_op.cc \ + local_lib_inc_path := proto/task.proto \ ${LOCAL_PATH} \ ${TOPDIR}inc \ @@ -57,3 +63,64 @@ LOCAL_SRC_FILES := $(local_lib_src_files) LOCAL_C_INCLUDES := $(local_lib_inc_path) include ${BUILD_HOST_SHARED_LIBRARY} + +#compiler for libge_local_opskernel_builder.so +include $(CLEAR_VARS) +LOCAL_MODULE := libge_local_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libregister \ + libgraph + +LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} + + +#compiler for libge_local_opskernel_builder.so in atc +include $(CLEAR_VARS) +LOCAL_MODULE := atclib/libge_local_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libregister \ + libgraph + +LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} + +#compiler for libge_local_opskernel_builder.a +include $(CLEAR_VARS) +LOCAL_MODULE := libge_local_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := libprotobuf \ + libregister \ + libgraph \ + +LOCAL_SHARED_LIBRARIES := libc_sec \ + libslog \ + +LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_STATIC_LIBRARY} diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc new file mode 100644 index 00000000..9496d0fc --- /dev/null +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc @@ -0,0 +1,181 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ge_local_ops_kernel_builder.h" +#include +#include "common/ge_inner_error_codes.h" +#include "common/ge/ge_util.h" +#include "framework/common/debug/ge_log.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" +#include "ge_local_engine/ops_kernel_store/op/op_factory.h" +#include "ge_local_engine/common/constant/constant.h" +#include "register/ops_kernel_builder_registry.h" + +namespace ge { +namespace ge_local { +REGISTER_OPS_KERNEL_BUILDER(kGeLocalOpKernelLibName, GeLocalOpsKernelBuilder); + +namespace { +const char *const kConstantOpType = "Constant"; +const char *const kConstantOpAttrName = "value"; +const char *const kDataOpType = "Data"; +} // namespace + +GeLocalOpsKernelBuilder::~GeLocalOpsKernelBuilder() { + GELOGI("GeLocalOpsKernelBuilder destroyed"); +} + +Status GeLocalOpsKernelBuilder::Initialize(const map &options) { + return SUCCESS; +} + +Status GeLocalOpsKernelBuilder::Finalize() { + return SUCCESS; +} + +Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { + GELOGD("[%s] CalcOpRunningParam In.", ge_node.GetName().c_str()); + OpDescPtr op_desc = ge_node.GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); + return FAILED; + } + + bool is_shape_unknown = false; + if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { + if (is_shape_unknown) { + GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); + return SUCCESS; + } + } + + const string node_name = ge_node.GetName(); + const string node_type = ge_node.GetType(); + size_t output_size = op_desc->GetOutputsSize(); + GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size); + + for (size_t i = 0; i < output_size; ++i) { + GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); + Format format = output_tensor.GetFormat(); + DataType data_type = output_tensor.GetDataType(); + + int64_t mem_size = 0; + graphStatus graph_status = TensorUtils::GetSize(output_tensor, mem_size); + // If mem size has been set, no need reset. 
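The new ge_local_ops_kernel_builder.cc registers itself through REGISTER_OPS_KERNEL_BUILDER(kGeLocalOpKernelLibName, GeLocalOpsKernelBuilder). A hedged illustration of the self-registration idiom such macros typically rely on; the Builder base class, BuilderRegistry and REGISTER_BUILDER macro below are stand-ins, not the real register/ops_kernel_builder_registry.h API:

```cpp
#include <functional>
#include <map>
#include <memory>
#include <string>

class Builder {
 public:
  virtual ~Builder() = default;
};

class BuilderRegistry {
 public:
  static BuilderRegistry &Instance() {
    static BuilderRegistry inst;
    return inst;
  }
  void Register(const std::string &lib, std::function<std::unique_ptr<Builder>()> factory) {
    factories_[lib] = std::move(factory);
  }
  std::unique_ptr<Builder> Create(const std::string &lib) const {
    auto it = factories_.find(lib);
    if (it == factories_.end()) {
      return nullptr;
    }
    return it->second();
  }

 private:
  std::map<std::string, std::function<std::unique_ptr<Builder>()>> factories_;
};

// A namespace-scope object whose initializer runs before main() and inserts
// the factory into the registry, keyed by kernel-lib name.
#define REGISTER_BUILDER(lib, clazz)                                       \
  static const bool g_registered_##clazz = [] {                            \
    BuilderRegistry::Instance().Register(                                  \
        (lib), [] { return std::unique_ptr<Builder>(new clazz()); });      \
    return true;                                                           \
  }()
```

A concrete builder would then be declared as `class GeLocalLikeBuilder : public Builder { ... }; REGISTER_BUILDER("DNN_VM_GE_LOCAL_OP_STORE", GeLocalLikeBuilder);`, making it creatable by name at runtime without any central list of builders.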
+ if ((graph_status == GRAPH_SUCCESS) && (mem_size > 0) && (data_type != DT_STRING)) { + GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", + node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), mem_size); + continue; + } + + int64_t output_mem_size = 0; + GeShape output_shape = output_tensor.GetShape(); + if ((node_type == kConstantOpType) && (data_type == DT_STRING)) { + graph_status = CalcConstantStrMemSize(op_desc, output_mem_size); + } else if (node_type == kDataOpType) { + int64_t o_size = 0; + graph_status = TensorUtils::GetTensorMemorySizeInBytes(output_tensor, o_size); + output_mem_size = o_size; + } else { + graph_status = TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size); + } + + if (graph_status != GRAPH_SUCCESS) { + GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, format=%s, data_type=%s, error=%u.", node_name.c_str(), + node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); + return FAILED; + } + + if (output_mem_size < 0) { + GELOGE(FAILED, + "Calc op[%s:%s] out[%zu] mem size is negative(not support)," + " format=%s, data_type=%s, mem_size=%ld.", + node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); + return FAILED; + } + GELOGI( + "Calc op[%s:%s] out[%zu] mem size is %ld," + " format=%s, data_type=%s.", + node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + + TensorUtils::SetSize(output_tensor, output_mem_size); + + graph_status = op_desc->UpdateOutputDesc(static_cast(i), output_tensor); + if (graph_status != GRAPH_SUCCESS) { + GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s, error=%u.", node_name.c_str(), + node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); + return FAILED; + } + } + GELOGD("Calc op[%s:%s] running param success.", node_name.c_str(), node_type.c_str()); + return SUCCESS; +} + +Status GeLocalOpsKernelBuilder::CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size) { + if (op_desc == nullptr) { + GELOGE(FAILED, "CalcConstantStrMemSize failed, as op desc is null"); + return FAILED; + } + ConstGeTensorPtr value = MakeShared(); + if (value == nullptr) { + GELOGE(FAILED, "make shared ConstGeTensor exception."); + return FAILED; + } + // Constant op attr name is "value" + if (!AttrUtils::GetTensor(op_desc, kConstantOpAttrName, value)) { + GELOGE(FAILED, "Get Constant op attr value failed"); + return FAILED; + } + mem_size = static_cast(value->GetData().size()); + return SUCCESS; +} + +Status GeLocalOpsKernelBuilder::GenerateTask(const Node &node, RunContext &context, std::vector &tasks) { + bool is_shape_unknown = false; + if (NodeUtils::GetNodeUnknownShapeStatus(node, is_shape_unknown) == GRAPH_SUCCESS) { + if (is_shape_unknown) { + GELOGI("op:%s is unknown shape, does not need to generate task", + node.GetName().c_str()); + return SUCCESS; + } + } + string name = node.GetName(); + string type = node.GetType(); + GELOGD("Ge local generate task for node:%s(%s) begin, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); + + auto 
op = OpFactory::Instance().CreateOp(node, context); + if (op == nullptr) { + GELOGE(FAILED, "CreateOp for node:%s(%s) failed.", name.c_str(), type.c_str()); + return FAILED; + } + + Status ret = op->Run(); + if (ret != SUCCESS) { + GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); + return ret; + } + GELOGI("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); + return ret; +} +} // namespace ge_local +} // namespace ge diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h new file mode 100644 index 00000000..8a7dafe2 --- /dev/null +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_UTILS_GE_LOCAL_OPS_KERNEL_UTILS_H_ +#define GE_GE_LOCAL_ENGINE_OPS_KERNEL_UTILS_GE_LOCAL_OPS_KERNEL_UTILS_H_ + +#include "external/ge/ge_api_error_codes.h" +#include "common/opskernel/ops_kernel_builder.h" + +namespace ge { +namespace ge_local { +class GeLocalOpsKernelBuilder : public OpsKernelBuilder { + public: + ~GeLocalOpsKernelBuilder() override; + Status Initialize(const map &options) override; + + Status Finalize() override; + + Status CalcOpRunningParam(Node &node) override; + + Status GenerateTask(const Node &node, RunContext &context, std::vector &tasks) override; + + private: + /** + * Calc memSize for constant which type is DT_STRING. + * @param op_desc OpDesc information + * @param mem_size output size + * @return whether this operation success + */ + Status CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size); +}; +} // namespace ge_local +} // namespace ge + +#endif // GE_GE_LOCAL_ENGINE_OPS_KERNEL_UTILS_GE_LOCAL_OPS_KERNEL_UTILS_H_ diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc old mode 100644 new mode 100755 index adf936c0..773abd21 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
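Taken together, CalcOpRunningParam and CalcConstantStrMemSize above implement a small sizing policy for each output: keep a size that is already set, take the attribute-tensor size for string constants, and otherwise derive the size from shape and data type. A hedged, dependency-free restatement; OutputInfo and DecideOutputMemSize are invented for illustration:

```cpp
#include <cstdint>
#include <string>

struct OutputInfo {
  int64_t preset_size;    // size already stored on the tensor desc, <= 0 if unset
  int64_t element_count;  // product of the output shape dims
  int64_t element_bytes;  // byte width of the data type
  bool is_string;         // DT_STRING output
};

// Returns the memory size to record for one output, or -1 on error.
int64_t DecideOutputMemSize(const std::string &node_type, const OutputInfo &out,
                            int64_t constant_string_bytes) {
  if (out.preset_size > 0 && !out.is_string) {
    return out.preset_size;                       // already set: keep it
  }
  if (node_type == "Constant" && out.is_string) {
    return constant_string_bytes;                 // CalcConstantStrMemSize() path
  }
  // "Data" nodes use GetTensorMemorySizeInBytes in the real code; a plain
  // shape-times-width product stands in here.
  const int64_t size = out.element_count * out.element_bytes;
  return size < 0 ? -1 : size;                    // negative sizes are rejected
}
```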
@@ -73,8 +73,8 @@ Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { bool is_shape_unknown = false; if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); - return SUCCESS; + GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); + return SUCCESS; } } @@ -126,10 +126,10 @@ Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { return FAILED; } GELOGI( - "Calc op[%s:%s] out[%zu] mem size is %ld," - " format=%s, data_type=%s.", - node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + "Calc op[%s:%s] out[%zu] mem size is %ld," + " format=%s, data_type=%s.", + node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); TensorUtils::SetSize(output_tensor, output_mem_size); @@ -170,8 +170,9 @@ Status GeLocalOpsKernelInfoStore::GenerateTask(const Node &node, RunContext &con bool is_shape_unknown = false; if (NodeUtils::GetNodeUnknownShapeStatus(node, is_shape_unknown) == GRAPH_SUCCESS) { if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, does not need to generate task", node.GetName().c_str()); - return SUCCESS; + GELOGI("op:%s is unknown shape, does not need to generate task", + node.GetName().c_str()); + return SUCCESS; } } string name = node.GetName(); diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h old mode 100644 new mode 100755 index ce123751..3dbef99e --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,14 +78,14 @@ class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { * Create session * @param session_options Session Options * @return status whether this operation success - */ + */ Status CreateSession(const std::map &session_options) override; /** * Destroy session * @param session_options Session Options * @return status whether this operation success - */ + */ Status DestroySession(const std::map &session_options) override; // Copy prohibited diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc old mode 100644 new mode 100755 index badca5a3..b2f3d095 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h index ebaeef2d..55587b2e 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.cc b/ge/ge_local_engine/ops_kernel_store/op/no_op.cc old mode 100644 new mode 100755 index 62fe1b5d..51c65ce0 --- a/ge/ge_local_engine/ops_kernel_store/op/no_op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.h b/ge/ge_local_engine/ops_kernel_store/op/no_op.h index 31199b25..40e5766b 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/no_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.cc b/ge/ge_local_engine/ops_kernel_store/op/op.cc index 0a5625de..11229b2c 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.h b/ge/ge_local_engine/ops_kernel_store/op/op.h index 1b184dad..c5a3df7a 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,10 +21,10 @@ #include #include #include "common/ge_inner_error_codes.h" -#include "common/opskernel/ops_kernel_info_types.h" #include "graph/node.h" namespace ge { +struct RunContext; namespace ge_local { /** * The base class for all op. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc index 49fc1084..c57b4f4d 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
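The op.h hunk above swaps the ops_kernel_info_types.h include for a bare `struct RunContext;` forward declaration. That is safe because the header only refers to RunContext by reference or pointer, so the compiler never needs the full definition there; only the .cc files that actually touch RunContext members keep the heavier include. A small illustration, with the hypothetical name DemoOpUser, assuming the same reference-only usage:

struct RunContext;  // declared, not defined: sufficient for references and pointers

class DemoOpUser {
 public:
  explicit DemoOpUser(RunContext &context) : context_(context) {}  // stores a reference only

  // void Run();  // implemented in a .cc that includes the full RunContext definition

 private:
  RunContext &context_;
};

Keeping the full definition out of widely included headers shortens rebuilds and avoids include cycles; the cost is that any new member access added to the header would force the include back in.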
diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h index 6d0c16f4..0faab508 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/proto/task.proto b/ge/ge_local_engine/proto/task.proto new file mode 120000 index 00000000..36ae4847 --- /dev/null +++ b/ge/ge_local_engine/proto/task.proto @@ -0,0 +1 @@ +../../proto/task.proto \ No newline at end of file diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 7a65787c..6947e679 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -1,5 +1,5 @@ LOCAL_PATH := $(call my-dir) - +include $(LOCAL_PATH)/stub/Makefile LIBGE_LOCAL_SRC_FILES := \ proto/fusion_model.proto \ proto/optimizer_priority.proto \ @@ -89,7 +89,9 @@ LIBGE_LOCAL_SRC_FILES := \ graph/manager/graph_mem_allocator.cc \ graph/manager/graph_caching_allocator.cc \ graph/manager/graph_var_manager.cc \ + graph/manager/host_mem_manager.cc \ graph/manager/rdma_pool_allocator.cc \ + graph/manager/memory_api.cc \ graph/manager/model_manager/event_manager.cc \ graph/manager/trans_var_data_utils.cc \ graph/manager/util/debug.cc \ @@ -179,6 +181,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/multi_batch_pass.cc \ graph/passes/multi_batch_clone_pass.cc \ graph/passes/subexpression_migration_pass.cc \ + graph/passes/subgraph_const_migration_pass.cc \ graph/passes/unused_args_clean_pass.cc \ graph/passes/net_output_pass.cc \ graph/passes/next_iteration_pass.cc \ @@ -368,7 +371,6 @@ LOCAL_SHARED_LIBRARIES := \ libmsprof \ liberror_manager \ - LOCAL_LDFLAGS := -lrt -ldl LOCAL_SHARED_LIBRARIES += \ @@ -392,9 +394,8 @@ endif LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) -LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc -LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_prof.cc - +LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ + ../../out/ge/lib64/stub/ge_prof.cc \ LOCAL_SHARED_LIBRARIES := diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt deleted file mode 100755 index 5a90202d..00000000 --- a/ge/ge_runtime/CMakeLists.txt +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -# libge_runtime.so -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}/ge) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc/graph) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/inc/framework/common) -include_directories(${GE_SOURCE_DIR}/inc/framework/ge_runtime) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) - -######### libge_runtime.so ############# -file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "model_runner.cc" - "runtime_model.cc" - "output.cc" - "task/*.cc" - ) - -add_library(ge_runtime SHARED ${GE_SRC_LIST}) -target_compile_definitions(ge_runtime PUBLIC - PROTOBUF_INLINE_NOT_IN_HEADERS=0 - Werror) -target_link_libraries(ge_runtime - graph - ${slog} - ${runtime} - ${c_sec} - rt - dl - ) diff --git a/ge/ge_runtime/model_context.h b/ge/ge_runtime/model_context.h old mode 100644 new mode 100755 index 259ff91f..8860f0da --- a/ge/ge_runtime/model_context.h +++ b/ge/ge_runtime/model_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,13 +27,8 @@ class ModelContext { ModelContext(uint32_t device_id, uint64_t session_id, int32_t priority, rtModel_t rt_model_handle, rtStream_t rt_model_stream, const std::vector &stream_list, const std::vector &label_list, const std::vector &event_list) - : device_id_(device_id), - session_id_(session_id), - priority_(priority), - rt_model_handle_(rt_model_handle), - rt_model_stream_(rt_model_stream), - stream_list_(stream_list), - label_list_(label_list), + : device_id_(device_id), session_id_(session_id), priority_(priority), rt_model_handle_(rt_model_handle), + rt_model_stream_(rt_model_stream), stream_list_(stream_list), label_list_(label_list), event_list_(event_list) {} ~ModelContext() {} diff --git a/ge/ge_runtime/model_runner.cc b/ge/ge_runtime/model_runner.cc index 9961ab4e..2c2efde4 100644 --- a/ge/ge_runtime/model_runner.cc +++ b/ge/ge_runtime/model_runner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,7 +24,6 @@ namespace ge { namespace model_runner { - using RuntimeModelPtr = std::shared_ptr; using DavinciModelPtr = std::shared_ptr; diff --git a/ge/ge_runtime/module.mk b/ge/ge_runtime/module.mk new file mode 100755 index 00000000..43d81bfa --- /dev/null +++ b/ge/ge_runtime/module.mk @@ -0,0 +1,66 @@ +LOCAL_PATH := $(call my-dir) + +# task.proto is old task, add it for ops_kernel_info_store +local_ge_runtime_src_files := \ + model_runner.cc \ + runtime_model.cc \ + output.cc \ + task/aicpu_task.cc \ + task/cce_task.cc \ + task/tbe_task.cc \ + task/event_record_task.cc \ + task/event_wait_task.cc \ + task/stream_active_task.cc \ + task/stream_switch_task.cc \ + task/hccl_task.cc \ + task/memcpy_async_task.cc \ + task/profiler_task.cc \ + +local_ge_runtime_include := \ + $(LOCAL_PATH)/ \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/graph \ + $(TOPDIR)inc \ + $(LOCAL_PATH)/../ \ + third_party/protobuf/include + +local_ge_runtime_shared_library := \ + libruntime \ + libslog \ + libc_sec + +local_ge_runtime_ldflags := -lrt -ldl + +# compile device libge_runtime +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_runtime +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +LOCAL_CFLAGS += -Werror +LOCAL_SRC_FILES := $(local_ge_runtime_src_files) +LOCAL_C_INCLUDES := $(local_ge_runtime_include) +LOCAL_SHARED_LIBRARIES := $(local_ge_runtime_shared_library) +LOCAL_LDFLAGS += $(local_ge_runtime_ldflags) + +include $(BUILD_SHARED_LIBRARY) + +# compile host libge_runtime +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_runtime +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 +ifeq ($(DEBUG), 1) + LOCAL_CFLAGS += -g -O0 +else + LOCAL_CFLAGS += -O2 +endif +LOCAL_SRC_FILES := $(local_ge_runtime_src_files) +LOCAL_C_INCLUDES := $(local_ge_runtime_include) +LOCAL_SHARED_LIBRARIES := $(local_ge_runtime_shared_library) +LOCAL_LDFLAGS += $(local_ge_runtime_ldflags) + +include $(BUILD_HOST_SHARED_LIBRARY) diff --git a/ge/ge_runtime/output.cc b/ge/ge_runtime/output.cc index 5153f688..eec8d170 100644 --- a/ge/ge_runtime/output.cc +++ b/ge/ge_runtime/output.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ bool Output::CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_inde DataBuffer data_buf = rslt->blobs[data_begin + data_count]; bool ret = SetDataBuf(data_buf, data_begin, data_count, i, support_mem_share); if (!ret) { - GELOGE(FAILED, "Copy data to host error. index: %lu, addr: %p", i, v_input_data_addr_[i]); + GELOGE(FAILED, "Copy data to host failed. 
index: %lu, addr: %p", i, v_input_data_addr_[i]); return ret; } data_index = data_begin + data_count; @@ -89,6 +89,5 @@ bool Output::SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &dat bool support_mem_share) { return true; } - } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/output.h b/ge/ge_runtime/output.h old mode 100644 new mode 100755 index 1f7f91ee..13ea956d --- a/ge/ge_runtime/output.h +++ b/ge/ge_runtime/output.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ namespace ge { namespace model_runner { - class Output { public: Output(const OpInfoPtr &op_info, const std::shared_ptr &model); @@ -33,7 +32,8 @@ class Output { bool CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share); - bool SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, bool support_mem_share); + bool SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, + bool support_mem_share); // Copy assignment operator and copy constructor are deleted Output &operator=(const Output &output) = delete; diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc index f0405056..0b76cbaf 100644 --- a/ge/ge_runtime/runtime_model.cc +++ b/ge/ge_runtime/runtime_model.cc @@ -74,8 +74,8 @@ bool RuntimeModel::InitStream(std::shared_ptr &davinci_model) { for (uint32_t i = 0; i < davinci_model->GetStreamNum(); ++i) { rtStream_t stream = nullptr; uint32_t flag = (force_copy_streams.find(i) != force_copy_streams.end()) - ? (RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) - : (RT_STREAM_PERSISTENT); + ? 
(RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) + : (RT_STREAM_PERSISTENT); rtError_t rt_ret = rtStreamCreateWithFlags(&stream, davinci_model->GetPriority(), flag); if (rt_ret != RT_ERROR_NONE) { @@ -115,34 +115,23 @@ bool RuntimeModel::InitEvent(uint32_t event_num) { return true; } -bool RuntimeModel::InitLabel(std::shared_ptr &davinci_model) { - GELOGI("batch number:%u.", davinci_model->GetBatchNum()); - label_list_.resize(davinci_model->GetBatchNum()); - for (auto &task_info : davinci_model->GetTaskInfoList()) { - if (task_info == nullptr) { - GELOGE(PARAM_INVALID, "task_info is null."); - continue; - } - - if (task_info->type() != TaskInfoType::LABEL_SET) { - continue; - } - auto label_set_task_info = std::static_pointer_cast(task_info); - - if (label_set_task_info->stream_id() >= stream_list_.size()) { - GELOGE(PARAM_INVALID, "Invalid stream id."); +bool RuntimeModel::InitLabel(uint32_t batch_num) { + GELOGI("batch number:%u.", batch_num); + for (uint32_t i = 0; (batch_num != 0 && i <= batch_num); ++i) { + rtLabel_t rt_lLabel = nullptr; + rtError_t rt_ret = rtLabelCreate(&rt_lLabel); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, i; %u; ret: 0x%X", i, rt_ret); return false; } - rtLabel_t rt_label = nullptr; - rtError_t rt_ret = rtLabelCreateEx(&rt_label, stream_list_[label_set_task_info->stream_id()]); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, ret: 0x%X", rt_ret); + if (rt_lLabel == nullptr) { + GELOGE(RT_FAILED, "rtLabel is nullptr!"); return false; } - label_list_[label_set_task_info->label_id()] = rt_label; - } + label_list_.emplace_back(rt_lLabel); + } return true; } @@ -174,7 +163,7 @@ bool RuntimeModel::InitResource(std::shared_ptr &davinci_model) { return false; } - if (!InitLabel(davinci_model)) { + if (!InitLabel(davinci_model->GetBatchNum())) { return false; } @@ -292,6 +281,7 @@ bool RuntimeModel::DistributeTask() { GELOGE(FAILED, "DistributeTask failed"); return false; } + return true; } @@ -303,14 +293,10 @@ bool RuntimeModel::Run() { return false; } - GELOGI("Run rtModelExecute success, ret = 0x%X", ret); + GELOGI("Run rtModelExecute success"); ret = rtStreamSynchronize(rt_model_stream_); if (ret != RT_ERROR_NONE) { - if (ret == RT_ERROR_END_OF_SEQUENCE) { - GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret); - return true; - } GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); return false; } @@ -470,7 +456,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model } if (constant->output_tensors[0].size < constant->weight_data.size()) { - GELOGE(PARAM_INVALID, "Output size:%u less than weight data size:%zu", constant->output_tensors[0].size, + GELOGE(PARAM_INVALID, "Output size:%u is less than weight data size:%zu", constant->output_tensors[0].size, constant->weight_data.size()); return false; } @@ -485,8 +471,11 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model /// The logic of GetShapeSize is wrong, the scaler tensor's GetShapeSize is zero /// and that of unknown shape is zero too. /// Unknown shape will not appear here, so we can use zero judge a tensor is scaler or not. - int64_t elem_num = - (constant->weight_tensors[0].GetShapeSize() == 0) ? 
1 : constant->weight_tensors[0].GetShapeSize(); + int64_t elem_num = constant->weight_tensors[0].GetShapeSize(); + if (elem_num == 0 && constant->weight_tensors[0].size == 0) { + elem_num = 1; + } + if (constant->weight_data.size() < sizeof(uint64_t)) { GELOGE(FAILED, "weight_data size is smaller than sizeof(uint64_t)"); return false; diff --git a/ge/ge_runtime/runtime_model.h b/ge/ge_runtime/runtime_model.h index d0c466d4..6109915f 100644 --- a/ge/ge_runtime/runtime_model.h +++ b/ge/ge_runtime/runtime_model.h @@ -40,11 +40,13 @@ class RuntimeModel { const std::vector &GetTaskIdList() const; const std::vector &GetStreamIdList() const; const std::map> &GetRuntimeInfoMap() const { return runtime_info_map_; } - rtModel_t GetModelHandle() const { return rt_model_handle_; } + const rtModel_t GetModelHandle() const { return rt_model_handle_; } bool Run(); bool CopyInputData(const InputData &input_data); - bool GetInputOutputDescInfo(bool zero_copy, std::vector *input_desc, - std::vector *output_desc, std::vector *input_format, + bool GetInputOutputDescInfo(bool zero_copy, + std::vector *input_desc, + std::vector *output_desc, + std::vector *input_format, std::vector *output_format); private: @@ -53,7 +55,7 @@ class RuntimeModel { bool LoadTask(); bool InitStream(std::shared_ptr &davinci_model); bool InitEvent(uint32_t event_num); - bool InitLabel(std::shared_ptr &davinci_model); + bool InitLabel(uint32_t batch_num); bool InitDataInfo(std::shared_ptr &davinci_model); bool InitOutputInfo(std::shared_ptr &davinci_model); bool InitConstantInfo(std::shared_ptr &davinci_model); @@ -85,7 +87,6 @@ class RuntimeModel { std::vector stream_id_list_{}; std::map> runtime_info_map_; }; - } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/aicpu_task.cc b/ge/ge_runtime/task/aicpu_task.cc old mode 100644 new mode 100755 index 5b3d8e82..61ef7a3c --- a/ge/ge_runtime/task/aicpu_task.cc +++ b/ge/ge_runtime/task/aicpu_task.cc @@ -26,7 +26,6 @@ AicpuTask::AicpuTask(const ModelContext &model_context, const std::shared_ptr(io_addrs.size()); auto io_addrs_size = static_cast(io_addrs_num * sizeof(void *)); constexpr uint32_t io_addr_offset = sizeof(aicpu::AicpuParamHead); - uint32_t node_def_len_offset = io_addr_offset + io_addrs_size; - uint32_t node_def_addr_offset = node_def_len_offset + sizeof(uint32_t); - uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addrs_size + - static_cast(task_info_->node_def().size()) + sizeof(uint32_t); - - aicpu::AicpuParamHead aicpu_param_head; - aicpu_param_head.length = args_size; - aicpu_param_head.ioAddrNum = io_addrs_num; - auto ext_info = task_info_->ext_info(); - uint32_t ext_size = ext_info.size(); - if (ext_info.empty()) { - aicpu_param_head.extInfoLength = 0; - aicpu_param_head.extInfoAddr = 0; - } else { - rtError_t flag = rtMalloc(&ext_info_, ext_size, RT_MEMORY_HBM); - if (flag != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X.", flag); - return false; - } - - flag = rtMemcpy(ext_info_, ext_size, const_cast(reinterpret_cast(ext_info.data())), ext_size, - RT_MEMCPY_HOST_TO_DEVICE); - if (flag != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api(rtMemCpy) failed, ret: 0x%X.", flag); - return false; - } - - GELOGI("ext info size:", ext_size); - aicpu_param_head.extInfoLength = ext_size; - aicpu_param_head.extInfoAddr = reinterpret_cast(ext_info_); - } + uint32_t node_def_addr_offset = io_addr_offset + io_addrs_size; + uint32_t args_size = + sizeof(aicpu::AicpuParamHead) + io_addrs_size + 
static_cast(task_info_->node_def().size()); + aicpu::AicpuParamHead aicpu_param_head = {args_size, io_addrs_num}; // Malloc device memory for args rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); @@ -111,17 +80,6 @@ bool AicpuTask::Distribute() { return false; } } - - // Memcpy node def - auto size = task_info_->node_def().size(); - rt_ret = - rtMemcpy(reinterpret_cast(reinterpret_cast(args_) + node_def_len_offset), sizeof(uint32_t), - reinterpret_cast(&size), sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X.", rt_ret); - return false; - } - // Memcpy node def rt_ret = rtMemcpy(reinterpret_cast(reinterpret_cast(args_) + node_def_addr_offset), task_info_->node_def().size(), reinterpret_cast(task_info_->node_def().data()), diff --git a/ge/ge_runtime/task/aicpu_task.h b/ge/ge_runtime/task/aicpu_task.h old mode 100644 new mode 100755 index 2d3c5040..cc21af8a --- a/ge/ge_runtime/task/aicpu_task.h +++ b/ge/ge_runtime/task/aicpu_task.h @@ -41,7 +41,6 @@ class AicpuTask : public TaskRepeater { std::shared_ptr task_info_; void *stream_; void *args_; - void *ext_info_; void *input_output_addr_; }; } // namespace model_runner diff --git a/ge/ge_runtime/task/cce_task.cc b/ge/ge_runtime/task/cce_task.cc old mode 100644 new mode 100755 index 04fd5610..1c1807b5 --- a/ge/ge_runtime/task/cce_task.cc +++ b/ge/ge_runtime/task/cce_task.cc @@ -103,9 +103,9 @@ bool CceTask::Distribute() { // Modify flowtable addr in args auto args = const_cast(task_info_->args().data()); auto task_offset = reinterpret_cast(const_cast(task_info_->args_offset().data())); - if (task_info_->args().size() < (task_offset[0] + sizeof(uint64_t))) { - GELOGE(FAILED, "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu", + GELOGE(FAILED, + "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu", static_cast(task_offset[0]), sizeof(uint64_t), task_info_->args().size()); return false; } @@ -136,7 +136,8 @@ bool CceTask::Distribute() { return false; } - rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), task_info_->sm_desc().data(), + rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), + task_info_->sm_desc().data(), task_info_->sm_desc().size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); @@ -145,8 +146,12 @@ bool CceTask::Distribute() { } // Kernel launch - rt_ret = rtKernelLaunch(stub_func_, task_info_->block_dim(), args_, task_info_->args_size(), - static_cast(sm_desc_), stream_); + rt_ret = rtKernelLaunch(stub_func_, + task_info_->block_dim(), + args_, + task_info_->args_size(), + static_cast(sm_desc_), + stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return false; diff --git a/ge/ge_runtime/task/cce_task.h b/ge/ge_runtime/task/cce_task.h old mode 100644 new mode 100755 diff --git a/ge/ge_runtime/task/event_record_task.h b/ge/ge_runtime/task/event_record_task.h old mode 100644 new mode 100755 index 7c1d4f80..b9ae5dba --- a/ge/ge_runtime/task/event_record_task.h +++ b/ge/ge_runtime/task/event_record_task.h @@ -33,7 +33,7 @@ class EventRecordTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/event_wait_task.cc b/ge/ge_runtime/task/event_wait_task.cc index 
558c2a59..5f1ffaad 100644 --- a/ge/ge_runtime/task/event_wait_task.cc +++ b/ge/ge_runtime/task/event_wait_task.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_runtime/task/event_wait_task.h b/ge/ge_runtime/task/event_wait_task.h old mode 100644 new mode 100755 index 9104bbf8..685be897 --- a/ge/ge_runtime/task/event_wait_task.h +++ b/ge/ge_runtime/task/event_wait_task.h @@ -33,7 +33,7 @@ class EventWaitTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc index 3d5f8504..771341c1 100644 --- a/ge/ge_runtime/task/hccl_task.cc +++ b/ge/ge_runtime/task/hccl_task.cc @@ -115,6 +115,7 @@ bool HcclTask::Distribute() { rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + (void)rtStreamDestroy(stream); return false; } @@ -128,6 +129,8 @@ bool HcclTask::Distribute() { ge_task.type = static_cast(RT_MODEL_TASK_HCCL); ge_task.stream = stream_; + GETaskKernelHcclInfo kernel_hccl_info; + ge_task.kernelHcclInfo.emplace_back(kernel_hccl_info); ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type(); ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr(); ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr(); diff --git a/ge/ge_runtime/task/hccl_task.h b/ge/ge_runtime/task/hccl_task.h old mode 100644 new mode 100755 diff --git a/ge/ge_runtime/task/label_goto_task.cc b/ge/ge_runtime/task/label_goto_task.cc deleted file mode 100644 index d357accb..00000000 --- a/ge/ge_runtime/task/label_goto_task.cc +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ge_runtime/task/label_goto_task.h" -#include "ge_runtime/task/task_factory.h" - -namespace ge { -namespace model_runner { -LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), - task_info_(task_info), - stream_(nullptr), - label_(nullptr) { - if (task_info_ == nullptr) { - GELOGW("task_info_ is null!"); - return; - } - auto stream_list = model_context.stream_list(); - auto label_list = model_context.label_list(); - uint32_t stream_id = task_info->stream_id(); - uint32_t label_id = task_info->label_id(); - GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); - GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); - if (stream_id >= stream_list.size() || label_id >= label_list.size()) { - GELOGW("Stream/Label id invalid."); - return; - } - stream_ = stream_list[stream_id]; - label_ = label_list[label_id]; -} - -LabelGotoTask::~LabelGotoTask() {} - -bool LabelGotoTask::Distribute() { - GELOGI("LabelGotoTask Distribute start."); - if (stream_ == nullptr) { - GELOGE(PARAM_INVALID, "stream is null!"); - return false; - } - if (label_ == nullptr) { - GELOGE(PARAM_INVALID, "label is null!"); - return false; - } - rtError_t rt_ret = rtLabelGotoEx(label_, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - GELOGI("DistributeTask end."); - return true; -} - -REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo); - -} // namespace model_runner -} // namespace ge diff --git a/ge/ge_runtime/task/label_goto_task.h b/ge/ge_runtime/task/label_goto_task.h deleted file mode 100644 index 4fd6d1bc..00000000 --- a/ge/ge_runtime/task/label_goto_task.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ -#define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ - -#include -#include "ge_runtime/task/task.h" - -namespace ge { -namespace model_runner { -class LabelGotoTask : public TaskRepeater { - public: - LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info); - - ~LabelGotoTask() override; - - bool Distribute() override; - - private: - std::shared_ptr task_info_; - void *stream_; - void *label_; -}; -} // namespace model_runner -} // namespace ge - -#endif // GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ diff --git a/ge/ge_runtime/task/label_set_task.cc b/ge/ge_runtime/task/label_set_task.cc deleted file mode 100644 index 3ab5802c..00000000 --- a/ge/ge_runtime/task/label_set_task.cc +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ge_runtime/task/label_set_task.h" -#include "ge_runtime/task/task_factory.h" - -namespace ge { -namespace model_runner { -LabelSetTask::LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), - task_info_(task_info), - stream_(nullptr), - label_(nullptr) { - if (task_info_ == nullptr) { - GELOGW("task_info_ is null!"); - return; - } - auto stream_list = model_context.stream_list(); - auto label_list = model_context.label_list(); - uint32_t stream_id = task_info->stream_id(); - uint32_t label_id = task_info->label_id(); - GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); - GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); - if (stream_id >= stream_list.size() || label_id >= label_list.size()) { - GELOGW("Stream/Label id invalid."); - return; - } - stream_ = stream_list[stream_id]; - label_ = label_list[label_id]; -} - -LabelSetTask::~LabelSetTask() {} - -bool LabelSetTask::Distribute() { - GELOGI("LabelSetTask Distribute start."); - if (stream_ == nullptr) { - GELOGE(PARAM_INVALID, "stream is null!"); - return false; - } - if (label_ == nullptr) { - GELOGE(PARAM_INVALID, "label is null!"); - return false; - } - rtError_t rt_ret = rtLabelSet(label_, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - GELOGI("DistributeTask end."); - return true; -} - -REGISTER_TASK(TaskInfoType::LABEL_SET, LabelSetTask, LabelSetTaskInfo); - -} // namespace model_runner -} // namespace ge diff --git a/ge/ge_runtime/task/label_set_task.h b/ge/ge_runtime/task/label_set_task.h deleted file mode 100644 index 70bf1584..00000000 --- a/ge/ge_runtime/task/label_set_task.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ -#define GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ - -#include -#include "ge_runtime/task/task.h" - -namespace ge { -namespace model_runner { -class LabelSetTask : public TaskRepeater { - public: - LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info); - - ~LabelSetTask() override; - - bool Distribute() override; - - private: - std::shared_ptr task_info_; - void *stream_; - void *label_; -}; -} // namespace model_runner -} // namespace ge - -#endif // GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ diff --git a/ge/ge_runtime/task/label_switch_task.cc b/ge/ge_runtime/task/label_switch_task.cc deleted file mode 100644 index a3c2d41a..00000000 --- a/ge/ge_runtime/task/label_switch_task.cc +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ge_runtime/task/label_switch_task.h" -#include "ge_runtime/task/task_factory.h" - -namespace ge { -namespace model_runner { -LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, - const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), - task_info_(task_info), - stream_(nullptr), - all_label_resource_(), - label_info_(nullptr) { - if (task_info_ == nullptr) { - GELOGW("task_info_ is null!"); - return; - } - - all_label_resource_ = model_context.label_list(); - auto stream_list = model_context.stream_list(); - uint32_t stream_id = task_info->stream_id(); - GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); - if (stream_id >= stream_list.size()) { - GELOGW("Stream id invalid."); - return; - } - stream_ = stream_list[stream_id]; -} - -LabelSwitchTask::~LabelSwitchTask() { - if (label_info_ != nullptr) { - rtError_t rt_ret = rtFree(label_info_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! 
ret: 0x%X.", rt_ret); - } - label_info_ = nullptr; - } -} - -bool LabelSwitchTask::Distribute() { - GELOGI("LabelSwitchTask Distribute start."); - if (!CheckParamValid()) { - return false; - } - - const std::vector &label_index_list = task_info_->label_list(); - std::vector label_list(task_info_->label_size(), nullptr); - - for (size_t i = 0; i < task_info_->label_size(); ++i) { - uint32_t label_index = label_index_list[i]; - if (label_index >= all_label_resource_.size()) { - GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index, - all_label_resource_.size()); - return false; - } - label_list[i] = all_label_resource_[label_index]; - GELOGI("Case %zu: label id %zu.", i, label_index); - } - - uint32_t label_info_size = sizeof(rtLabelDevInfo) * task_info_->label_size(); - rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - GELOGI("DistributeTask end."); - return true; -} - -bool LabelSwitchTask::CheckParamValid() { - if (stream_ == nullptr) { - GELOGE(PARAM_INVALID, "stream is null!"); - return false; - } - - if (task_info_->label_list().empty()) { - GELOGE(PARAM_INVALID, "label_list is empty."); - return false; - } - - if (task_info_->label_size() != task_info_->label_list().size()) { - GELOGE(PARAM_INVALID, "label_list size %zu but label_size is %u.", task_info_->label_list().size(), - task_info_->label_size()); - return false; - } - - if (task_info_->label_size() >= UINT32_MAX / sizeof(rtLabelDevInfo)) { - GELOGE(PARAM_INVALID, "label_size %u will overflow.", task_info_->label_size()); - return false; - } - - if (label_info_ != nullptr) { - GELOGE(PARAM_INVALID, "label_info_ has dirty data."); - return false; - } - - return true; -} - -REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo); - -} // namespace model_runner -} // namespace ge diff --git a/ge/ge_runtime/task/label_switch_task.h b/ge/ge_runtime/task/label_switch_task.h deleted file mode 100644 index 463faa31..00000000 --- a/ge/ge_runtime/task/label_switch_task.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ -#define GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ - -#include -#include "ge_runtime/task/task.h" - -namespace ge { -namespace model_runner { -class LabelSwitchTask : public TaskRepeater { - public: - LabelSwitchTask(const ModelContext &model_context, const std::shared_ptr &task_info); - - ~LabelSwitchTask() override; - - bool Distribute() override; - - private: - bool CheckParamValid(); - - std::shared_ptr task_info_; - void *stream_; - std::vector all_label_resource_; - void *label_info_; -}; -} // namespace model_runner -} // namespace ge - -#endif // GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ diff --git a/ge/ge_runtime/task/memcpy_async_task.h b/ge/ge_runtime/task/memcpy_async_task.h old mode 100644 new mode 100755 diff --git a/ge/ge_runtime/task/profiler_task.h b/ge/ge_runtime/task/profiler_task.h old mode 100644 new mode 100755 diff --git a/ge/ge_runtime/task/stream_active_task.h b/ge/ge_runtime/task/stream_active_task.h old mode 100644 new mode 100755 diff --git a/ge/ge_runtime/task/stream_switch_task.cc b/ge/ge_runtime/task/stream_switch_task.cc index 2adcb4bd..91141139 100644 --- a/ge/ge_runtime/task/stream_switch_task.cc +++ b/ge/ge_runtime/task/stream_switch_task.cc @@ -51,7 +51,7 @@ bool StreamSwitchTask::Distribute() { } if (static_cast(task_info_->true_stream_id()) >= stream_list_.size()) { - GELOGE(PARAM_INVALID, "true_stream_id %ld must less than stream_list_ size %zu!", task_info_->true_stream_id(), + GELOGE(PARAM_INVALID, "true_stream_id %ld must be less than stream_list_ size %zu!", task_info_->true_stream_id(), stream_list_.size()); return false; } diff --git a/ge/ge_runtime/task/stream_switch_task.h b/ge/ge_runtime/task/stream_switch_task.h old mode 100644 new mode 100755 index 81c12507..2caad200 --- a/ge/ge_runtime/task/stream_switch_task.h +++ b/ge/ge_runtime/task/stream_switch_task.h @@ -37,7 +37,6 @@ class StreamSwitchTask : public TaskRepeater { void *stream_; std::vector stream_list_; }; - } // namespace model_runner } // namespace ge #endif // GE_GE_RUNTIME_TASK_STREAM_SWITCH_TASK_H_ diff --git a/ge/ge_runtime/task/task.h b/ge/ge_runtime/task/task.h old mode 100644 new mode 100755 index 6c4df248..b8a937b7 --- a/ge/ge_runtime/task/task.h +++ b/ge/ge_runtime/task/task.h @@ -42,7 +42,7 @@ class Task { template class TaskRepeater : public Task { - static_assert(std::is_base_of(), "Wrong TaskInfo Type!"); + static_assert(std::is_base_of(), "Wrong TaskInfo Type!"); /*lint !e30*/ public: TaskRepeater(const ModelContext &model_context, std::shared_ptr task_info) {} diff --git a/ge/ge_runtime/task/task_factory.h b/ge/ge_runtime/task/task_factory.h index 670d1fef..29da1388 100644 --- a/ge/ge_runtime/task/task_factory.h +++ b/ge/ge_runtime/task/task_factory.h @@ -81,7 +81,6 @@ class TaskFactory { std::shared_ptr concrete_task_info = std::static_pointer_cast(task_info); \ return std::make_shared(model_context, concrete_task_info); \ }); - } // namespace model_runner } // namespace ge #endif // GE_GE_RUNTIME_TASK_TASK_FACTORY_H_ diff --git a/ge/ge_runtime/task/tbe_task.cc b/ge/ge_runtime/task/tbe_task.cc old mode 100644 new mode 100755 diff --git a/ge/ge_runtime/task/tbe_task.h b/ge/ge_runtime/task/tbe_task.h old mode 100644 new mode 100755 diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index edd7a155..1f91ae08 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies 
Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,7 +48,7 @@ const char *const kAIcoreEngine = "AIcoreEngine"; const char *const kFileNameSuffix = "online"; std::map engine_type_map{ - {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; + {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { @@ -136,6 +136,13 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen bool attr) { GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); + + auto format = tensor.GetFormat(); + auto data_type = tensor.GetDataType(); + if (format == FORMAT_RESERVED && data_type == DT_UNDEFINED) { + return SUCCESS; + } + string op_type; if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { op_type = DATA; @@ -244,7 +251,9 @@ class GeGenerator::Impl { bool SetOppVersionInfo(AttrHolder &obj); }; -Status GeGenerator::Initialize(const map &options) { return Initialize(options, domi::GetContext()); } +Status GeGenerator::Initialize(const map &options) { + return Initialize(options, domi::GetContext()); +} Status GeGenerator::Initialize(const map &options, OmgContext &omg_context) { impl_ = ge::MakeShared(omg_context); @@ -482,7 +491,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr if ((impl_->build_mode_ == BUILD_MODE_TUNING) && (impl_->build_step_ == BUILD_STEP_BEFORE_UB_MATCH || impl_->build_step_ == BUILD_STEP_AFTER_BUILDER || impl_->build_step_ == BUILD_STEP_AFTER_BUILDER_SUB)) { - GELOGI("Build mode:%s with step:%s no need SaveModel.", impl_->build_mode_.c_str(), impl_->build_step_.c_str()); + GELOGI("Build mode:%s with step:%s no need SaveModel.", + impl_->build_mode_.c_str(), + impl_->build_step_.c_str()); return SUCCESS; } @@ -521,8 +532,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); - if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { - GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize()); + if (!inputs.empty() && (inputs.size() != op_desc->GetAllInputsSize())) { + GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize()); return PARAM_INVALID; } if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { diff --git a/ge/generator/generator_api.cc b/ge/generator/generator_api.cc index 3f92f1a2..675b8811 100644 --- a/ge/generator/generator_api.cc +++ b/ge/generator/generator_api.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "generator/generator_api.h" #include "common/ge/ge_util.h" #include "common/util.h" @@ -116,7 +115,7 @@ Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int CHECK_PARAM_NOT_NULL(om_file); const std::string om_file_name(om_file); - std::string op_name = std::string(op_type) + "_" + std::to_string(ge::GetCurrentTimestap()); + std::string op_name = std::string(op_type) + "_" + std::to_string(ge::GetCurrentTimestamp()); ge::OpDescPtr op_desc = ge::MakeShared(op_name, op_type); if (op_desc == nullptr) { return ge::FAILED; diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 69eaa0a3..670e929d 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,26 +17,74 @@ #include "graph/build/graph_builder.h" #include "common/ge/ge_util.h" #include "common/helper/model_helper.h" -#include "common/opskernel/ops_kernel_info_types.h" #include "graph/build/logical_stream_allocator.h" #include "graph/build/run_context.h" #include "graph/build/stream_graph_optimizer.h" +#include "graph/common/ge_call_wrapper.h" +#include "graph/ge_context.h" #include "graph/manager/graph_var_manager.h" #include "graph/passes/mark_same_addr_pass.h" #include "graph/utils/node_utils.h" #include "graph/utils/type_utils.h" -#include "graph/common/ge_call_wrapper.h" #include "init/gelib.h" -#include "model/ge_model.h" -#include "graph/ge_context.h" using domi::BuildMode; - namespace { const int32_t kInvalidPerfLevel = -1; +enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; } // namespace namespace ge { +NodeType TransferNodeType(const NodePtr &node) { + const std::string type = node->GetType(); + if (type == ge::DATA) { + if (node->GetOwnerComputeGraph()->GetParentNode() == nullptr) { + GELOGD("access src data node:%s", node->GetName().c_str()); + return kOthers; + } + GELOGD("access subgraph input node:%s", node->GetName().c_str()); + return kSubgraphData; + } else if (type == PARTITIONEDCALL) { + GELOGD("access subgraph node:%s", node->GetName().c_str()); + return kSubgraphNode; + } + GELOGD("access other node:%s", node->GetName().c_str()); + return kOthers; +} + +Status HandleSubgraphNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) { + auto subgraph = NodeUtils::GetSubgraph(*src_node, 0); + GE_CHECK_NOTNULL(subgraph); + const NodePtr &net_output_node = subgraph->FindFirstNodeMatchType(NETOUTPUT); + GE_CHECK_NOTNULL(net_output_node); + const InDataAnchorPtr &in_data_anchor = net_output_node->GetInDataAnchor(src_out_anchor->GetIdx()); + GE_CHECK_NOTNULL(in_data_anchor); + const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_anchor); + + src_node = peer_out_anchor->GetOwnerNode(); + src_out_anchor = peer_out_anchor; + return SUCCESS; +} + +Status HandleSubgraphDataNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) { + uint32_t index = 0; + if (!AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, index)) { + GELOGE(FAILED, "Get attr ATTR_NAME_PARENT_NODE_INDEX failed, node:%s.", src_node->GetName().c_str()); + return FAILED; + } + const NodePtr &parent_node = src_node->GetOwnerComputeGraph()->GetParentNode(); + GE_CHECK_NOTNULL(parent_node); + const InDataAnchorPtr &in_data_anchor = 
parent_node->GetInDataAnchor(index); + GE_CHECK_NOTNULL(in_data_anchor); + const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_anchor); + + src_node = peer_out_anchor->GetOwnerNode(); + src_out_anchor = peer_out_anchor; + return SUCCESS; +} + GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { @@ -158,8 +206,8 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetOpDesc(), ATTR_INPUT_MEMORY_TYPE, mem_type)) { - GELOGD("[%s] has attr input_memory_type %ld", node->GetName().c_str(), mem_type); - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); - const auto &src_node = peer_out_anchor->GetOwnerNode(); - const auto &src_op = src_node->GetOpDesc(); - GE_IF_BOOL_EXEC(src_op == nullptr, continue); - if (!AttrUtils::SetInt(src_op, ATTR_OUTPUT_MEMORY_TYPE, mem_type)) { - GELOGE(INTERNAL_ERROR, "Set out_memory_type attr failed."); + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + uint32_t mem_type; + if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) { + return SUCCESS; + } + GELOGD("[%s] has attr input_memory_type %ld", op_desc->GetName().c_str(), mem_type); + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + bool valid_flag = false; + auto src_node = peer_out_anchor->GetOwnerNode(); + auto src_out_anchor = peer_out_anchor; + while (true) { + const auto &src_desc = src_node->GetOpDesc(); + GE_IF_BOOL_EXEC(src_desc == nullptr, continue); + GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), + mem_type); + if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, + mem_type)) { + GELOGE(INTERNAL_ERROR, "Set out_memory_type attr for [%s:%d] failed.", src_desc->GetName().c_str(), + src_out_anchor->GetIdx()); return INTERNAL_ERROR; } - return SUCCESS; + switch (TransferNodeType(src_node)) { + case kSubgraphNode: + GE_CHK_STATUS_RET(HandleSubgraphNode(src_node, src_out_anchor), "Handle subgraph node %s failed", + src_node->GetName().c_str()); + break; + case kSubgraphData: + GE_CHK_STATUS_RET(HandleSubgraphDataNode(src_node, src_out_anchor), "Handle Data node %s in subgraph failed", + src_node->GetName().c_str()); + break; + case kOthers: + default: + valid_flag = true; + break; + } + if (valid_flag) { + break; + } } } + return SUCCESS; } } // namespace ge diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index a70a5464..329f3ebc 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
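The new graph_builder.cc code above walks each input's producer chain when a node carries ATTR_INPUT_MEMORY_TYPE: it sets ATTR_OUTPUT_MEMORY_TYPE on every output it passes, descends into a PartitionedCall's subgraph via its NetOutput (HandleSubgraphNode), climbs out of a subgraph through the parent node's matching input (HandleSubgraphDataNode), and stops once an ordinary node is reached. The sketch below keeps only the traversal, not the attribute tagging; ToyNode, Kind and ResolveProducer are stand-ins, not GE's NodePtr/anchor API.

#include <memory>
#include <string>

enum class Kind { kSubgraphData, kSubgraphNode, kOthers };

struct ToyNode {
  Kind kind = Kind::kOthers;
  std::string name;
  // kSubgraphNode: the node inside the subgraph that feeds the requested output.
  // kSubgraphData: the producer of the parent node's corresponding input.
  std::shared_ptr<ToyNode> inner_producer;
};

// Follow proxy nodes until a real producer remains, mirroring the
// while/switch structure of the memory-type propagation loop above.
std::shared_ptr<ToyNode> ResolveProducer(std::shared_ptr<ToyNode> src) {
  while (src != nullptr) {
    switch (src->kind) {
      case Kind::kSubgraphNode:  // look through the subgraph call's NetOutput
      case Kind::kSubgraphData:  // hop to the parent node's matching input
        src = src->inner_producer;
        break;
      case Kind::kOthers:
      default:
        return src;              // ordinary node: this is the producer to tag
    }
  }
  return nullptr;  // broken chain: the caller should treat this as an error
}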
diff --git a/ge/graph/build/label_allocator.cc b/ge/graph/build/label_allocator.cc index f8fbe28b..0f3eff16 100644 --- a/ge/graph/build/label_allocator.cc +++ b/ge/graph/build/label_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/label_allocator.h b/ge/graph/build/label_allocator.h index 01811e1d..7c7b2f00 100644 --- a/ge/graph/build/label_allocator.h +++ b/ge/graph/build/label_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/logical_stream_allocator.cc b/ge/graph/build/logical_stream_allocator.cc index d1866584..5b8ce824 100644 --- a/ge/graph/build/logical_stream_allocator.cc +++ b/ge/graph/build/logical_stream_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,10 +25,10 @@ #include "graph/common/ge_call_wrapper.h" using std::map; -using std::queue; using std::set; using std::string; using std::vector; +using std::queue; namespace ge { LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} @@ -210,8 +210,8 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr } LogicalStreamPass::SubgraphPtr AssignByDependencyPass::GetReusableSubgraph( - const SubgraphPtr &subgraph, const map &end_subgraph_map, - const map &pld_subgraph_map) { + const SubgraphPtr &subgraph, const map &end_subgraph_map, + const map &pld_subgraph_map) { const SubGraphInfo &subgraph_info = subgraph->subgraph_info; for (const auto &pld_2_end : subgraph_info.GetPld2EndMap()) { const NodePtr &peer_end = pld_2_end.second; @@ -481,7 +481,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vectorGetOpDesc(), ATTR_NAME_STREAM_LABEL, out_stream_label); // normally, Allreduce do not have streamLabel. 
when in horovod scenario Allreduce will have streamLabel bool isSuccessorParallel = - (out_stream_label == reduce_stream_label) || (!reduce_stream_label.empty() && out_stream_label.empty()); + (out_stream_label == reduce_stream_label) || (!reduce_stream_label.empty() && out_stream_label.empty()); if (isSuccessorParallel) { all_reduce_succs.emplace(out_node); all_out_data_nodes.emplace(out_node); @@ -671,6 +671,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra int64_t stream_num = context_.next_stream; vector stream_has_node(stream_num); + for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { if (node != nullptr) { auto op_desc = node->GetOpDesc(); diff --git a/ge/graph/build/logical_stream_allocator.h b/ge/graph/build/logical_stream_allocator.h index 280a4104..e09d7cd6 100644 --- a/ge/graph/build/logical_stream_allocator.h +++ b/ge/graph/build/logical_stream_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt index 65450bba..dda7b9ea 100644 --- a/ge/graph/build/memory/CMakeLists.txt +++ b/ge/graph/build/memory/CMakeLists.txt @@ -1,52 +1,36 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ +set(SRC_LIST + "memory_assigner.cc" + "graph_mem_assigner.cc" + "binary_block_mem_assigner.cc" + "block_mem_assigner.cc" + "hybrid_mem_assigner.cc" + "max_block_mem_assigner.cc" + "var_mem_assign_util.cc" +) -# libge_memosy.a -file(GLOB_RECURSE SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - "var_mem_assign_util.cc" - ) +############ libge_memory.a ############ +add_library(ge_memory STATIC ${SRC_LIST}) -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}/ge) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/metadef/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) +target_compile_options(ge_memory PRIVATE + -Werror + -O2 +) -######### libge_memory.a ############# -add_library(ge_memory STATIC ${SRC_LIST}) -target_compile_definitions(ge_memory PRIVATE - Werror - DAVINCI_CLOUD) -target_link_libraries(ge_memory - graph - ge_common - ${PROTOBUF_LIBRARY} - ${c_sec} - ${slog} - rt - dl) +target_link_libraries(ge_memory PRIVATE + $ + protobuf + c_sec +) + +target_include_directories(ge_memory PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${GE_CODE_DIR}/inc/framework + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 8668e81e..61dd3462 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "graph/build/memory/binary_block_mem_assigner.h" #include #include "framework/common/debug/ge_log.h" diff --git a/ge/graph/build/memory/binary_block_mem_assigner.h b/ge/graph/build/memory/binary_block_mem_assigner.h index de6cae0d..96a31aac 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.h +++ b/ge/graph/build/memory/binary_block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc old mode 100644 new mode 100755 index 746f73c2..64d5aa95 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,10 +37,10 @@ #include "omg/omg_inner_types.h" #include "runtime/mem.h" -using std::list; using std::map; -using std::pair; using std::set; +using std::list; +using std::pair; using std::string; using std::stringstream; using std::unordered_map; @@ -168,10 +168,10 @@ void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLi auto it_block = std::max_element(std::begin(block->NoAlignSizeList()), std::end(block->NoAlignSizeList())); auto it_this = std::max_element(std::begin(NoAlignSizeList()), std::end(NoAlignSizeList())); if (it_block != std::end(block->NoAlignSizeList()) && it_this != std::end(NoAlignSizeList())) { - if ((continuous_block_ && block->continuous_block_) || (continuous_block_ && (*it_this < *it_block)) || - (block->continuous_block_ && (*it_this > *it_block))) { - GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", *it_this, - continuous_block_, *it_block, block->continuous_block_); + if ((continuous_block_ && block->continuous_block_) || + (continuous_block_ && (*it_this < *it_block)) || (block->continuous_block_ && (*it_this > *it_block))) { + GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", + *it_this, continuous_block_, *it_block, block->continuous_block_); return; } } @@ -189,11 +189,10 @@ void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLi parent->child_blocks_.emplace_back(child); parent->child_offset_ += child->AlignSize(); child->deleted_block_ = true; - GELOGI( - "Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" - " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", - child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, - parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); + GELOGI("Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" + " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", child, child->block_size_, + child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, parent->block_size_, + parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); } } @@ -221,11 +220,10 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ parent->child_blocks_.emplace_back(child); parent->child_offset_ += child->AlignSize(); child->deleted_block_ = true; - GELOGI( - "Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" - " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", - child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, - parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); + GELOGI("Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" + " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", child, child->block_size_, + child->stream_id_, 
child->GetLifeBegin(), child->GetLifeEnd(), parent, parent->block_size_, + parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); } } @@ -262,9 +260,9 @@ size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DependStreamLife &tota void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t stream_id, std::map &depend_stream_life, DependStreamLife &total_node_depend_stream_life) { - GE_CHECK_NOTNULL_EXEC(node, return ); + GE_CHECK_NOTNULL_EXEC(node, return); auto node_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL_EXEC(node_desc, return ); + GE_CHECK_NOTNULL_EXEC(node_desc, return); auto node_id = node_desc->GetId(); auto stream_life = total_node_depend_stream_life.find(node_id); if (stream_life != total_node_depend_stream_life.end()) { @@ -294,8 +292,8 @@ void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t depend_stream_life[peer_node_stream_id] = peer_node_life_time; if (peer_node_stream_id != stream_id) { GELOGI("Node:%s stream id:%ld depend node:%s stream id:%ld index[%d] life time[%zu].", - org_node->GetName().c_str(), stream_id, peer_node_desc->GetName().c_str(), peer_node_stream_id, - peer_out_anchor->GetIdx(), peer_node_life_time); + org_node->GetName().c_str(), stream_id, peer_node_desc->GetName().c_str(), + peer_node_stream_id, peer_out_anchor->GetIdx(), peer_node_life_time); } AddDependLife(org_node, peer_node, stream_id, depend_stream_life, total_node_depend_stream_life); } @@ -360,9 +358,9 @@ Status GetNoAlignSize(const ge::OpDesc &desc, uint32_t index, size_t &size) { // calculate tensor real size auto output_op_desc = desc.GetOutputDescPtr(index); if (output_op_desc == nullptr) { - GELOGI("GetNoAlignSize failed. OpName: %s, OpType: %s, index: %d", desc.GetName().c_str(), desc.GetType().c_str(), - index); - return FAILED; + GELOGI("GetNoAlignSize failed. 
OpName: %s, OpType: %s, index: %d", + desc.GetName().c_str(), desc.GetType().c_str(), index); + return FAILED; } int64_t tensor_size = 0; GeShape shape = output_op_desc->GetShape(); @@ -398,7 +396,7 @@ string MemoryBlock::String() { for (auto x : NodeTypeIndexList()) { ss << "__node: " << ToString(x) << " "; } - for (const auto &symbol : SymbolList()) { + for (const auto& symbol : SymbolList()) { ss << "__symbol: " << symbol << " "; } return ss.str(); @@ -406,14 +404,12 @@ string MemoryBlock::String() { BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &anchor_to_symbol, const map> &symbol_to_anchors) - : mem_offset_(0), - compute_graph_(std::move(compute_graph)), - symbol_to_anchors_(symbol_to_anchors), - anchor_to_symbol_(anchor_to_symbol), - life_time_(0) {} + : mem_offset_(0), compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors), + anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} BlockMemAssigner::~BlockMemAssigner() { - for (MemoryBlock *memory_block : memory_blocks_) { + GELOGD("blocks_store_ size : %lu", blocks_store_.size()); + for (MemoryBlock *memory_block : blocks_store_) { GE_DELETE_NEW_SINGLE(memory_block); } } @@ -535,16 +531,17 @@ bool CanReuseBySize(const map &reusable_block_counts, const Me string key = std::to_string(reusable_block.Size()); key += "_" + std::to_string(reusable_block.stream_id_); auto it = reusable_block_counts.find(key); - GE_IF_BOOL_EXEC( - (it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) && (reusable_block.Size() > block_size), - can_reuse = true; - GELOGD("Less size mem reuse, reuse block size:%zu, current block size:%zu", reusable_block.Size(), block_size);); + GE_IF_BOOL_EXEC((it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) && + (reusable_block.Size() > block_size), + can_reuse = true; + GELOGD("Less size mem reuse, reuse block size:%zu, current block size:%zu", + reusable_block.Size(), block_size);); } return can_reuse; } bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index) { + uint32_t &peer_input_index, bool &no_need_assign_memory) { if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } @@ -571,6 +568,12 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // If GetBool fail, is_input_continuous is false. 
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + + GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), + GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); + no_need_assign_memory = true; + return false;); + if (is_input_continuous) { if (n->GetOwnerComputeGraph() != nullptr) { string graph_name = n->GetOwnerComputeGraph()->GetName(); @@ -598,11 +601,11 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou /// @return void /// void BlockMemAssigner::InitReuseFlag() { - static const std::set kPreReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ANN_DATA_TYPE, - ge::NETOUTPUT, ge::PROPOSAL, ge::ZEROSLIKE, - ge::CONSTANT, ge::CONSTANTOP}; - static const std::set kPostReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, - ge::REFENTER, ge::NEXTITERATION, ge::REFNEXTITERATION}; + static const std::set kPreReuseTypes = { ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ANN_DATA_TYPE, + ge::NETOUTPUT, ge::PROPOSAL, ge::ZEROSLIKE, + ge::CONSTANT, ge::CONSTANTOP }; + static const std::set kPostReuseTypes = { ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, ge::REFENTER, + ge::NEXTITERATION, ge::REFNEXTITERATION }; for (const auto &pair : symbol_to_anchors_) { std::string symbol = pair.first; bool pre_reuse_flag = true; @@ -741,8 +744,8 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { if (is_output_continuous) { if (n->GetOwnerComputeGraph() != nullptr) { string graph_name = n->GetOwnerComputeGraph()->GetName(); - GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), n->GetName().c_str(), - n->GetAllOutDataAnchorsSize()); + GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), + n->GetName().c_str(), n->GetAllOutDataAnchorsSize()); return true; } } @@ -780,8 +783,11 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env); if (ge_disable_reuse_mem_env != "1") { bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); - is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && - reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); + is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && + !node_op_desc->HasAttr(kOpNoReuseMem) && + reuse_mem_flag && + is_op_reuse_mem && + (IsPreReuse(n, out_index)); auto stream_id = node_op_desc->GetStreamId(); if (is_reuse_memory && !continuous) { for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { @@ -828,6 +834,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } } memory_blocks_.emplace_back(block); + // cause memory_blocks_ may reduce when swap after, + // create blocks_store_ to assure blocks deleted finally + blocks_store_.emplace_back(block); return block; } @@ -859,8 +868,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec } auto block_size = GetBlockSize(total_size, ranges); - GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), total_size, - block_size); + GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), + total_size, block_size); vector workspace_reuse_flag; block = ApplyMemory(block_size, 
total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true); @@ -885,8 +894,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } size_t no_align_size = 0; - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, - "Get no align size failed"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, + return nullptr, "Get no align size failed"); std::string symbol; if (IsSymbolExist(node_index_io, symbol)) { @@ -904,8 +913,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, } auto block_size = GetBlockSize(max_size, ranges); vector workspace_reuse_flag; - block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, workspace_reuse_flag, is_op_reuse_mem, - continuous); + block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, + workspace_reuse_flag, is_op_reuse_mem, continuous); } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); int out_count_reuse_input = block->ref_count_; @@ -1134,8 +1143,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { - GELOGI("fusion: node[%s], output[%s], output memory type [%d]", op_desc->GetName().c_str(), - op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); + GELOGI("fusion: node[%s], output[%s], output memory type [%d]", + op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } std::string peer_name; @@ -1143,8 +1152,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector bool out_node_set_continuous_input = false; bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); if (!no_need_assign_memory) { - out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); - no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); + out_node_set_continuous_input = + IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory); + GE_IF_BOOL_EXEC(!no_need_assign_memory, + no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); } no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); if (no_need_assign_memory) { @@ -1228,8 +1239,9 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { continue; } MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast(temp[i]), ranges), - static_cast(temp[i]), static_cast(temp[i]), kWorkspace, n, - static_cast(i), workspace_reuse_flag, is_op_reuse_mem_, false); + static_cast(temp[i]), static_cast(temp[i]), + kWorkspace, n, static_cast(i), workspace_reuse_flag, + is_op_reuse_mem_, false); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block); } @@ -1268,10 +1280,10 @@ void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, return, "Op desc is null."); vector workspace_byte_nums = node->GetOpDesc()->GetWorkspaceBytes(); - 
GELOGD("GetNodeWorkSpaceSize: node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size()); + GELOGD("node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size()); for (int64_t byte_size : workspace_byte_nums) { workspace_memory.emplace_back(byte_size); - GELOGD("GetNodeWorkSpaceSize: push back size:%ld", byte_size); + GELOGD("push back size:%ld", byte_size); } } @@ -1297,15 +1309,16 @@ void MergeBlocks(std::vector &dest, std::vector &s } if (dest[i] != nullptr && src[i] != nullptr) { if (!dest[i]->reuse_mem_ || !src[i]->reuse_mem_) { - GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", i, - dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); + GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", + i, dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); continue; } for (auto &symbol : src[i]->SymbolList()) { dest[i]->AddSymbol(symbol); } for (size_t j = 0; j < src[i]->NodeTypeIndexList().size(); ++j) { - dest[i]->AddNodeTypeIndex(src[i]->NodeTypeIndexList()[j], src[i]->RealSizeList()[j], + dest[i]->AddNodeTypeIndex(src[i]->NodeTypeIndexList()[j], + src[i]->RealSizeList()[j], src[i]->NoAlignSizeList()[j]); src[i]->deleted_block_ = true; } @@ -1489,8 +1502,8 @@ void BlockMemAssigner::ResizeMemoryBlocks() { /// @param [in] real_size memory size in need /// @return Status result /// -void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, size_t real_size, size_t no_align_size, - bool child_block) { +void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, + size_t real_size, size_t no_align_size, bool child_block) { ge::OpDescPtr op_desc = node_type.node->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null."); string graph_name = node_type.node->GetOwnerComputeGraph()->GetName(); @@ -1508,7 +1521,7 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz return; } - static const set kSetOffsetTypes = {DATA_TYPE, AIPP_DATA_TYPE, MULTISHAPE, NETOUTPUT}; + static const set kSetOffsetTypes = { DATA_TYPE, AIPP_DATA_TYPE, MULTISHAPE, NETOUTPUT }; if ((kSetOffsetTypes.count(op_desc->GetType()) > 0) && !IsKnownSubgraphData(node_type.node)) { if ((output_list[node_type.index] == kInvalidOffset) || (output_list[node_type.index] < offset)) { output_list.at(node_type.index) = offset; @@ -1516,7 +1529,7 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz } else { // fusion: keep the original other type offset value from op_desc bool set_out_offset = (!has_mem_type_attr) || - (memorys_type.size() > node_type.index && memorys_type[node_type.index] != RT_MEMORY_L1); + (memorys_type.size() > node_type.index && memorys_type[node_type.index] != RT_MEMORY_L1); if (set_out_offset) { output_list.at(node_type.index) = offset; } @@ -1531,19 +1544,18 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz vector workspace_mem_type; bool has_workspace_mem_type = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_mem_type); // fusion: keep the original other type offset value from op_desc - bool set_workspace_offset = (!has_workspace_mem_type) || (workspace_mem_type.size() > node_type.index && - workspace_mem_type[node_type.index] != RT_MEMORY_L1); + bool set_workspace_offset = 
(!has_workspace_mem_type) || + (workspace_mem_type.size() > node_type.index && workspace_mem_type[node_type.index] != RT_MEMORY_L1); if (set_workspace_offset) { workspace_list.at(node_type.index) = offset; } op_desc->SetWorkspace(workspace_list); } - GELOGI( - "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" - " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", - graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, - op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, - block->reuse_mem_, block->continuous_block_, block->deleted_block_, node_type.ref_input); + GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" + " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(), + op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), + block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_, + block->continuous_block_, block->deleted_block_, node_type.ref_input); } void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { @@ -1604,8 +1616,8 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || - (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || + (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || + (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || (node_type == HVDCALLBACKBROADCAST); } } // namespace ge diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h old mode 100644 new mode 100755 index 7e37fe8e..d1a5e69d --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
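
For orientation: the block_mem_assigner.cc hunks above give IsOutNodeSetContinuousInput an extra no_need_assign_memory out-parameter, so that when the downstream node demanding continuous input is itself a zero-memory node type, the current output is skipped rather than handed a block. Below is a minimal standalone sketch of that control flow; Node, IsZeroMemType and the continuous_input flag are simplified placeholders, not the GE API, and the early return with the flag set mirrors the GE_IF_BOOL_EXEC branch added in the diff.

#include <iostream>
#include <string>
#include <vector>

struct Node {                      // simplified stand-in for ge::NodePtr
  std::string type;
  bool continuous_input = false;   // stand-in for ATTR_NAME_CONTINUOUS_INPUT
  std::vector<Node*> out_nodes;    // peer input nodes of this node's outputs
};

bool IsZeroMemType(const std::string &type) {  // stand-in for CheckIsZeroMemNodeType
  return type == "Variable" || type == "Constant";
}

// Returns true when a successor demands continuous input; sets
// no_need_assign_memory when that successor is a zero-memory node.
bool OutNodeSetsContinuousInput(const Node &n, bool &no_need_assign_memory) {
  for (const Node *peer : n.out_nodes) {
    if (peer == nullptr || !peer->continuous_input) {
      continue;
    }
    if (IsZeroMemType(peer->type)) {
      no_need_assign_memory = true;  // output is folded into the peer, skip allocation
      return false;
    }
    return true;
  }
  return false;
}

int main() {
  Node var{"Variable", true, {}};
  Node producer{"Conv2D", false, {&var}};
  bool skip = false;
  bool continuous = OutNodeSetsContinuousInput(producer, skip);
  std::cout << "continuous=" << continuous << " skip=" << skip << '\n';  // continuous=0 skip=1
  return 0;
}
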
@@ -106,7 +106,9 @@ class MemoryBlock { no_align_size_list_.emplace_back(no_align_size); } - void AddSymbol(const std::string &symbol) { symbol_list_.emplace_back(symbol); } + void AddSymbol(const std::string &symbol) { + symbol_list_.emplace_back(symbol); + } const std::vector &NodeTypeIndexList() const { return node_type_index_list_; } const std::vector &SymbolList() const { return symbol_list_; } @@ -170,11 +172,11 @@ class BlockMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; }; + size_t GetMemOffset() const { return mem_offset_; } - int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; }; + int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } - std::vector GetMemoryBlocks() const { return memory_blocks_; }; + std::vector GetMemoryBlocks() const { return memory_blocks_; } /// /// @ingroup domi @@ -259,6 +261,7 @@ class BlockMemAssigner : public MemAssigner { ge::ComputeGraphPtr compute_graph_; std::vector memory_blocks_; + std::vector blocks_store_; std::vector zero_memory_list_; @@ -309,8 +312,8 @@ class BlockMemAssigner : public MemAssigner { /// @return void /// @author /// - void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block); + void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, + int64_t stream_id, MemoryBlock *mem_block); /// /// @ingroup GE @@ -357,7 +360,7 @@ class BlockMemAssigner : public MemAssigner { bool IsZeroCopyBlock(const NodePtr &node, bool continuous); bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index); + uint32_t &peer_input_index, bool &no_need_assign_memory); /// /// @ingroup GE diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc old mode 100644 new mode 100755 index 583f65d8..b5f415ed --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
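
The block_mem_assigner.h hunk above adds blocks_store_ next to memory_blocks_: later passes may swap or drop entries from the working list, so every block ever applied is also recorded in a store that the destructor releases. A small self-contained illustration of that ownership split, with MemoryBlock and Assigner as stand-ins rather than the real classes:

#include <cstdio>
#include <vector>

struct MemoryBlock { size_t size; };

class Assigner {
 public:
  ~Assigner() {
    // free from the store, which always holds every block ever applied
    for (MemoryBlock *b : blocks_store_) delete b;
  }
  MemoryBlock *Apply(size_t size) {
    auto *block = new MemoryBlock{size};
    memory_blocks_.emplace_back(block);  // working list, may shrink later
    blocks_store_.emplace_back(block);   // permanent record used for cleanup
    return block;
  }
  void DropMerged() {
    // later merge/swap passes remove blocks from the working list only
    if (!memory_blocks_.empty()) memory_blocks_.pop_back();
  }
 private:
  std::vector<MemoryBlock *> memory_blocks_;
  std::vector<MemoryBlock *> blocks_store_;
};

int main() {
  Assigner a;
  a.Apply(512);
  a.Apply(1024);
  a.DropMerged();  // working list shrinks, store still owns both blocks
  std::printf("no leak: destructor releases via blocks_store_\n");
  return 0;
}
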
@@ -39,6 +39,33 @@ const size_t kVirtualInputNodeOutputSize = 1; const size_t kVirtualOutputNodeInputSize = 1; const size_t kVirtualNodeDataIndex = 0; const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; +int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, + const std::map> &symbol_to_anchors, + const ge::NodePtr &node, const uint32_t i) { + ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut); + auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString()); + if (iter1 == anchor_to_symbol.end()) { + return ge::kInvalidOffset; + } + auto out_symbol = iter1->second; + auto iter2 = symbol_to_anchors.find(out_symbol); + if (iter2 == symbol_to_anchors.end()) { + return ge::kInvalidOffset; + } + for (const auto &node_index_io : iter2->second) { + if (node_index_io.value_ == out_symbol) { + vector output_list = node->GetOpDesc()->GetOutputOffset(); + vector symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset(); + if (node_index_io.index_ >= symbol_output_list.size()) { + return ge::kInvalidOffset; + } + GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i, + output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_)); + return symbol_output_list.at(node_index_io.index_); + } + } + return ge::kInvalidOffset; +} } // namespace namespace ge { Status VariableMemoryAssigner::Assign() { @@ -227,10 +254,8 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offse if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); - ErrorManager::GetInstance().ATCReportErrMessage( - "E19022", {"size", "item", "maxsize"}, - {std::to_string(mem_offset), "featuremap", - std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); + ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"size", "item", "maxsize"}, {std::to_string(mem_offset), + "featuremap", std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); return ge::FAILED; } return SUCCESS; @@ -295,11 +320,19 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { GELOGE(ge::FAILED, "There is an atomic conflict between the current node and the peer out node, not supported!"); return ge::FAILED; - } else if (is_loop_graph) { - GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); - } else { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), - "SetAtomicCleanAttr failed."); + } + + const auto &in_control_anchor = node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_control_anchor); + for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { + auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); + if (peer_out_node->GetType() == ATOMICADDRCLEAN) { + ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); + if (ret != SUCCESS) { + GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); + return ret; + } + } } } } @@ -370,7 +403,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, vector output_list = peer_op_desc->GetOutputOffset(); std::vector offsets_for_fusion = {}; bool has_offset_attr = - AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); 
+ AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); if (peer_out_data_anchor->GetIdx() < static_cast(output_list.size())) { if (continuous_input_alloc && !has_offset_attr) { if (in_data_anchor->GetIdx() == 0) { @@ -468,10 +501,10 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node } mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; GELOGI( - "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); + "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + output_list[out_data_anchor->GetIdx()] , out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); } out_op_desc->SetOutputOffset(output_list); return ge::SUCCESS; @@ -526,11 +559,11 @@ Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t mem_offset_reuse += output_mem_size; extra_memory_size = extra_memory_size + out_size - output_mem_size; - GELOGI( - "[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(), - pre_mem_offset, peer_op_desc->GetStreamId(), out_size, output_mem_size); + GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, + output_mem_size); } mem_offset_reuse += extra_memory_size; size_t after_mem_offset = mem_offset_reuse; @@ -586,7 +619,7 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { } string fixed_name = current_node_full_name.substr(0, pos); vector parallel_virtual_input_nodes; - if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { + if(mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name]; } parallel_virtual_input_nodes.emplace_back(n); @@ -652,8 +685,8 @@ Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t int64_t out_size; if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", op_desc->GetName().c_str(), - out_data_anchor->GetIdx()); + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", + op_desc->GetName().c_str(), out_data_anchor->GetIdx()); return FAILED; } @@ -719,7 +752,7 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { } string fixed_name = current_node_full_name.substr(0, pos); vector parallel_virtual_output_nodes; - if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { + if(mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name]; } 
parallel_virtual_output_nodes.emplace_back(n); @@ -813,68 +846,37 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map(memory_offset_[0].mem_offset_); - GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); - - vector connect_netoutput_nodes; - for (auto &node : compute_graph_->GetAllNodes()) { - auto node_op_desc = node->GetOpDesc(); - if (node_op_desc == nullptr) { - continue; - } - - bool is_atomic = false; - // If GetBool fail, is_atomic is false. - (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); - if (!is_atomic) { - continue; - } - - bool is_ref = false; - // If GetBool fail, is_ref is false. - (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref); - if (is_ref) { - GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.", - node_op_desc->GetName().c_str()); - return ge::PARAM_INVALID; - } + map> normal_atomic_and_clean_nodes_map; + vector connecting_output_atomic_nodes; + Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); + if (status != SUCCESS) { + GELOGE(status, "Failed to filter atomic nodes for memory assignment."); + return status; + } - vector is_connect_netoutput; - // If GetBool fail, attr is_connect_netoutput is an empty vector. - (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); - if (!is_connect_netoutput.empty()) { - connect_netoutput_nodes.emplace_back(node); - continue; - } + for (auto &iter : normal_atomic_and_clean_nodes_map) { + int64_t atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); + GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start); - // Atomic op memory start addr of loop graph - int64_t loop_graph_atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); - vector mem_offset_end; - if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { - GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); - return FAILED; + for (auto &atomic_node : iter.second) { + vector mem_offset_end; + status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); + if (status != SUCCESS) { + GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.", + atomic_node->GetName().c_str()); + return status; + } } - /// In networks with loop op, atomic op uses atomic_addr_clean op independently, - /// so we need to set the attr separately. 
- if (is_loop_graph) { - GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start)); + int64_t atomic_mem_size = static_cast(memory_offset_[0].mem_offset_) - atomic_mem_start; + status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}); + if (status != SUCCESS) { + GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); + return status; } } - // In networks without loop op, the same atomic addr clean op is used for atomic op - if (!is_loop_graph) { - // Set the address attr of atomic clean operator - int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; - if (atomic_mem_size != 0) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), - "SetAtomicCleanAttr failed."); - } - } - - if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { + if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) { GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); return FAILED; } @@ -882,6 +884,55 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { return SUCCESS; } +Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map> &normal_atomic_nodes_map, + vector &connecting_output_atomic_nodes) { + GE_CHECK_NOTNULL(compute_graph_); + for (const auto &node : compute_graph_->GetAllNodes()) { + if (node->GetType() == ATOMICADDRCLEAN) { + vector tmp_normal_atomic_nodes; + const auto &out_control_anchor = node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_control_anchor); + for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) { + if (peer_in_control_anchor != nullptr) { + auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); + auto peer_in_node_desc = peer_in_node->GetOpDesc(); + if (peer_in_node_desc != nullptr) { + bool is_atomic_node = false; + // If GetBool fail, is_atomic_node is false. + (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); + if (is_atomic_node) { + bool is_reference = false; + // If GetBool fail, is_reference is false. + (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference); + if (is_reference) { + GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.", + peer_in_node_desc->GetName().c_str()); + return ge::PARAM_INVALID; + } + + vector is_connecting_output; + // If GetBool fail, attr is_connecting_output is an empty vector. 
+ (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output); + if (is_connecting_output.empty()) { + tmp_normal_atomic_nodes.emplace_back(peer_in_node); + continue; + } + connecting_output_atomic_nodes.emplace_back(peer_in_node); + tmp_normal_atomic_nodes.clear(); + break; + } + } + } + } + + if (!tmp_normal_atomic_nodes.empty()) { + normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes; + } + } + } + return SUCCESS; +} + Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, vector &mem_offset_end) { auto node_op_desc = node->GetOpDesc(); @@ -1191,6 +1242,12 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt } Status GraphMemoryAssigner::CheckOffset() { + std::map anchor_to_symbol; + std::map> symbol_to_anchors; + if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); + return FAILED; + } for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { GE_CHECK_NOTNULL(node->GetOpDesc()); vector input_list = node->GetOpDesc()->GetInputOffset(); @@ -1200,13 +1257,26 @@ Status GraphMemoryAssigner::CheckOffset() { return FAILED; } } + + bool need_update_output = false; vector output_list = node->GetOpDesc()->GetOutputOffset(); - for (auto output : output_list) { - if (output == ge::kInvalidOffset) { + for (uint32_t i = 0; i < output_list.size(); ++i) { + if (output_list[i] == ge::kInvalidOffset) { GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset); return FAILED; } + if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) { + auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i); + if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) { + output_list[i] = symbol_offset; + need_update_output = true; + } + } + } + if (need_update_output) { + node->GetOpDesc()->SetOutputOffset(output_list); } + vector workspace_list = node->GetOpDesc()->GetWorkspace(); for (auto workspace : workspace_list) { if (workspace == ge::kInvalidOffset) { @@ -1257,8 +1327,8 @@ ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vecto const auto &in_node = NodeUtils::GetParentInput(node); if (NodeUtils::GetConstOpType(in_node, op_type)) { input_list = in_node->GetOpDesc()->GetOutputOffset(); - node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output. - return SUCCESS; // Constant input. + node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output. + return SUCCESS; // Constant input. } // Memory allocated for dynamic shape subgraph Data. @@ -1275,8 +1345,8 @@ ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vecto return FAILED; } - input_list = {parent_inputs[parent_index]}; - node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input. + input_list = { parent_inputs[parent_index] }; + node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input. 
return SUCCESS; } @@ -1285,6 +1355,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< vector memory_type; auto tmp_op_desc = node->GetOpDesc(); origin_input_list = tmp_op_desc->GetInputOffset(); + int64_t valid_input_index = 0; bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); for (const auto &anchor : node->GetAllInDataAnchors()) { vector output_list; @@ -1298,8 +1369,9 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc(); GE_CHECK_NOTNULL(last_peer_out_op_desc); output_list = last_peer_out_op_desc->GetOutputOffset(); - if (output_list.size() > static_cast(peer_out_anchor->GetIdx())) { - auto input_index = anchor->GetIdx(); + auto out_index = static_cast(peer_out_anchor->GetIdx()); + if (output_list.size() > static_cast(out_index)) { + int64_t input_offset = output_list.at(out_index); if (has_mem_type_attr) { auto input_size = tmp_op_desc->GetInputsSize(); auto ori_input_offset_list_size = origin_input_list.size(); @@ -1313,26 +1385,24 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< } // not hbm keep orignal inputoffest // hbm inputoffset = original inputoffset + outputoffset - input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1 - ? origin_input_list[input_index] - : origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx())); - GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", - tmp_op_desc->GetName().c_str(), input_index, - peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), - input_list.back()); - } else { - int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); - const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); - if (in_node->GetType() == CONSTANT) { - GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); - GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); - } - - GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), - input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), - output_offset); - input_list.emplace_back(output_offset); + input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? origin_input_list[valid_input_index] + : origin_input_list[valid_input_index] + output_list.at(out_index)); } + const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); + if (in_node->GetType() == CONSTANT) { + GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast(anchor->GetIdx())); + GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); + } + + GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", + has_mem_type_attr == true ? 
"Fusion" : "", + tmp_op_desc->GetName().c_str(), + valid_input_index, + peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), + out_index, + input_offset); + input_list.emplace_back(input_offset); + valid_input_index++; } } return ge::SUCCESS; @@ -1427,125 +1497,49 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in return SUCCESS; } -Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { - // set the address attr of atomic clean operator for loop graph - int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; - GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.", - atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_); - const auto &in_control_anchor = node->GetInControlAnchor(); - if (atomic_mem_size != 0 && in_control_anchor != nullptr) { - for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { - if (peer_out_control_anchor == nullptr) { - continue; - } - auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); - auto peer_out_node_desc = peer_out_node->GetOpDesc(); - if (peer_out_node_desc == nullptr) { - continue; - } - - GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(), - peer_out_node_desc->GetType().c_str()); - - if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { - GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}), - GELOGE(FAILED, "SetAtomicCleanAttr failed."); - return FAILED); - } - } - } - return SUCCESS; -} - -ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node, - bool &is_independent_atomic_clean_node) { - GE_CHECK_NOTNULL(node); - const auto &out_control_anchor = node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(out_control_anchor); - for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) { - if (peer_in_control_anchor != nullptr) { - auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); - auto peer_in_node_desc = peer_in_node->GetOpDesc(); - if (peer_in_node_desc != nullptr) { - bool is_atomic_node = false; - // If GetBool fail, is_atomic_node is false. - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); - if (is_atomic_node) { - vector is_connect_netoutput; - // If GetBool fail, attr is_connect_netoutput is an empty vector. 
- (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); - if (!is_connect_netoutput.empty()) { - GELOGD("Peer in node %s is independent atomic clean node", peer_in_node->GetName().c_str()); - is_independent_atomic_clean_node = true; - break; - } - } - } - } - } - - return SUCCESS; -} - -ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector &atomic_mem_start, +ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector &atomic_mem_start, const vector &atomic_mem_size) { - for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { - auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); - - bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName()); - - if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { - bool is_independent_atomic_clean = false; - if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) { - GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node."); - return PARAM_INVALID; - } - - is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean); + auto node_op_desc = node->GetOpDesc(); + if (node_op_desc != nullptr) { + GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); + vector workspace_vector = node_op_desc->GetWorkspace(); + vector workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); + workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); + workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); + node_op_desc->SetWorkspace(workspace_vector); + node_op_desc->SetWorkspaceBytes(workspace_byte_vector); + + std::vector mem_start_vector; + // If GetListInt fail, mem_start_vector is empty. + (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); + mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), + GELOGE(FAILED, "SetListInt failed."); + return FAILED); + + std::vector mem_size_vector; + // If GetListInt fail, mem_size_vector is empty. + (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); + mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), + GELOGE(FAILED, "SetListInt failed."); + return FAILED); + + std::stringstream ss; + for (auto iter : atomic_mem_start) { + ss << iter << " "; } - - if (is_valid_atomic_clean_node) { - GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); - vector workspace_vector = node_op_desc->GetWorkspace(); - vector workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); - workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); - workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); - node_op_desc->SetWorkspace(workspace_vector); - node_op_desc->SetWorkspaceBytes(workspace_byte_vector); - - std::vector mem_start_vector; - // If GetListInt fail, mem_start_vector is empty. 
- (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); - mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), - GELOGE(FAILED, "SetListInt failed."); - return FAILED); - - std::vector mem_size_vector; - // If GetListInt fail, mem_size_vector is empty. - (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); - mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), - GELOGE(FAILED, "SetListInt failed."); - return FAILED); - - std::stringstream ss; - for (auto iter : atomic_mem_start) { - ss << iter << " "; - } - string atomic_mem_start_str = ss.str(); - ss.clear(); - ss.str(""); - for (auto iter : atomic_mem_size) { - ss << iter << " "; - } - string atomic_mem_size_str = ss.str(); - - GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", - node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); + string atomic_mem_start_str = ss.str(); + ss.clear(); + ss.str(""); + for (auto iter : atomic_mem_size) { + ss << iter << " "; } + string atomic_mem_size_str = ss.str(); + + GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", + node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), + atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); } return SUCCESS; } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h old mode 100644 new mode 100755 index e1e408be..3864a967 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
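
The graph_mem_assigner.cc rework above drops the loop-graph special case and instead groups atomic nodes under the ATOMICADDRCLEAN node reached through its out-control edges, then writes the accumulated start/size attributes directly onto that clean node. A condensed standalone sketch of the grouping step; Node, kAtomicClean and the boolean flags are illustrative placeholders for the GE attributes:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node {
  std::string name;
  std::string type;
  bool is_atomic = false;                // stand-in for ATOMIC_ATTR_IS_ATOMIC_NODE
  bool connects_output = false;          // stand-in for ATTR_NAME_NODE_CONNECT_OUTPUT
  std::vector<Node*> out_control_nodes;  // successors over control edges
};

const std::string kAtomicClean = "AtomicAddrClean";

// Group atomic nodes under the clean node that controls them; nodes that
// connect straight to the net output are collected separately.
void FilterAtomicNodes(const std::vector<Node*> &graph,
                       std::map<Node*, std::vector<Node*>> &clean_to_atomic,
                       std::vector<Node*> &connecting_output) {
  for (Node *node : graph) {
    if (node->type != kAtomicClean) continue;
    std::vector<Node*> normal;
    for (Node *peer : node->out_control_nodes) {
      if (!peer->is_atomic) continue;
      if (peer->connects_output) {
        connecting_output.push_back(peer);
        normal.clear();
        break;                           // handled by the netoutput path instead
      }
      normal.push_back(peer);
    }
    if (!normal.empty()) clean_to_atomic[node] = normal;
  }
}

int main() {
  Node atomic{"conv_atomic", "Conv2D", true, false, {}};
  Node clean{"clean0", kAtomicClean, false, false, {&atomic}};
  std::map<Node*, std::vector<Node*>> groups;
  std::vector<Node*> to_output;
  FilterAtomicNodes({&clean, &atomic}, groups, to_output);
  std::cout << "atomic nodes grouped under clean0: " << groups[&clean].size() << '\n';  // 1
  return 0;
}
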
@@ -71,10 +71,12 @@ using VariableMemoryAssignerPtr = std::shared_ptr; using BlockMemAssignerPtr = std::shared_ptr; using HybridMemAssignerPtr = std::shared_ptr; + class GraphMemoryAssigner { public: explicit GraphMemoryAssigner(ge::ComputeGraphPtr compute_graph) - : compute_graph_(std::move(compute_graph)), mem_assigner_(nullptr) {} + : compute_graph_(std::move(compute_graph)), + mem_assigner_(nullptr) {} GraphMemoryAssigner(const GraphMemoryAssigner &) = delete; @@ -127,16 +129,19 @@ class GraphMemoryAssigner { ge::Status ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, int32_t mem_reuse_model); - ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, int32_t mem_reuse_model, - string &max_batch_label); + ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, + int32_t mem_reuse_model, string &max_batch_label); ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); ge::Status ReAssignAtomicMemory(bool is_loop_graph); - ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size); + ge::Status FilterAtomicNodesForMemoryAssign(std::map> &normal_atomic_nodes_map, + std::vector &connecting_output_atomic_nodes); + + ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, + int64_t &continuous_mem_start, int64_t &continuous_mem_size); ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); @@ -165,14 +170,8 @@ class GraphMemoryAssigner { ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, const std::vector &mem_offset_end); - /// - /// @brief set loop graph atomic attr - /// @param node, atomic memory assignment start offset - /// @param atomic_mem_start: atomic op memory start address - /// - ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); - ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector &atomic_mem_start, + ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector &atomic_mem_start, const std::vector &atomic_mem_size); ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); diff --git a/ge/graph/build/memory/hybrid_mem_assigner.cc b/ge/graph/build/memory/hybrid_mem_assigner.cc old mode 100644 new mode 100755 index a75487de..6538b0f2 --- a/ge/graph/build/memory/hybrid_mem_assigner.cc +++ b/ge/graph/build/memory/hybrid_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
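
Related to the CheckOffset hunk earlier in graph_mem_assigner.cc: output offsets of Identity and ReadVariableOp nodes are now realigned to the offset recorded at their shared-memory symbol. A minimal sketch of that realignment, where resolve() stands in for the GetSymbolOutputOffset helper added in the diff:

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

constexpr int64_t kInvalidOffset = -1;

// Realign the recorded output offsets of a reference-style node (Identity,
// ReadVariableOp) with the offsets of the symbols they alias.
void AlignRefOutputs(std::vector<int64_t> &output_offsets,
                     const std::function<int64_t(size_t)> &resolve) {
  for (size_t i = 0; i < output_offsets.size(); ++i) {
    const int64_t symbol_offset = resolve(i);
    if (symbol_offset != kInvalidOffset && output_offsets[i] != symbol_offset) {
      output_offsets[i] = symbol_offset;  // follow the symbol's defining anchor
    }
  }
}

int main() {
  std::vector<int64_t> offsets{0, 96};
  AlignRefOutputs(offsets, [](size_t i) { return i == 0 ? 512 : kInvalidOffset; });
  std::cout << offsets[0] << " " << offsets[1] << '\n';  // 512 96
  return 0;
}
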
@@ -46,12 +46,12 @@ Status HybridMemAssigner::Assign() { return FAILED; } - std::unique_ptr binary_assigner( - new (std::nothrow) BinaryBlockMemAssigner(compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); + std::unique_ptr binary_assigner(new (std::nothrow) BinaryBlockMemAssigner( + compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); GE_CHECK_NOTNULL(binary_assigner); - std::unique_ptr max_assigner( - new (std::nothrow) MaxBlockMemAssigner(compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); + std::unique_ptr max_assigner(new (std::nothrow) MaxBlockMemAssigner( + compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); GE_CHECK_NOTNULL(max_assigner); size_t bin_mem_size = 0; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.h b/ge/graph/build/memory/hybrid_mem_assigner.h old mode 100644 new mode 100755 index fba70a59..6673c0ef --- a/ge/graph/build/memory/hybrid_mem_assigner.h +++ b/ge/graph/build/memory/hybrid_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/max_block_mem_assigner.cc b/ge/graph/build/memory/max_block_mem_assigner.cc index db6befeb..15edae3d 100644 --- a/ge/graph/build/memory/max_block_mem_assigner.cc +++ b/ge/graph/build/memory/max_block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/max_block_mem_assigner.h b/ge/graph/build/memory/max_block_mem_assigner.h index f5626ebf..c4d67953 100644 --- a/ge/graph/build/memory/max_block_mem_assigner.h +++ b/ge/graph/build/memory/max_block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/mem_assigner.h b/ge/graph/build/memory/mem_assigner.h old mode 100644 new mode 100755 index b1cb4627..7d0252d9 --- a/ge/graph/build/memory/mem_assigner.h +++ b/ge/graph/build/memory/mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc old mode 100644 new mode 100755 index e36f082e..91051edc --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
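
The hybrid_mem_assigner.cc hunk above keeps allocating each block assigner with new (std::nothrow) into a std::unique_ptr and checking for null before use, rather than letting allocation failure throw. A tiny generic sketch of that pattern, with BlockAssigner as a placeholder for the binary and max assigners:

#include <iostream>
#include <memory>
#include <new>

struct BlockAssigner {  // placeholder for BinaryBlockMemAssigner / MaxBlockMemAssigner
  explicit BlockAssigner(int id) : id_(id) {}
  int id_;
};

int Assign() {
  std::unique_ptr<BlockAssigner> binary(new (std::nothrow) BlockAssigner(0));
  if (binary == nullptr) {  // mirrors GE_CHECK_NOTNULL: report failure instead of throwing
    return -1;
  }
  std::unique_ptr<BlockAssigner> max(new (std::nothrow) BlockAssigner(1));
  if (max == nullptr) {
    return -1;
  }
  std::cout << "assigners ready: " << binary->id_ << ", " << max->id_ << '\n';
  return 0;
}

int main() { return Assign(); }
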
diff --git a/ge/graph/build/memory/module.mk b/ge/graph/build/memory/module.mk old mode 100644 new mode 100755 diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc old mode 100644 new mode 100755 index a352cf65..639bfaa0 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,17 +62,17 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr GE_CHECK_NOTNULL(tensor_desc); if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) - ->SetAllocatedGraphId(node_name, compute_graph->GetGraphID())); + ->SetAllocatedGraphId(node_name, compute_graph->GetGraphID())); } uint8_t *dev_ptr = nullptr; rtMemType_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); + GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) + ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); GE_IF_BOOL_EXEC(output_list.empty(), return FAILED); output_list[0] = static_cast(reinterpret_cast(dev_ptr)); @@ -90,9 +90,9 @@ Status VarMemAssignUtil::AssignData2Fp32Var(const ge::NodePtr &node, uint64_t se rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetCurVarDesc(src_var_name, cur_tensor_desc)); GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->GetVarAddr(src_var_name, cur_tensor_desc, &dev_ptr, memory_type)); + VarManager::Instance(session_id)->GetVarAddr(src_var_name, cur_tensor_desc, &dev_ptr, memory_type)); GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->SetVarAddr(node->GetName(), cur_tensor_desc, dev_ptr, memory_type)); + VarManager::Instance(session_id)->SetVarAddr(node->GetName(), cur_tensor_desc, dev_ptr, memory_type)); } return SUCCESS; } @@ -122,7 +122,7 @@ Status VarMemAssignUtil::SetOutVariableAttr(const ge::NodePtr &node, const ge::N GeTensorDesc var_tensor_desc = var_node->GetOpDesc()->GetOutputDesc(0); rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->GetVarAddr(var_node->GetName(), var_tensor_desc, &dev_ptr, memory_type)); + VarManager::Instance(session_id)->GetVarAddr(var_node->GetName(), var_tensor_desc, &dev_ptr, memory_type)); int out_list_size = static_cast(output_list.size()); GE_CHK_BOOL_RET_STATUS(index < out_list_size, FAILED, "index %d >= output_list.size() %d", index, out_list_size); @@ -171,7 +171,7 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr "Get broadcast op %s input tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); const ge::GeTensorDescPtr input_tensor_desc = 
- input_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); + input_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); int64_t input_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*input_tensor_desc, input_size), "get input size failed."); broad_cast_info.input_size = input_size; @@ -190,7 +190,7 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr "Get broadcast op %s output tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), output_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); const ge::GeTensorDescPtr output_tensor_desc = - output_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); + output_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); int64_t output_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*output_tensor_desc, output_size), "get input size failed."); broad_cast_info.output_size = output_size; @@ -220,7 +220,7 @@ Status VarMemAssignUtil::DealVariableNode(uint32_t graph_id, const ge::NodePtr & } auto dst_type = dst_node->GetType(); bool is_trans_node = - (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); + (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); if (is_trans_node) { NodePtr final_trans_node = GetFinalTransNode(dst_node); GE_CHK_STATUS_RET(DealTransNode(final_trans_node)); @@ -238,7 +238,7 @@ ge::NodePtr VarMemAssignUtil::GetFinalTransNode(const ge::NodePtr &trans_node) { NodePtr dst_node = dst_in_anchor->GetOwnerNode(); auto dst_type = dst_node->GetType(); bool is_trans_node = - (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); + (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); if (is_trans_node && (dst_in_anchor->GetIdx() == 0)) { final_ref_node = GetFinalTransNode(dst_node); } @@ -319,11 +319,11 @@ Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, ge::NodePtr var_ref_src_var = root_graph->FindNode(src_var_name); if (var_ref_src_var == nullptr) { for (auto sub_graph : root_graph->GetAllSubgraphs()) { - auto node_ptr = sub_graph->FindNode(src_var_name); - if (node_ptr != nullptr) { - var_ref_src_var = node_ptr; - break; - } + auto node_ptr = sub_graph->FindNode(src_var_name); + if (node_ptr != nullptr) { + var_ref_src_var = node_ptr; + break; + } } } GE_IF_BOOL_EXEC(var_ref_src_var == nullptr || var_ref_src_var->GetOpDesc() == nullptr, return FAILED); diff --git a/ge/graph/build/memory/var_mem_assign_util.h b/ge/graph/build/memory/var_mem_assign_util.h index cb38af29..f0e6270d 100644 --- a/ge/graph/build/memory/var_mem_assign_util.h +++ b/ge/graph/build/memory/var_mem_assign_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
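
The is_trans_node checks reflowed above chain four type comparisons (TRANSDATA, CAST, TRANSPOSE, PERMUTE). An equivalent set-based lookup is sketched below as an alternative formulation; the literal type strings are placeholders rather than the real GE constants:

#include <iostream>
#include <string>
#include <unordered_set>

static bool IsTransNodeType(const std::string &node_type) {
  // One lookup instead of a chain of == comparisons.
  static const std::unordered_set<std::string> kTransTypes = {"TransData", "Cast", "Transpose", "Permute"};
  return kTransTypes.count(node_type) > 0;
}

int main() {
  std::cout << std::boolalpha << IsTransNodeType("Cast") << "\n";    // true
  std::cout << std::boolalpha << IsTransNodeType("MatMul") << "\n";  // false
  return 0;
}
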
@@ -29,6 +29,7 @@ class VarMemAssignUtil { static Status AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_graph); static Status AssignVarAttr2Nodes(ge::ComputeGraphPtr &compute_graph); static Status AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &compute_graph); + static Status AssignData2Fp32Var(const ge::NodePtr &node, uint64_t session_id); private: static Status AssignMemory2VariableNode(ge::ComputeGraphPtr &compute_graph); @@ -40,7 +41,6 @@ class VarMemAssignUtil { static Status DealBroadCastNode(uint32_t graph_id, const ge::NodePtr &node, const ge::InDataAnchorPtr &in_data_anchor, const ge::NodePtr &var_node, uint64_t session_id); - static Status AssignData2Fp32Var(const ge::NodePtr &node, uint64_t session_id); static ge::NodePtr GetFinalTransNode(const ge::NodePtr &ref_node); diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc old mode 100644 new mode 100755 index 9a37478d..9c2e4836 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "graph/build/model_builder.h" #include #include @@ -145,7 +144,7 @@ Status ModelBuilder::CalcOutputSize(const ge::NodePtr &n) { bool ModelBuilder::SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_node, size_t index, vector &is_input_const) { - GELOGI("SetIsInputConst const: %s", op_desc->GetName().c_str()); + GELOGI("SetIsInputConst const: %s, source node: %s", op_desc->GetName().c_str(), src_node->GetName().c_str()); for (size_t i = is_input_const.size(); i <= index; ++i) { is_input_const.push_back(false); } @@ -153,7 +152,7 @@ bool ModelBuilder::SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_no vector weights = OpDescUtils::MutableWeights(src_node); if (weights.empty()) { - GELOGW("SetInputIsConst weights is empty"); + GELOGW("SetInputIsConst weights is empty, node: %s", src_node->GetName().c_str()); return false; } GeTensorPtr weight = weights[0]; @@ -192,6 +191,7 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) { GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); const auto &src_node = peer_out_anchor->GetOwnerNode(); if (!NodeUtils::GetConstOpType(src_node, const_type)) { + GELOGI("Node %s:%zu, sorce node: %s Not Const", n->GetName().c_str(), index, src_node->GetName().c_str()); continue; } @@ -532,8 +532,8 @@ Status ModelBuilder::MergeWeights() { if (weight_data.data() != nullptr) { GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED); if (weight_offset_ - offset < weight_data.size()) { - GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", weight_offset_ - offset, - weight_data.size()); + GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", + weight_offset_ - offset, weight_data.size()); return FAILED; } uintptr_t dst_ptr = reinterpret_cast(base_addr) + offset; @@ -543,8 +543,7 @@ Status ModelBuilder::MergeWeights() { auto err = memcpy_s(reinterpret_cast(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast(src_ptr), SECUREC_MEM_MAX_LEN); if (err != EOK) { - GELOGE(FAILED, - "mem copy failed. errret:%u, " + GELOGE(FAILED, "mem copy failed. 
errret:%u, " "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); return FAILED; @@ -555,8 +554,7 @@ Status ModelBuilder::MergeWeights() { } auto err = memcpy_s(reinterpret_cast(dst_ptr), left_size, reinterpret_cast(src_ptr), left_size); if (err != EOK) { - GELOGE(FAILED, - "mem copy failed. errret:%u, " + GELOGE(FAILED, "mem copy failed. errret:%u, " "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); return FAILED; @@ -582,8 +580,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { if (tbe_kernel == nullptr) { std::string kernel_name; GeAttrValue::BYTES kernel_buffer; - (void)AttrUtils::GetStr(node_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name); - (void)AttrUtils::GetBytes(node_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer); + (void) AttrUtils::GetStr(node_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name); + (void) AttrUtils::GetBytes(node_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer); if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { GE_CHECK_NOTNULL(kernel_buffer.GetData()); std::vector data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); @@ -604,7 +602,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); CustAICPUKernelPtr cust_aicpu_kernel = - node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); + node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); GE_IF_BOOL_EXEC(cust_aicpu_kernel == nullptr, continue); if (aicpu_name_set.count(cust_aicpu_kernel->GetName()) > 0) { GELOGE(FAILED, "aicpu_kernel name %s can't be the same", cust_aicpu_kernel->GetName().c_str()); diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index e54d6695..04827c30 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/run_context.cc b/ge/graph/build/run_context.cc index cece31ea..10da061c 100644 --- a/ge/graph/build/run_context.cc +++ b/ge/graph/build/run_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
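
The MergeWeights() hunks above reflow the "left weight size not enough" guard and the memcpy_s error logging. The guard itself can be sketched independently; SafeCopy below is a plain-standard stand-in for the securec memcpy_s call, not the real API:

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

static bool SafeCopy(void *dst, size_t dst_left, const void *src, size_t src_size) {
  if (src == nullptr || dst == nullptr || dst_left < src_size) {
    return false;  // mirrors the "left weight size not enough" error path
  }
  std::memcpy(dst, src, src_size);
  return true;
}

int main() {
  std::vector<char> weights(64, 0);
  std::vector<char> chunk(16, 1);
  size_t offset = 56;  // only 8 bytes left, so the 16-byte copy must be rejected
  if (!SafeCopy(weights.data() + offset, weights.size() - offset, chunk.data(), chunk.size())) {
    std::printf("left weight size not enough\n");
  }
  return 0;
}
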
*/ - #include "graph/build/run_context.h" #include "common/util.h" @@ -167,8 +166,9 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra GELOGI("CreateRunContext: data_mem_base_ = %p, weight_mem_base_ = %p, memory_size = %lu, weight_size = %lu", data_mem_base_, weight_mem_base_, data_mem_size_, weight_mem_size_); - run_context_ = {rt_model_, nullptr, session_id, data_mem_size_, data_mem_base_, weight_mem_size_, - weight_mem_base_, buffer, stream_list_, event_list_, label_list_}; + run_context_ = {rt_model_, nullptr, session_id, data_mem_size_, data_mem_base_, + weight_mem_size_, weight_mem_base_, buffer, stream_list_, event_list_, + label_list_}; return SUCCESS; } diff --git a/ge/graph/build/run_context.h b/ge/graph/build/run_context.h old mode 100644 new mode 100755 index 5b24f343..0190f134 --- a/ge/graph/build/run_context.h +++ b/ge/graph/build/run_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index bcfea1d8..3aba8fd1 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -221,9 +221,9 @@ Status StreamAllocator::AssignSingleStream() { huge_streams_.emplace_back(huge_stream); } else { GELOGW( - "The estimated task count %ld is greater than the max count of normal stream," - " but the huge stream is not supported.", - task_count); + "The estimated task count %ld is greater than the max count of normal stream," + " but the huge stream is not supported.", + task_count); } } @@ -742,9 +742,9 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { if (NeedSpiltNewStream(stream_node_num_vec[stream_id], max_node_num_one_stream, op_desc, is_stream_first_node)) { last_stream_id++; GELOGI( - "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " - "It's time to split the stream, split newly-added stream id is %ld", - stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); + "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " + "It's time to split the stream, split newly-added stream id is %ld", + stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); NodePtr pre_node = pre_node_vec[stream_id]; stream_node_num_vec[stream_id] = 0; AddNodeNum(cur_node, stream_node_num_vec[stream_id]); @@ -770,8 +770,8 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { cur_continuous_stream_label.c_str()); auto iter = std::find(stream_2_nodes_map[stream_id].begin(), stream_2_nodes_map[stream_id].end(), not_cur); GE_RETURN_WITH_LOG_IF_FALSE( - (iter != stream_2_nodes_map[stream_id].end()) && (iter != stream_2_nodes_map[stream_id].begin()), - "split stream with continuous stream label %s failed", cur_continuous_stream_label.c_str()); + (iter != stream_2_nodes_map[stream_id].end()) && (iter != stream_2_nodes_map[stream_id].begin()), + "split stream with continuous stream label %s failed", cur_continuous_stream_label.c_str()); iter--; pre_node = *iter; } diff --git 
a/ge/graph/build/stream_allocator.h b/ge/graph/build/stream_allocator.h index 0158e6b0..a21b2f77 100644 --- a/ge/graph/build/stream_allocator.h +++ b/ge/graph/build/stream_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index 49ecc674..21625a1e 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "stream_graph_optimizer.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/graph/build/stream_graph_optimizer.h b/ge/graph/build/stream_graph_optimizer.h index 3133d32d..b0eea135 100644 --- a/ge/graph/build/stream_graph_optimizer.h +++ b/ge/graph/build/stream_graph_optimizer.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc old mode 100644 new mode 100755 index 58a8bf7b..225ddb88 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,13 +126,13 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t Status TaskGenerator::AddModelTaskToModel(const ModelTaskDef &model_task_def, uint64_t session_id, ge::Model &model, RunContext &run_context) { GE_CHK_BOOL_EXEC( - AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast(run_context.dataMemBase)), - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed."); - return FAILED); + AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast(run_context.dataMemBase)), + GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed."); + return FAILED); GE_CHK_BOOL_EXEC( - AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast(run_context.weightMemBase)), - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed."); - return FAILED); + AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast(run_context.weightMemBase)), + GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed."); + return FAILED); GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, ATTR_MODEL_TASK_GEN_VAR_ADDR, reinterpret_cast(var_mem_base_)), GELOGE(FAILED, "SetInt ATTR_MODEL_TASK_GEN_VAR_ADDR failed."); return FAILED); @@ -292,8 +292,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // For fusion ddb pass, task def must be continuous. 
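
AddModelTaskToModel() above stores run-context base addresses as integer model attributes via reinterpret_cast. A minimal illustration of that round trip, with a plain std::map standing in for ge::AttrUtils and ge::Model:

#include <cassert>
#include <cstdint>
#include <map>
#include <string>

int main() {
  std::map<std::string, int64_t> attrs;  // stand-in attribute store
  char data_mem_base[128];               // pretend this is the model's data memory base

  // Pointer -> uintptr_t -> int64_t, as in the SetInt calls in the diff.
  attrs["task_gen_base_addr"] = static_cast<int64_t>(reinterpret_cast<uintptr_t>(data_mem_base));

  // Later consumers convert back the same way.
  void *restored = reinterpret_cast<void *>(static_cast<uintptr_t>(attrs["task_gen_base_addr"]));
  assert(restored == data_mem_base);
  return 0;
}
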
// Part2: Call auto fusion_task_info = - FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; + FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -460,10 +460,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info } GELOGI( - "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" - " task finished, generate %u task(s).", - op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, - task_list_size_after - task_list_size_before); + "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" + " task finished, generate %u task(s).", + op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, + task_list_size_after - task_list_size_before); // record nodes which have call generate task successfully fusion_nodes_seen.insert(fusion_node.get()); @@ -558,7 +558,7 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { Status TaskGenerator::MarkFirstAndLastOps(const vector &ops, bool is_single_stream) const { vector> continuous_op_lists(1); const set separator_types( - {LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, STREAMSWITCH, STREAMSWITCHN}); + {LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, STREAMSWITCH, STREAMSWITCHN}); for (auto &op_desc : ops) { bool attr_notask = false; if (ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask) && attr_notask) { @@ -681,7 +681,8 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP } if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { profiling_point.end_index.insert(current_idx); - GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); + GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", + op_desc->GetName().c_str(), current_idx); } } else { if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { @@ -777,7 +778,8 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin } if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { profiling_point.end_index.insert(current_idx); - GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); + GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", + op_desc->GetName().c_str(), current_idx); } } else { if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { @@ -803,10 +805,11 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes, std::string &fp_point_str, std::string &bp_point_str) const { + if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_FPPONIT_OPTIONS, fp_point_str) == SUCCESS && ge::GetContext().GetOption(OPTION_EXEC_PROFILING_BPPONIT_OPTIONS, bp_point_str) == SUCCESS && !fp_point_str.empty() && !bp_point_str.empty()) { - return SUCCESS; + return SUCCESS; } Status ret = SUCCESS; @@ -886,6 +889,7 @@ Status 
TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } + Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, vector &task_def_list) { diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h old mode 100644 new mode 100755 index 0d482afe..c93b2007 --- a/ge/graph/build/task_generator.h +++ b/ge/graph/build/task_generator.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -120,7 +120,7 @@ class TaskGenerator { vector &all_reduce_nodes) const; Status GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes, - std::string &fp_point_str, std::string &bp_point_str) const; + std::string& fp_point_str, std::string& bp_point_str) const; Status FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, std::vector &all_reduce_nodes) const; diff --git a/ge/graph/common/ge_call_wrapper.h b/ge/graph/common/ge_call_wrapper.h index 305c6c15..55a93951 100644 --- a/ge/graph/common/ge_call_wrapper.h +++ b/ge/graph/common/ge_call_wrapper.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,43 +13,42 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_GE_CALL_WRAPPER_H_ #define GE_GE_CALL_WRAPPER_H_ #include "framework/common/debug/ge_log.h" /*lint --emacro((773),GE_TIMESTAMP_START)*/ /*lint -esym(773,GE_TIMESTAMP_START)*/ -#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap() +#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestamp() -#define GE_TIMESTAMP_END(stage, stage_name) \ - do { \ - uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \ - GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ - (endUsec_##stage - startUsec_##stage)); \ +#define GE_TIMESTAMP_END(stage, stage_name) \ + do { \ + uint64_t endUsec_##stage = ge::GetCurrentTimestamp(); \ + GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ + (endUsec_##stage - startUsec_##stage)); \ } while (0); #define GE_TIMESTAMP_EVENT_END(stage, stage_name) \ do { \ - uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \ + uint64_t endUsec_##stage = ge::GetCurrentTimestamp(); \ GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ (endUsec_##stage - startUsec_##stage)); \ } while (0); #define GE_TIMESTAMP_CALLNUM_START(stage) \ - uint64_t startUsec_##stage = ge::GetCurrentTimestap(); \ + uint64_t startUsec_##stage = ge::GetCurrentTimestamp(); \ uint64_t call_num_of##stage = 0; \ uint64_t time_of##stage = 0 -#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestap()) +#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestamp()) #define GE_TIMESTAMP_ADD(stage) \ - time_of##stage += ge::GetCurrentTimestap() - startUsec_##stage; \ + time_of##stage += ge::GetCurrentTimestamp() - startUsec_##stage; \ call_num_of##stage++ #define 
GE_TIMESTAMP_CALLNUM_END(stage, stage_name) \ GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \ - call_num_of##stage) + call_num_of##stage) #define GE_TIMESTAMP_CALLNUM_EVENT_END(stage, stage_name) \ GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \ @@ -66,11 +65,11 @@ } \ } while (0) -#define RUN_WITH_PERF_TIMESTAMP_NAME(var_name, prefix, func, ...) \ +#define RUN_WITH_PERF_TIMESTAMP_NAME(var_name, prefix, func, ...) \ do { \ GE_TIMESTAMP_START(var_name); \ auto ret_inner_macro = func(__VA_ARGS__); \ - GE_TIMESTAMP_EVENT_END(var_name, #prefix "::" #func) \ + GE_TIMESTAMP_EVENT_END(var_name, #prefix "::" #func) \ if (ret_inner_macro != ge::SUCCESS) { \ GELOGE(ret_inner_macro, "Failed to process " #prefix "_" #func); \ return ret_inner_macro; \ diff --git a/ge/graph/common/local_context.cc b/ge/graph/common/local_context.cc index 43d3bc7c..d3e66861 100644 --- a/ge/graph/common/local_context.cc +++ b/ge/graph/common/local_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,9 @@ namespace { thread_local OmgContext *omg_context = nullptr; } -void SetLocalOmgContext(OmgContext &context) { omg_context = &context; } +void SetLocalOmgContext(OmgContext &context) { + omg_context = &context; +} OmgContext &GetLocalOmgContext() { if (omg_context != nullptr) { @@ -35,4 +37,4 @@ OmgContext &GetLocalOmgContext() { return domi::GetContext(); } } -} // namespace ge +} diff --git a/ge/graph/common/local_context.h b/ge/graph/common/local_context.h index 1cdd2ca1..83367766 100644 --- a/ge/graph/common/local_context.h +++ b/ge/graph/common/local_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc index eb80fb69..684ef3dc 100644 --- a/ge/graph/common/transop_util.cc +++ b/ge/graph/common/transop_util.cc @@ -28,8 +28,9 @@ std::map precision_loss_transfer_map = {{ge::DT_FLOA namespace ge { TransOpUtil::TransOpUtil() { - transop_index_map_ = {{TRANSDATA, 0}, {TRANSPOSE, 0}, {TRANSPOSED, 0}, {RESHAPE, 0}, - {REFORMAT, 0}, {CAST, 0}, {SQUEEZE, 0}, {EXPANDDIMS, 0}}; + transop_index_map_ = {{TRANSDATA, 0}, {TRANSPOSE, 0}, {TRANSPOSED, 0}, + {RESHAPE, 0}, {REFORMAT, 0}, {CAST, 0}, + {SQUEEZE, 0}, {EXPANDDIMS, 0}}; } TransOpUtil::~TransOpUtil() {} @@ -74,7 +75,8 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { auto iter = precision_loss_transfer_map.find(src_dtype); if (iter != precision_loss_transfer_map.end() && iter->second == dst_dtype) { GELOGW("Node %s transfer data type from %s to %s ,it will cause precision loss. 
ignore pass.", - src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_dtype).c_str(), + src_node->GetName().c_str(), + TypeUtils::DataTypeToSerialString(src_dtype).c_str(), TypeUtils::DataTypeToSerialString(dst_dtype).c_str()); return false; } diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc old mode 100644 new mode 100755 index 25208aa4..052d20a0 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -293,8 +293,8 @@ Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vectorGetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); + model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); if (ret != SUCCESS) { GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed."); return ret; @@ -592,7 +592,17 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI GELOGW("GetAIPPInfo is not success."); return ret; } + return SUCCESS; +} +Status GraphExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetAippType(model_id, index, type, aipp_index); + if (ret != SUCCESS) { + GELOGW("Get aipp type is not success."); + return ret; + } return SUCCESS; } diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h old mode 100644 new mode 100755 index 5cf39bae..efc30743 --- a/ge/graph/execute/graph_execute.h +++ b/ge/graph/execute/graph_execute.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -75,6 +75,8 @@ class GraphExecutor { static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup ge /// @brief Get dynamic batch_info diff --git a/ge/graph/label/case_label_maker.cc b/ge/graph/label/case_label_maker.cc index 88b7ee8b..ea4b2a03 100644 --- a/ge/graph/label/case_label_maker.cc +++ b/ge/graph/label/case_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "case_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/case_label_maker.h b/ge/graph/label/case_label_maker.h index 2e3b584b..1078a906 100644 --- a/ge/graph/label/case_label_maker.h +++ b/ge/graph/label/case_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
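
The new GraphExecutor::GetAippType shown above is a thin wrapper: fetch the ModelManager singleton, null-check it, delegate, and log on failure. A self-contained sketch of that shape, with stub enums and a stub manager in place of the real ge::ModelManager, Status and InputAippType types:

#include <cstddef>
#include <cstdint>
#include <cstdio>

enum StatusStub { SUCCESS_STUB = 0, FAILED_STUB = 1 };           // stand-in for ge::Status values
enum AippTypeStub { AIPP_NONE = 0, AIPP_STATIC, AIPP_DYNAMIC };  // stand-in for InputAippType

// Stand-in for ge::ModelManager; the real one resolves the loaded model by model_id.
class ModelManagerStub {
 public:
  static ModelManagerStub *GetInstance() {
    static ModelManagerStub instance;
    return &instance;
  }
  StatusStub GetAippType(uint32_t model_id, uint32_t index, AippTypeStub &type, size_t &aipp_index) {
    (void)model_id;
    (void)index;
    type = AIPP_NONE;  // pretend the queried input has no AIPP configuration
    aipp_index = 0;
    return SUCCESS_STUB;
  }
};

// Same shape as the new wrapper: resolve the singleton, null-check, delegate, log on failure.
StatusStub GetAippType(uint32_t model_id, uint32_t index, AippTypeStub &type, size_t &aipp_index) {
  ModelManagerStub *model_manager = ModelManagerStub::GetInstance();
  if (model_manager == nullptr) {
    return FAILED_STUB;
  }
  StatusStub ret = model_manager->GetAippType(model_id, index, type, aipp_index);
  if (ret != SUCCESS_STUB) {
    std::printf("Get aipp type is not success.\n");
  }
  return ret;
}

int main() {
  AippTypeStub type = AIPP_NONE;
  size_t aipp_index = 0;
  return GetAippType(1, 0, type, aipp_index) == SUCCESS_STUB ? 0 : 1;
}
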
diff --git a/ge/graph/label/if_label_maker.cc b/ge/graph/label/if_label_maker.cc index 62722e7c..d07f7984 100644 --- a/ge/graph/label/if_label_maker.cc +++ b/ge/graph/label/if_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "if_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/if_label_maker.h b/ge/graph/label/if_label_maker.h index 9ffe8fca..0807f549 100644 --- a/ge/graph/label/if_label_maker.h +++ b/ge/graph/label/if_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/label_maker.cc b/ge/graph/label/label_maker.cc index 88b90199..3f643fb2 100644 --- a/ge/graph/label/label_maker.cc +++ b/ge/graph/label/label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/label_maker.h b/ge/graph/label/label_maker.h index 759bf5cf..847c7904 100644 --- a/ge/graph/label/label_maker.h +++ b/ge/graph/label/label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/label_maker_factory.h b/ge/graph/label/label_maker_factory.h index 0a87ec66..e0a48c6b 100644 --- a/ge/graph/label/label_maker_factory.h +++ b/ge/graph/label/label_maker_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,13 +56,12 @@ class LabelMakerFactory { LabelMakerFactory::Instance().RegisterCreator(node_type, func); } - ~Registerar() {} + ~Registerar() = default; }; private: - LabelMakerFactory() {} - - ~LabelMakerFactory() {} + LabelMakerFactory() = default; + ~LabelMakerFactory() = default; // register creator, this function will call in the constructor void RegisterCreator(const std::string &node_type, const LabelCreatorFun func) { diff --git a/ge/graph/label/partitioned_call_label_maker.cc b/ge/graph/label/partitioned_call_label_maker.cc index 64db223b..0be738f0 100644 --- a/ge/graph/label/partitioned_call_label_maker.cc +++ b/ge/graph/label/partitioned_call_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
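
label_maker_factory.h above switches empty constructor and destructor bodies to "= default". A small standalone illustration of the difference in intent; the class below is a simplified stand-in, not the real factory:

#include <map>
#include <string>

class FactorySketch {
 public:
  struct Registerar {
    explicit Registerar(const std::string &node_type) { (void)node_type; }
    ~Registerar() = default;  // previously "~Registerar() {}": defaulting documents intent
  };

 private:
  FactorySketch() = default;  // previously "LabelMakerFactory() {}"
  ~FactorySketch() = default;
  std::map<std::string, int> creators_;
};

int main() {
  FactorySketch::Registerar reg("While");
  (void)reg;
  return 0;
}
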
*/ - #include "partitioned_call_label_maker.h" #include "common/util.h" @@ -50,7 +49,7 @@ Status PartitionedCallLabelMaker::Run(uint32_t &label_index) { return FAILED; } - const std::string stream_active_name = parent_node_->GetName() + "/StreamActive"; // rtStreamActive + const std::string stream_active_name = parent_node_->GetName() + "/StreamActive"; // rtStreamActive NodePtr stream_active = AddStreamActive(sub_graph, stream_active_name); if (stream_active == nullptr) { GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active node failed.", sub_graph->GetName().c_str()); @@ -71,3 +70,4 @@ Status PartitionedCallLabelMaker::Run(uint32_t &label_index) { REGISTER_LABEL_MAKER(PARTITIONEDCALL, PartitionedCallLabelMaker); REGISTER_LABEL_MAKER(STATEFULPARTITIONEDCALL, PartitionedCallLabelMaker); } // namespace ge + diff --git a/ge/graph/label/partitioned_call_label_maker.h b/ge/graph/label/partitioned_call_label_maker.h index 1c0f0890..b89cb94c 100644 --- a/ge/graph/label/partitioned_call_label_maker.h +++ b/ge/graph/label/partitioned_call_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/while_label_maker.cc b/ge/graph/label/while_label_maker.cc index c5e0abb7..83aad7c9 100644 --- a/ge/graph/label/while_label_maker.cc +++ b/ge/graph/label/while_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "while_label_maker.h" #include "common/util.h" @@ -117,8 +116,8 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { // link Data input. const auto &all_in_data = cond_out_node->GetAllInDataAnchors(); if (all_in_data.size() != kCondOutputNum) { - GELOGE(FAILED, "Node: %s Cond sbugraph output size:%zu should equal size:%u.", switch_node->GetName().c_str(), - all_in_data.size(), kCondOutputNum); + GELOGE(FAILED, "Node: %s Cond sbugraph output size:%zu should equal size:%u.", + switch_node->GetName().c_str(), all_in_data.size(), kCondOutputNum); return FAILED; } diff --git a/ge/graph/label/while_label_maker.h b/ge/graph/label/while_label_maker.h index 42e6a490..0eb0deee 100644 --- a/ge/graph/label/while_label_maker.h +++ b/ge/graph/label/while_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc old mode 100644 new mode 100755 index c173d67a..cffd07e5 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -141,7 +141,7 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string } return ret; } - return SUCCESS; + return SUCCESS; } Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, @@ -224,13 +224,14 @@ Status GraphLoader::CommandHandle(const Command &command) { return SUCCESS; } -Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t memsize, - void *weight_ptr, size_t weightsize) { +Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, + size_t memsize, void *weight_ptr, size_t weightsize) { GELOGI("Load model begin, model_id:%u.", model_id); // For ACL, Open Device from App. auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->LoadModelOffline(model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); + Status ret = model_manager->LoadModelOffline( + model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); if (ret != SUCCESS) { GELOGE(ret, "Load model failed, model_id:%u.", model_id); return ret; @@ -309,8 +310,8 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { return RT_FAILED; } // Add small page memory size - free = - static_cast(free_mem + VarManager::Instance(GetContext().SessionId())->GetUseMaxMemorySize() - total_mem); + free = static_cast(free_mem + VarManager::Instance(GetContext().SessionId())->GetUseMaxMemorySize() - + total_mem); GELOGI("GetMemoryInfo free[%zu], total[%zu], return free[%ld]", free_mem, total_mem, free); return SUCCESS; } diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h old mode 100644 new mode 100755 index c887c06b..b581f2fa --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/new_model_manager/aipp_utils.cc old mode 100644 new mode 100755 index 0a348109..e0e60d2b --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/new_model_manager/aipp_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
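
GetMemoryInfo() above reflows the free-memory expression. Plugging illustrative numbers into the same shape of expression makes the arithmetic easier to follow; the sizes and the GetUseMaxMemorySize stand-in below are made up, and the real code works in bytes from the runtime query:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative sizes in megabytes.
  size_t free_mem = 6 * 1024;         // free device memory
  size_t total_mem = 32 * 1024;       // total device memory
  size_t var_mem_budget = 28 * 1024;  // stand-in for GetUseMaxMemorySize()

  // Same shape as the reflowed expression: add the variable-memory budget back in
  // and cast to the signed output type.
  int64_t free_for_graph = static_cast<int64_t>(free_mem + var_mem_budget - total_mem);
  std::printf("free[%zu] MB, total[%zu] MB, return free[%lld] MB\n",
              free_mem, total_mem, static_cast<long long>(free_for_graph));
  return 0;
}
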
@@ -29,11 +29,11 @@ namespace ge { #define AIPP_CONVERT_TO_AIPP_INFO(KEY) aipp_info.KEY = aipp_params->KEY() -#define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \ - do { \ - if (aipp_params->KEY##_size() > 0) { \ - aipp_info.KEY = aipp_params->KEY(INDEX); \ - } \ +#define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \ + do { \ + if (aipp_params->KEY##_size() > 0) { \ + aipp_info.KEY = aipp_params->KEY(INDEX); \ + } \ } while (0) Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) { diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/new_model_manager/aipp_utils.h old mode 100644 new mode 100755 index 2534b9fb..78107f3e --- a/ge/graph/load/new_model_manager/aipp_utils.h +++ b/ge/graph/load/new_model_manager/aipp_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/new_model_manager/cpu_queue_schedule.cc index 01e1cfa8..7f406985 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/new_model_manager/cpu_queue_schedule.h index cea00613..8999e975 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
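
The aipp_utils.cc hunk above only realigns AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX, but the macro relies on the usual do { ... } while (0) idiom: it turns a multi-statement body into a single statement so the macro composes safely with if/else. A self-contained illustration with a hypothetical macro:

#include <cstdio>

#define LOG_IF_POSITIVE(value)                 \
  do {                                         \
    if ((value) > 0) {                         \
      std::printf("positive: %d\n", (value));  \
    }                                          \
  } while (0)

int main() {
  int x = 3;
  if (x > 0)
    LOG_IF_POSITIVE(x);  // expands to exactly one statement, so the else below still binds correctly
  else
    std::printf("non-positive\n");
  return 0;
}
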
*/ - #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_ @@ -28,24 +27,24 @@ namespace ge { // For AICPU task "modelDequeue" / "modelEnqueue" struct MbufQueueInfo { - uint32_t queue_id; // Op queue id - uintptr_t in_mbuf; // addr for input mbuf + uint32_t queue_id; // Op queue id + uintptr_t in_mbuf; // addr for input mbuf }; // For AICPU task "modelPrepareInput" struct PrepareInputInfo { - uintptr_t in_mbuf; // input mbuf from dequeue - uint32_t mbuf_offset; // offset of mbuf(current is 0) - uint32_t data_size; // input Tensor size - uintptr_t data_addr; // input Tensor addr + uintptr_t in_mbuf; // input mbuf from dequeue + uint32_t mbuf_offset; // offset of mbuf(current is 0) + uint32_t data_size; // input Tensor size + uintptr_t data_addr; // input Tensor addr }; // For AICPU task "modelPrepareOutput" struct PrepareOutputInfo { - uint32_t data_size; // output Tensor size - uintptr_t data_addr; // output Tensor addr - uintptr_t in_mbuf; // input mbuf, for fill output mbuf header - uintptr_t out_mbuf; // output mbuf addr + uint32_t data_size; // output Tensor size + uintptr_t data_addr; // output Tensor addr + uintptr_t in_mbuf; // input mbuf, for fill output mbuf header + uintptr_t out_mbuf; // output mbuf addr }; // For AICPU task "modelZeroCopy" @@ -97,8 +96,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { Status Init(std::vector &mbuf_list, std::map &outside_addrs); Status Distribute() override; - - private: +private: void *src_addr_ = nullptr; void *dst_addr_ = nullptr; }; diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index e4e3a63f..c6100129 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -83,36 +83,44 @@ static uint64_t GetNowTime() { return ret; } + +static void ReplaceStringElem(std::string &str) { + for_each(str.begin(), str.end(), [](char &ch) { + if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) { + ch = '_'; + } + }); +} } // namespace static int32_t GetIrDataType(ge::DataType data_type) { static const std::map data_type_map = { - {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, - {ge::DT_FLOAT, ge::proto::DT_FLOAT}, - {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, - {ge::DT_INT8, ge::proto::DT_INT8}, - {ge::DT_UINT8, ge::proto::DT_UINT8}, - {ge::DT_INT16, ge::proto::DT_INT16}, - {ge::DT_UINT16, ge::proto::DT_UINT16}, - {ge::DT_INT32, ge::proto::DT_INT32}, - {ge::DT_INT64, ge::proto::DT_INT64}, - {ge::DT_UINT32, ge::proto::DT_UINT32}, - {ge::DT_UINT64, ge::proto::DT_UINT64}, - {ge::DT_BOOL, ge::proto::DT_BOOL}, - {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, - {ge::DT_DUAL, ge::proto::DT_DUAL}, - {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, - {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, - {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, - {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, - {ge::DT_QINT8, ge::proto::DT_QINT8}, - {ge::DT_QINT16, ge::proto::DT_QINT16}, - {ge::DT_QINT32, ge::proto::DT_QINT32}, - {ge::DT_QUINT8, ge::proto::DT_QUINT8}, - {ge::DT_QUINT16, ge::proto::DT_QUINT16}, - {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, - {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, - {ge::DT_STRING, ge::proto::DT_STRING}, + {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, + {ge::DT_FLOAT, ge::proto::DT_FLOAT}, + {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, + {ge::DT_INT8, ge::proto::DT_INT8}, + {ge::DT_UINT8, ge::proto::DT_UINT8}, + {ge::DT_INT16, ge::proto::DT_INT16}, + {ge::DT_UINT16, ge::proto::DT_UINT16}, + {ge::DT_INT32, ge::proto::DT_INT32}, + {ge::DT_INT64, ge::proto::DT_INT64}, + {ge::DT_UINT32, ge::proto::DT_UINT32}, + {ge::DT_UINT64, ge::proto::DT_UINT64}, + {ge::DT_BOOL, ge::proto::DT_BOOL}, + {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, + {ge::DT_DUAL, ge::proto::DT_DUAL}, + {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, + {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, + {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, + {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, + {ge::DT_QINT8, ge::proto::DT_QINT8}, + {ge::DT_QINT16, ge::proto::DT_QINT16}, + {ge::DT_QINT32, ge::proto::DT_QINT32}, + {ge::DT_QUINT8, ge::proto::DT_QUINT8}, + {ge::DT_QUINT16, ge::proto::DT_QUINT16}, + {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, + {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, + {ge::DT_STRING, ge::proto::DT_STRING}, }; auto iter = data_type_map.find(data_type); @@ -169,7 +177,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr &node) { } input_map_.insert( - {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); + {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); } } GELOGI("Save data message successfully"); @@ -196,14 +204,17 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr op_desc_info.op_type = op->GetType(); op_desc_info.task_id = task_id; op_desc_info.stream_id = stream_id; - for (size_t i = 0; i < op->GetInputsSize(); ++i) { - GeTensorDesc input_desc = op->GetInputDesc(i); - op_desc_info.input_format.emplace_back(input_desc.GetFormat()); - op_desc_info.input_shape.emplace_back(input_desc.GetShape().GetDims()); - op_desc_info.input_data_type.emplace_back(input_desc.GetDataType()); + for (size_t i = 0; i < op->GetAllInputsSize(); ++i) 
{ + GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); + if (input_tensor_desc == nullptr) { + continue; + } + op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat()); + op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); + op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType()); int64_t input_size = 0; - auto tensor_descs = op->GetAllInputsDesc(); - if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(i), input_size) != SUCCESS) { + + if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) { GELOGW("Get input size failed"); return; } @@ -211,13 +222,15 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr op_desc_info.input_size.emplace_back(input_size); } for (size_t j = 0; j < op->GetOutputsSize(); ++j) { - GeTensorDesc output_desc = op->GetOutputDesc(j); - op_desc_info.output_format.emplace_back(output_desc.GetFormat()); - op_desc_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); - op_desc_info.output_data_type.emplace_back(output_desc.GetDataType()); + GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); + if (output_tensor_desc == nullptr) { + continue; + } + op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat()); + op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); + op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType()); int64_t output_size = 0; - auto tensor_descs = op->GetAllOutputsDesc(); - if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(j), output_size) != SUCCESS) { + if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) { GELOGW("Get input size failed"); return; } @@ -671,12 +684,32 @@ Status DataDumper::LoadDumpInfo() { op_mapping_info.set_flag(kAicpuLoadFlag); op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); - GELOGI("Dump step is %s and dump path is %s in load dump info", dump_properties_.GetDumpStep().c_str(), - dump_path.c_str()); + GELOGI("Dump step is %s and dump path is %s dump model is %s in load dump info", + dump_properties_.GetDumpStep().c_str(), dump_path.c_str(), dump_list_key.c_str()); + auto ret = BuildTaskInfo(op_mapping_info); + if (ret != SUCCESS) { + GELOGE(ret, "Build task info failed"); + return ret; + } + SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info); + + SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); + + if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { + auto ret = ExecuteLoadDumpInfo(op_mapping_info); + if (ret != SUCCESS) { + GELOGE(ret, "Execute load dump info failed"); + return ret; + } + } + return SUCCESS; +} + +Status DataDumper::BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info) { for (const auto &op_iter : op_list_) { auto op_desc = op_iter.op; - GELOGD("Op %s in model %s begin to add task in op_mapping_info", op_desc->GetName().c_str(), dump_list_key.c_str()); + GELOGD("Op %s in model begin to add task in op_mapping_info", op_desc->GetName().c_str()); aicpu::dump::Task task; task.set_end_graph(false); task.set_task_id(op_iter.task_id); @@ -704,7 +737,7 @@ Status DataDumper::LoadDumpInfo() { op_mapping_info.mutable_task()->Add(std::move(task)); continue; } - if (dump_properties_.GetDumpMode() == kDumpAll) { + if (dump_properties_.GetDumpMode() == kDumpAll || is_op_debug_) { auto ret = 
DumpOutput(op_iter, task); if (ret != SUCCESS) { GELOGE(ret, "Dump output failed when in dumping all"); @@ -721,18 +754,6 @@ Status DataDumper::LoadDumpInfo() { continue; } } - - SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info); - - SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); - - if (!op_list_.empty() || is_op_debug_) { - auto ret = ExecuteLoadDumpInfo(op_mapping_info); - if (ret != SUCCESS) { - GELOGE(ret, "Execute load dump info failed"); - return ret; - } - } return SUCCESS; } @@ -740,7 +761,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info) { if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput || dump_properties_.GetDumpMode() == kDumpAll) { - GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); aicpu::dump::Task task; task.set_end_graph(true); task.set_task_id(end_graph_task_id_); @@ -748,6 +768,14 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH); task.mutable_op()->set_op_type(ENDGRAPH); op_mapping_info.mutable_task()->Add(std::move(task)); + + is_end_graph_ = true; + if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) { + GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u", + op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_); + return; + } + GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); } } @@ -899,8 +927,14 @@ Status DataDumper::DumpExceptionInfo(const std::vector exceptio dump_data.mutable_output()->Add(std::move(output)); } uint64_t now_time = GetNowTime(); - string dump_file_path = "./" + op_desc_info.op_type + "." + op_desc_info.op_name + "." + - to_string(op_desc_info.task_id) + "." + to_string(now_time); + std::string op_name = op_desc_info.op_name; + std::string op_type = op_desc_info.op_type; + ReplaceStringElem(op_name); + ReplaceStringElem(op_type); + string dump_file_path = + "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time); + GELOGI("The exception dump file path is %s", dump_file_path.c_str()); + uint64_t proto_size = dump_data.ByteSizeLong(); unique_ptr proto_msg(new (std::nothrow) char[proto_size]); bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h old mode 100644 new mode 100755 index 0a1c2274..17cb16f8 --- a/ge/graph/load/new_model_manager/data_dumper.h +++ b/ge/graph/load/new_model_manager/data_dumper.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
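
DumpExceptionInfo() above now sanitizes the op type and op name with the ReplaceStringElem helper added earlier in data_dumper.cc before composing the dump file path. The snippet below mirrors that helper and path composition; the node name, task id and timestamp are illustrative values, not taken from the diff:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>

// Same idea as the helper in the diff: characters that are awkward in file names
// (space, '.', '/', '\\') are replaced with '_'.
static void ReplaceStringElem(std::string &str) {
  std::for_each(str.begin(), str.end(), [](char &ch) {
    if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) {
      ch = '_';
    }
  });
}

int main() {
  std::string op_type = "Conv2D";
  std::string op_name = "network/layer1/conv.0";  // illustrative node name
  uint32_t task_id = 7;
  uint64_t now_time = 1600000000;                 // stand-in for GetNowTime()

  ReplaceStringElem(op_name);
  ReplaceStringElem(op_type);
  std::string dump_file_path =
      "./" + op_type + "." + op_name + "." + std::to_string(task_id) + "." + std::to_string(now_time);
  std::cout << dump_file_path << std::endl;  // ./Conv2D.network_layer1_conv_0.7.1600000000
  return 0;
}
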
@@ -116,6 +116,7 @@ class DataDumper { std::vector op_list_; uint32_t end_graph_task_id_ = 0; uint32_t end_graph_stream_id_ = 0; + bool is_end_graph_ = false; std::multimap input_map_; bool load_flag_; uint32_t device_id_; @@ -126,6 +127,7 @@ class DataDumper { std::map ref_info_; void *l1_fusion_addr_ = nullptr; + uint32_t op_debug_task_id_ = 0; uint32_t op_debug_stream_id_ = 0; void *op_debug_addr_ = nullptr; @@ -133,22 +135,28 @@ class DataDumper { DumpProperties dump_properties_; + // Build task info of op mapping info + Status BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info); Status DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output, size_t i, const std::string &node_name_index); Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, - const std::string &node_name_index); + const std::string &node_name_index); Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, aicpu::dump::OpMappingInfo &op_mapping_info); Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); - Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor &tensor_descs, - const uintptr_t &addr, size_t index); - Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor &tensor_descs, - const uintptr_t &addr, size_t index); + Status GenerateInput(aicpu::dump::Input &input, + const OpDesc::Vistor &tensor_descs, + const uintptr_t &addr, + size_t index); + Status GenerateOutput(aicpu::dump::Output &output, + const OpDesc::Vistor &tensor_descs, + const uintptr_t &addr, + size_t index); void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); }; struct DataDumper::InnerDumpInfo { diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/new_model_manager/data_inputer.cc old mode 100644 new mode 100755 index 594a7bcd..5efc710e --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/new_model_manager/data_inputer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/new_model_manager/data_inputer.h old mode 100644 new mode 100755 index cc511c36..14ebcea5 --- a/ge/graph/load/new_model_manager/data_inputer.h +++ b/ge/graph/load/new_model_manager/data_inputer.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
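With the BuildTaskInfo declaration added above, the refactored LoadDumpInfo flow is: build the per-op dump tasks, register the end-graph and op-debug markers, and push the mapping to AICPU only when there is anything to push. A simplified sketch of that control flow, with illustrative names in place of the aicpu::dump types:

#include <iostream>
#include <vector>

struct MappingInfo {
  std::vector<int> tasks;
  bool has_end_graph = false;
};

static bool BuildTaskInfo(MappingInfo &info, const std::vector<int> &op_task_ids) {
  for (int id : op_task_ids) {
    info.tasks.push_back(id);  // one dump task per op
  }
  return true;
}

static bool LoadDumpInfo(const std::vector<int> &op_task_ids, bool is_op_debug) {
  MappingInfo info;
  if (!BuildTaskInfo(info, op_task_ids)) {
    return false;
  }
  info.has_end_graph = true;  // SetEndGraphIdToAicpu analogue (mode checks omitted)
  // SetOpDebugIdToAicpu analogue would go here.
  if (!op_task_ids.empty() || is_op_debug || info.has_end_graph) {
    std::cout << "execute load with " << info.tasks.size() << " tasks\n";
  }
  return true;
}

int main() { return LoadDumpInfo({1, 2, 3}, false) ? 0 : 1; }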
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc old mode 100644 new mode 100755 index 3c2aaffa..85ef4d83 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -125,7 +125,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr op_desc_handle = { - {VARIABLE, &DavinciModel::InitVariable}, - {CONSTANTOP, &DavinciModel::InitConstant}, - {STREAMACTIVE, &DavinciModel::InitStreamActive}, - {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, - {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, - {LABELSET, &DavinciModel::InitLabelSet}, - {CASE, &DavinciModel::InitCase}, + {VARIABLE, &DavinciModel::InitVariable}, + {CONSTANTOP, &DavinciModel::InitConstant}, + {STREAMACTIVE, &DavinciModel::InitStreamActive}, + {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, + {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, + {LABELSET, &DavinciModel::InitLabelSet}, + {CASE, &DavinciModel::InitCase}, }; GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); map data_by_index; auto nodes = compute_graph->GetAllNodes(); - const TBEKernelStore &tbekernel_store = ge_model_->GetTBEKernelStore(); const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); for (size_t i = 0; i < nodes.size(); i++) { auto node = nodes.at(i); @@ -782,7 +782,6 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { op_list_[op_desc->GetId()] = op_desc; GE_TIMESTAMP_RESTART(LoadTBEKernelBinToOpDesc); - tbekernel_store.LoadTBEKernelBinToOpDesc(op_desc); aicpu_kernel_store.LoadCustAICPUKernelBinToOpDesc(op_desc); GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); @@ -898,8 +897,8 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma } uint32_t parent_index = 0; // Ignore subgraph Data Node. if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGI("Skip subgraph Data node: %s.", op_desc->GetName().c_str()); - return SUCCESS; + GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); + return InitInputBatchLabel(node); } data_op_list_.push_back(op_desc); @@ -1010,9 +1009,9 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); GE_CHECK_NOTNULL(owner_graph); if (owner_graph->GetParentGraph() != nullptr) { - GELOGI("Skip subgraph NetOutput node: %s.", op_desc->GetName().c_str()); + GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); op_list_.erase(op_desc->GetId()); - return SUCCESS; + return InitOutputBatchLabel(node); } output_op_list_.push_back(op_desc); @@ -1058,7 +1057,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { for (size_t i = 0; i < tensor_addrs.size(); ++i) { void *real_addr = tensor_addrs.at(i); DisableZeroCopy(real_addr); - real_virtual_addrs_.emplace_back(real_addr); + real_virtual_addrs_.insert(real_addr); } GELOGI("SetOutputOutsideAddr success."); } @@ -1124,6 +1123,68 @@ Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) { return SUCCESS; } +/// +/// @ingroup ge +/// @brief input zero copy node Initialize. +/// @param [in] NodePtr: Data Op. +/// @return Status +/// +Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { + string batch_label; + if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { + return SUCCESS; // Not Multi-batch. 
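InitNetOutput above now inserts into real_virtual_addrs_, which later hunks change from a std::vector to a std::set so that DisableZeroCopy can test membership with set::find instead of a linear std::find over the vector. A small self-contained illustration of that lookup:

#include <iostream>
#include <set>

int main() {
  std::set<const void *> real_virtual_addrs;
  int a = 0, b = 0;
  real_virtual_addrs.insert(&a);

  // Membership test is O(log n) on the set.
  auto disable_zero_copy = [&](const void *addr) {
    if (real_virtual_addrs.find(addr) == real_virtual_addrs.end()) {
      return false;  // unknown address: nothing to disable
    }
    return true;
  };

  std::cout << disable_zero_copy(&a) << disable_zero_copy(&b) << "\n";  // prints 10
  return 0;
}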
+ } + + const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex); + GE_CHECK_NOTNULL(out_data_anchor); + + for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + const auto &node = peer_in_data_anchor->GetOwnerNode(); + const auto &op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { + zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; + GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", + op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); + } + } + + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief output zero copy node Initialize for Case. +/// @param [in] NodePtr: netoutput Op. +/// @return Status +/// +Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) { + string batch_label; + if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { + return SUCCESS; // Not Multi-batch. + } + + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + const auto &peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + if (peer_out_data_anchor == nullptr) { + continue; + } + + const auto &peer_node = peer_out_data_anchor->GetOwnerNode(); + const auto &op_desc = peer_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { + zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; + GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s", + op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); + } + } + + return SUCCESS; +} + /// @ingroup ge /// @brief LabelSet Op Initialize. /// @param [in] op_desc: LabelSet Op descriptor. 
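InitInputBatchLabel and InitOutputBatchLabel above follow the same pattern: read ATTR_NAME_BATCH_LABEL from the Data or NetOutput node, walk its peer anchors, and record each peer op's id against that label only when no entry exists yet. The record-if-absent step in isolation, with a std::map standing in for zero_copy_op_id_batch_label_:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<int64_t, std::string> zero_copy_op_id_batch_label;

  auto record = [&](int64_t op_id, const std::string &batch_label) {
    if (zero_copy_op_id_batch_label.find(op_id) == zero_copy_op_id_batch_label.end()) {
      zero_copy_op_id_batch_label[op_id] = batch_label;
    }
  };

  record(7, "batch_label_0");
  record(7, "batch_label_1");  // ignored: the first label wins
  std::cout << zero_copy_op_id_batch_label[7] << "\n";  // batch_label_0
  return 0;
}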
@@ -1573,6 +1634,48 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { return SUCCESS; } +Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) { + GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); + // Set default value + type = DATA_WITHOUT_AIPP; + aipp_index = 0xFFFFFFFF; // default invalid value + OpDescPtr data_op = data_op_list_[index]; + GE_CHECK_NOTNULL(data_op); + if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { + GELOGW("There is no aipp releated info with index %u.", index); + return SUCCESS; + } + std::string data_mode; + (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode); + if (data_mode == "static_aipp") { + type = DATA_WITH_STATIC_AIPP; + } else if (data_mode == "dynamic_aipp") { + type = DATA_WITH_DYNAMIC_AIPP; + } else if (data_mode == "dynamic_aipp_conf") { + type = DYNAMIC_AIPP_NODE; + } else { + GELOGE(INTERNAL_ERROR, "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); + return INTERNAL_ERROR; + } + + if (type == DATA_WITH_DYNAMIC_AIPP) { + string releated_name; + (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); + for (size_t i = 0; i < data_op_list_.size(); ++i) { + GE_CHECK_NOTNULL(data_op_list_[i]); + if (data_op_list_[i]->GetName() == releated_name) { + GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index); + aipp_index = i; + } + } + if (aipp_index == 0xFFFFFFFF) { + GELOGE(INTERNAL_ERROR, "Can not find aipp data node from index %u", index); + return INTERNAL_ERROR; + } + } + return SUCCESS; +} + void DavinciModel::SetDynamicSize(const std::vector &batch_num, int32_t dynamic_type) { batch_size_.clear(); if (batch_num.empty()) { @@ -1666,9 +1769,9 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, return; } // judge if this data is linked dynamic aipp first, multiply batch has been considered - if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { + if (op_desc->HasAttr(ATTR_DYNAMIC_AIPP_INPUT_DIMS)) { vector dynamic_aipp_input_dims; - (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); + (void)AttrUtils::GetListInt(op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_input_dims); SetInputDimsInfo(dynamic_aipp_input_dims, format, input); return; } else { @@ -1713,7 +1816,7 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD uint32_t &format_result) { /// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); - return ); + return); Format format = op_desc->GetInputDescPtr(index)->GetFormat(); GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType(); @@ -1777,7 +1880,7 @@ Status DavinciModel::GetOutputDescInfo(vector &output_desc, // neweast plan, the index will add to name during generate model. bool contains_colon = out_node_name_[index].find(":") != std::string::npos; output_name = - contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); + contains_colon ? 
out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); } else { output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); @@ -1849,7 +1952,7 @@ Status DavinciModel::SyncVarData() { for (auto op_desc : variable_op_list_) { ret = - VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, op_desc->GetName().c_str()); } @@ -2020,7 +2123,7 @@ Status DavinciModel::SinkModelProfile() { memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); memory_info.total_size = - memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; + memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; reporter_data.data = (unsigned char *)&memory_info; reporter_data.dataLen = sizeof(struct memoryInfo); GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", @@ -2054,9 +2157,9 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { // report model data tag name std::string tag_name; tag_name.append("model_time_info_") - .append(std::to_string(this->Id())) - .append("_") - .append(std::to_string(current_data.index)); + .append(std::to_string(this->Id())) + .append("_") + .append(std::to_string(current_data.index)); GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); @@ -2327,7 +2430,7 @@ Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); for (auto op_desc : variable_op_list_) { Status ret = VarManager::Instance(session_id_) - ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, op_desc->GetName().c_str()); } @@ -2379,9 +2482,9 @@ void *DavinciModel::Run(DavinciModel *model) { GE_TIMESTAMP_START(Model_SyncVarData); ret = model->SyncVarData(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "Copy input data to model failed."); // [No need to check value] + ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "Copy input data to model failed."); // [No need to check value] GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); GELOGI("Copy input data, model id:%u", model_id); @@ -2389,9 +2492,9 @@ void *DavinciModel::Run(DavinciModel *model) { model->SetProfileTime(MODEL_PRE_PROC_START)); ret = model->CopyInputData(current_data, false); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, 
(void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "Copy input data to model failed."); // [No need to check value] + ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "Copy input data to model failed."); // [No need to check value] GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); if (ProfilingManager::Instance().ProfilingOpTraceOn()) { @@ -2438,11 +2541,11 @@ void *DavinciModel::Run(DavinciModel *model) { seq_end_flag = true; } GE_IF_BOOL_EXEC( - rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); - (void)model->ReturnResult(current_data.index, false, seq_end_flag, - data_wrapper->GetOutput()); // [No need to check value] - CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); - continue); + rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); + (void)model->ReturnResult(current_data.index, false, seq_end_flag, + data_wrapper->GetOutput()); // [No need to check value] + CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); + continue); GELOGI("rtStreamSynchronize end."); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); @@ -2646,12 +2749,19 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); - uint32_t total_addr_size = total_io_addrs_.size() * sizeof(uint64_t); - GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, src size %u", args_, total_args_size_, - total_addr_size); + if (total_args_size_ == 0) { + GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", + args_, total_args_size_); + } else { + uint32_t total_addr_size = total_io_addrs_.size() * sizeof(uint64_t); + GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, src size %u", + args_, total_args_size_, total_addr_size); - Status rt_ret = rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) + Status rt_ret = rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), + total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, + GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) + } GELOGI("DavinciModel::UpdateKnownNodeArgs success"); return SUCCESS; @@ -2696,6 +2806,11 @@ Status DavinciModel::MallocKnownArgs() { } } // malloc args memory + if (total_args_size_ == 0) { + GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero."); + return SUCCESS; + } + rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); @@ -2744,15 +2859,12 @@ Status DavinciModel::DistributeTask() { auto op_index = 
std::max(model_task_def->task(task_index).kernel().context().op_index(), model_task_def->task(task_index).kernel_ex().op_index()); OpDescPtr op = GetOpByIndex(op_index); - if (op == nullptr) { - GELOGE(PARAM_INVALID, "Op index %u is null, op list size %zu.", op_index, op_list_.size()); - return PARAM_INVALID; - } + GE_CHECK_NOTNULL(op); SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); if (reinterpret_cast(task->GetDumpArgs()) != nullptr) { bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); - if (call_dump) { + if (call_dump || is_op_debug_reg_) { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } @@ -2832,7 +2944,7 @@ void DavinciModel::SetCopyOnlyOutput() { /// @return None. /// void DavinciModel::DisableZeroCopy(const void *addr) { - if (find(real_virtual_addrs_.begin(), real_virtual_addrs_.end(), addr) == real_virtual_addrs_.end()) { + if (real_virtual_addrs_.find(addr) == real_virtual_addrs_.end()) { return; } @@ -2931,9 +3043,9 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 if (input_size > op_size) { GELOGW( - "Input size [%u] is bigger than om size need [%u], " - "MAY cause inference result ERROR, please check model input", - input_size, op_size); + "Input size [%u] is bigger than om size need [%u], " + "MAY cause inference result ERROR, please check model input", + input_size, op_size); } bool is_dynamic_aipp = false; for (const auto &op_desc : data_op_list_) { @@ -3046,8 +3158,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & for (size_t count = 0; count < data.second.GetDataCount(); ++count) { int64_t size = data.second.GetDataInfo().at(count).first; void *addr = data.second.GetDataInfo().at(count).second; - void *buffer_addr = - reinterpret_cast(reinterpret_cast(buffer.data) + data.second.GetRelativeOffset().at(count)); + void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data) + + data.second.GetRelativeOffset().at(count)); GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(), data.first, addr, size, buffer_addr); // For input data, just copy for rts task. 
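The UpdateKnownNodeArgs and MallocKnownArgs hunks above now short-circuit when total_args_size_ is zero instead of issuing zero-byte rtMalloc/rtMemcpy calls. The guard, sketched with std::memcpy standing in for the runtime calls:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  std::vector<uint64_t> io_addrs;  // may legitimately be empty for a known node
  size_t total_args_size = io_addrs.size() * sizeof(uint64_t);

  std::vector<uint8_t> device_args;
  if (total_args_size == 0) {
    std::cout << "no args, skip allocation and copy\n";  // warn and return SUCCESS
  } else {
    device_args.resize(total_args_size);                       // rtMalloc analogue
    std::memcpy(device_args.data(), io_addrs.data(), total_args_size);  // rtMemcpy analogue
  }
  return 0;
}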
@@ -3121,10 +3233,10 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { return PARAM_INVALID;); GeTensor *tensor = const_cast(v_weights[0].get()); - GE_IF_BOOL_EXEC( - static_cast(v_output_size[0]) < tensor->GetData().size(), - GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], tensor->GetData().size()); - return PARAM_INVALID;); + GE_IF_BOOL_EXEC(static_cast(v_output_size[0]) < tensor->GetData().size(), + GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], + tensor->GetData().size()); + return PARAM_INVALID;); GE_IF_BOOL_EXEC(tensor->GetData().size() == 0, GELOGW("const op:%s has no weight data.", op_desc->GetName().c_str()); return SUCCESS;); @@ -3146,7 +3258,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { uint64_t offset = static_cast(elem_num * kBytes); uint64_t hbm_raw_data_base_addr = - reinterpret_cast(reinterpret_cast(v_output_addr[0])) + offset; + reinterpret_cast(reinterpret_cast(v_output_addr[0])) + offset; for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); } @@ -3166,7 +3278,8 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { /// @return Status /// Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { - TBEKernelPtr tbe_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + auto kernel = ge_model_->GetTBEKernelStore().FindKernel(op_desc->GetName()); + auto tbe_kernel = (kernel != nullptr) ? kernel : op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc->GetName().c_str()); return INTERNAL_ERROR; @@ -3191,6 +3304,8 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { binary.magic = RT_DEV_BINARY_MAGIC_ELF; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; } else { GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); return PARAM_INVALID; @@ -3371,11 +3486,15 @@ bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { /// @return Status /// Status DavinciModel::InitModelStream(rtStream_t stream) { + ExecuteMode curr_mode = is_async_mode_ ? ASYNCHRONIZATION : SYNCHRONIZATION; + GE_CHK_BOOL_RET_STATUS((curr_mode == last_execute_mode_) || (last_execute_mode_ == INITIALIZATION), INTERNAL_ERROR, + "NnExecute not support mix execute."); + last_execute_mode_ = curr_mode; + // asynchronize mode, use user input stream. 
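InitTbeHandle above now looks the TBE kernel up in the model-level kernel store first and falls back to the op's OP_EXTATTR_NAME_TBE_KERNEL ext attribute only when the store has no entry. A simplified stand-in for that lookup-with-fallback, with a std::map playing the role of the kernel store and shared_ptr standing in for TBEKernelPtr:

#include <iostream>
#include <map>
#include <memory>
#include <string>

using KernelPtr = std::shared_ptr<std::string>;  // stand-in for TBEKernelPtr

static KernelPtr FindKernel(const std::map<std::string, KernelPtr> &store,
                            const std::string &name) {
  auto it = store.find(name);
  return it == store.end() ? nullptr : it->second;
}

int main() {
  std::map<std::string, KernelPtr> kernel_store;   // ge_model-level store (empty here)
  KernelPtr op_ext_attr = std::make_shared<std::string>("legacy-bin");

  auto kernel = FindKernel(kernel_store, "conv1");
  auto tbe_kernel = (kernel != nullptr) ? kernel : op_ext_attr;  // fallback path
  std::cout << (tbe_kernel ? *tbe_kernel : "missing") << "\n";   // legacy-bin
  return 0;
}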
if (is_async_mode_) { rt_model_stream_ = stream; is_inner_model_stream_ = false; - last_execute_mode_ = true; return SUCCESS; } @@ -3387,14 +3506,12 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { rt_model_stream_ = stream; is_inner_model_stream_ = false; - last_execute_mode_ = false; return SUCCESS; } - if (last_execute_mode_ || (rt_model_stream_ == nullptr)) { + if (rt_model_stream_ == nullptr) { GE_CHK_RT_RET(rtStreamCreateWithFlags(&rt_model_stream_, priority_, RT_STREAM_FORBIDDEN_DEFAULT)); is_inner_model_stream_ = true; - last_execute_mode_ = false; } return SUCCESS; @@ -3521,7 +3638,7 @@ uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { string weight_memory_key = std::to_string(0) + "_w"; weights_mem_base = - MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); + MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); } else { weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId()); } @@ -3581,7 +3698,7 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) } GE_CHK_STATUS_RET_NOLOG( - TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum)); + TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum)); GELOGI("TransAllVarData success."); return SUCCESS; @@ -3662,11 +3779,14 @@ Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vectorGetName(); compute_graph_info.op_type = op_desc->GetType(); - for (size_t i = 0; i < op_desc->GetInputsSize(); ++i) { - GeTensorDesc input_desc = op_desc->GetInputDesc(i); - compute_graph_info.input_format.emplace_back(input_desc.GetFormat()); - compute_graph_info.input_shape.emplace_back(input_desc.GetShape().GetDims()); - compute_graph_info.input_data_type.emplace_back(input_desc.GetDataType()); + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); + if (input_desc == nullptr) { + continue; + } + compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); + compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); + compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); } for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { @@ -3754,12 +3874,12 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vectorGetInputDescPtr(kDataIndex)), data_input_size); GELOGD( - "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " - "%s, data_type: %s, shape: %s .", - index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, - TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), - TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), - formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); + "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " + "%s, data_type: %s, shape: %s .", + index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, + TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), + TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), + 
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); } } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h old mode 100644 new mode 100755 index 15f4539f..5bdee9b5 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -75,6 +75,12 @@ struct timeInfo { int64_t dumpEndTime; }; +enum ExecuteMode { + INITIALIZATION, + SYNCHRONIZATION, + ASYNCHRONIZATION, +}; + // comments class DavinciModel { public: @@ -314,6 +320,8 @@ class DavinciModel { /// Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); + Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup ge /// @brief Get model_id. @@ -440,7 +448,9 @@ class DavinciModel { DavinciModel(const DavinciModel &model) = delete; - const map> &GetHcclFolowStream() { return main_follow_stream_mapping_; } + const map> &GetHcclFolowStream() { + return main_follow_stream_mapping_; + } void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); void InitRuntimeParams(); @@ -655,6 +665,22 @@ class DavinciModel { /// Status InitOutputZeroCopyNodes(const NodePtr &node); + /// + /// @ingroup ge + /// @brief input zero copy node Initialize for Case. + /// @param [in] NodePtr: Data Op. + /// @return Status + /// + Status InitInputBatchLabel(const NodePtr &node); + + /// + /// @ingroup ge + /// @brief output zero copy node Initialize for Case. + /// @param [in] NodePtr: netoutput Op. + /// @return Status + /// + Status InitOutputBatchLabel(const NodePtr &node); + /// /// @ingroup ge /// @brief Constant Op Init. @@ -837,7 +863,7 @@ class DavinciModel { std::map new_input_outside_addrs_; std::map new_output_outside_addrs_; - std::vector real_virtual_addrs_; + std::set real_virtual_addrs_; // output op: save cce op actual needed memory size vector output_memory_size_list_; @@ -884,7 +910,7 @@ class DavinciModel { bool is_inner_model_stream_; bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. - bool last_execute_mode_; + ExecuteMode last_execute_mode_; bool is_stream_list_bind_{false}; bool is_pure_head_stream_{false}; diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/new_model_manager/davinci_model_parser.cc index b744f907..34180d08 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/new_model_manager/davinci_model_parser.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
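The ExecuteMode enum added to the header backs the new guard in InitModelStream: the first NnExecute call fixes the mode, and a later call with the other mode is rejected instead of silently re-creating streams. A compact model of that guard:

#include <iostream>

enum ExecuteMode { INITIALIZATION, SYNCHRONIZATION, ASYNCHRONIZATION };

struct ModelStreamState {
  ExecuteMode last_execute_mode = INITIALIZATION;

  bool InitModelStream(bool is_async) {
    ExecuteMode curr = is_async ? ASYNCHRONIZATION : SYNCHRONIZATION;
    if (last_execute_mode != INITIALIZATION && last_execute_mode != curr) {
      return false;  // "NnExecute not support mix execute."
    }
    last_execute_mode = curr;
    return true;
  }
};

int main() {
  ModelStreamState state;
  std::cout << state.InitModelStream(true)    // 1: async accepted
            << state.InitModelStream(true)    // 1: same mode is fine
            << state.InitModelStream(false)   // 0: mixing sync after async rejected
            << "\n";
  return 0;
}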
diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/new_model_manager/davinci_model_parser.h old mode 100644 new mode 100755 index 8907c97d..83eb4cc3 --- a/ge/graph/load/new_model_manager/davinci_model_parser.h +++ b/ge/graph/load/new_model_manager/davinci_model_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc old mode 100644 new mode 100755 index 320bfb16..b7486a64 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ std::mutex ModelManager::exeception_infos_mutex_; std::shared_ptr ModelManager::GetInstance() { static const std::shared_ptr instance_ptr = - shared_ptr(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); + shared_ptr(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); return instance_ptr; } @@ -107,7 +107,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u } rt_ret = - rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); + rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "memory copy to device failed. 
ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); @@ -557,10 +557,10 @@ Status ModelManager::Stop(uint32_t model_id) { /// Status ModelManager::HandleCommand(const Command &command) { static const std::map> cmds = { - {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, - {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, - {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, - {kCmdTypeProfStop, HandleProfStopCommand}}; + {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, + {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, + {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, + {kCmdTypeProfStop, HandleProfStopCommand}}; auto iter = cmds.find(command.cmd_type); if (iter == cmds.end()) { @@ -876,6 +876,14 @@ Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippCo return davinci_model->GetAIPPInfo(index, aipp_info); } +Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + + return davinci_model->GetAippType(index, type, aipp_index); +} + Status ModelManager::GenSessionId(uint64_t &session_id) { std::lock_guard lock(session_id_create_mutex_); @@ -1117,8 +1125,8 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) GE_CHK_RT(rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM)); GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM)); - GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), so_name.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), + so_name.size(), RT_MEMCPY_HOST_TO_DEVICE)); CustAicpuSoBuf cust_aicpu_so_buf; cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast(reinterpret_cast(d_aicpu_data)); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h old mode 100644 new mode 100755 index e89bfc36..d4852a53 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -224,6 +224,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup domi_ome /// @brief set model input and output size zero copy @@ -333,7 +335,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::mutex map_mutex_; std::mutex sess_ids_mutex_; std::mutex session_id_create_mutex_; - static ::std::mutex exeception_infos_mutex_; + static::std::mutex exeception_infos_mutex_; uint64_t session_id_bias_; std::set sess_ids_; std::vector exception_infos_; diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc old mode 100644 new mode 100755 index 9cbb684f..2ef1c42b --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,13 +29,14 @@ #include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" -#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ - do { \ - if (SIZE <= static_cast(OFFSET)) { \ - GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", OP->GetName().c_str(), SIZE, OFFSET); \ - return {}; \ - } \ - } while (0) +#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ +do { \ + if (SIZE <= static_cast(OFFSET)) { \ + GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", \ + OP->GetName().c_str(), SIZE, OFFSET); \ + return {}; \ + } \ +} while (0) namespace ge { /// @@ -68,8 +69,7 @@ vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { continue; } - GE_IF_BOOL_EXEC( - TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, + GE_IF_BOOL_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); continue); @@ -93,8 +93,8 @@ vector ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { const size_t outputs_size = op_desc->GetOutputsSize(); const vector v_output_offset = op_desc->GetOutputOffset(); GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size, - GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); - return v_output_size;); + GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); + return v_output_size;); for (size_t i = 0; i < outputs_size; ++i) { const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); @@ -104,8 +104,7 @@ vector ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { } int64_t tensor_size = 0; - GE_IF_BOOL_EXEC( - TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, + GE_IF_BOOL_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); continue); @@ -314,7 +313,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr); uint64_t session_id = model_param.session_id; - const size_t inputs_size = op_desc->GetAllInputsSize(); + const size_t inputs_size = op_desc->GetInputsSize(); const 
vector v_input_offset = op_desc->GetInputOffset(); const string op_type = op_desc->GetType(); @@ -328,15 +327,14 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); return v_input_data_addr; } - for (size_t i = 0; i < inputs_size; ++i) { + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast(i)); + if (tensor_desc == nullptr) { + GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); + continue; + } if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { // TBE: add weights address to input - const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); - if (tensor_desc == nullptr) { - GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); - continue; - } - int64_t tensor_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); if (tensor_size) { @@ -345,8 +343,8 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co VALIDATE_MEM_RANGE(op_desc, model_param.weight_size, data_offset); uint8_t *weight_addr = model_param.weight_base + data_offset; v_input_data_addr.push_back(weight_addr); - GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, - op_desc->GetName().c_str(), i, weight_addr); + GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, weight_addr); } non_const_index++; continue; @@ -359,12 +357,12 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; - v_input_data_addr.push_back(variable_addr); - GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); - continue); + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); + uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; + v_input_data_addr.push_back(variable_addr); + GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); + continue); // feature maps uint8_t *mem_addr = nullptr; @@ -397,8 +395,8 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C const size_t outputs_size = op_desc->GetOutputsSize(); const vector v_output_offset = op_desc->GetOutputOffset(); GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size, - GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); - return v_output_data_addr); + GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); + return v_output_data_addr); vector v_memory_type; bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type); if (has_mem_type_attr && (v_memory_type.size() != outputs_size)) { @@ -409,12 +407,12 @@ vector 
ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C } for (size_t i = 0; i < outputs_size; ++i) { GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; - v_output_data_addr.push_back(variable_addr); - GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); - continue); + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); + uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; + v_output_data_addr.push_back(variable_addr); + GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); + continue); // feature maps uint8_t *mem_addr = nullptr; // fusion @@ -454,18 +452,17 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { v_workspace_data_addr.push_back(reinterpret_cast(reinterpret_cast(v_workspace_offset[i]))); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", - model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); } else if (v_workspace_bytes[i] == 0) { v_workspace_data_addr.push_back(nullptr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] Null addr", - model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; v_workspace_data_addr.push_back(mem_addr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], - mem_addr); + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], mem_addr); } } @@ -481,16 +478,16 @@ Status ModelUtils::GetRtAddress(const RuntimeParam ¶m, uintptr_t logic_addr, uint8_t *runtime_base_addr = nullptr; if ((param.logic_mem_base <= logic_addr) && (logic_addr < param.logic_mem_base + param.mem_size)) { runtime_base_addr = param.mem_base - param.logic_mem_base; - GELOGI("The logic addr:0x%lx is data address, base:0x%lx, size:%lu", logic_addr, param.logic_mem_base, - param.mem_size); + GELOGI("The logic addr:0x%lx is data address, base:0x%lx, size:%lu", + logic_addr, param.logic_mem_base, param.mem_size); } else if ((param.logic_weight_base <= logic_addr) && (logic_addr < param.logic_weight_base + param.weight_size)) { runtime_base_addr = param.weight_base - param.logic_weight_base; - GELOGI("The logic addr:0x%lx is weight address, base:0x%lx, size:%lu", logic_addr, param.logic_weight_base, - param.weight_size); + GELOGI("The logic addr:0x%lx is weight address, base:0x%lx, size:%lu", + logic_addr, param.logic_weight_base, param.weight_size); } else 
if ((param.logic_var_base <= logic_addr) && (logic_addr < param.logic_var_base + param.var_size)) { runtime_base_addr = param.var_base - param.logic_var_base; - GELOGI("The logic addr:0x%lx is variable address, base:0x%lx, size:%lu", logic_addr, param.logic_var_base, - param.var_size); + GELOGI("The logic addr:0x%lx is variable address, base:0x%lx, size:%lu", + logic_addr, param.logic_var_base, param.var_size); } else if (logic_addr != 0) { mem_addr = nullptr; GELOGE(PARAM_INVALID, "The logic addr:0x%lx is abnormal", logic_addr); diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/new_model_manager/model_utils.h old mode 100644 new mode 100755 index 8474a987..4b3d7ae7 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/new_model_manager/model_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc index 39f0591d..b8b02f59 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h index 82e228e6..614544f9 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc old mode 100644 new mode 100755 index f742118c..772078c6 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/new_model_manager/task_info/event_record_task_info.h old mode 100644 new mode 100755 index 04ee1779..d3f5961e --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
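GetRtAddress, completed in the hunk above, applies the same bounds discipline as VALIDATE_MEM_RANGE: a logical address is accepted only when it falls inside the data, weight, or variable range, and is then rebased onto the matching runtime pointer. A simplified two-range model of that translation (field names follow the patch; the variable range and error logging are omitted):

#include <cstdint>
#include <iostream>

struct RuntimeParam {
  uint8_t *mem_base;    uintptr_t logic_mem_base;    uint64_t mem_size;
  uint8_t *weight_base; uintptr_t logic_weight_base; uint64_t weight_size;
};

static uint8_t *GetRtAddress(const RuntimeParam &p, uintptr_t logic_addr) {
  if (p.logic_mem_base <= logic_addr && logic_addr < p.logic_mem_base + p.mem_size) {
    return p.mem_base + (logic_addr - p.logic_mem_base);        // data region
  }
  if (p.logic_weight_base <= logic_addr && logic_addr < p.logic_weight_base + p.weight_size) {
    return p.weight_base + (logic_addr - p.logic_weight_base);  // weight region
  }
  return nullptr;                                               // abnormal logic address
}

int main() {
  static uint8_t data[64], weights[64];
  RuntimeParam p{data, 0x1000, 64, weights, 0x2000, 64};
  std::cout << (GetRtAddress(p, 0x1010) == data + 0x10) << "\n";  // prints 1
  return 0;
}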
diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc old mode 100644 new mode 100755 index e8f96b35..b6d8f04c --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h old mode 100644 new mode 100755 index f9da30b8..a92252d7 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc old mode 100644 new mode 100755 index 9b1ea04a..32c79647 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h old mode 100644 new mode 100755 index 7f575639..b1897533 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc old mode 100644 new mode 100755 index 7acbb5b3..dd4edfd0 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h old mode 100644 new mode 100755 index 66248e9f..880ca487 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index 11eaaca9..6679c980 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -168,7 +168,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode for (int64_t i = 0; i < stream_num; ++i) { rtStream_t stream = nullptr; rtError_t rt_ret = - rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); + rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -319,8 +319,8 @@ void HcclTaskInfo::GetPrivateDefByTaskDef(const domi::TaskDef &task) { return; } - ret = - rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_, RT_MEMCPY_HOST_TO_HOST); + ret = rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_, + RT_MEMCPY_HOST_TO_HOST); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMemcpy Fail, ret = 0x%X.", ret); return; diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/new_model_manager/task_info/hccl_task_info.h index d8456834..f7ce3468 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index 4f72ec36..49723f17 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -61,7 +61,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return FAILED; } errno_t sec_ret = - memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); + memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); if (sec_ret != EOK) { GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); return FAILED; @@ -79,8 +79,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return RT_ERROR_TO_GE_STATUS(rt_ret);) } - GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(), - op_desc_->GetType().c_str(), ext_info.size(), ext_info_addr_); + GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", + op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), ext_info.size(), ext_info_addr_); // 2.1 get loop cond variable for tensor array write uint64_t step_id_addr = 0; @@ -110,7 +110,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin if (davinci_model_->IsKnownNode()) { void *input_output_addr = davinci_model_->GetCurrentArgsAddr(args_offset_); fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = - static_cast(reinterpret_cast(input_output_addr)); + static_cast(reinterpret_cast(input_output_addr)); void *workspace_base_addr = nullptr; rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret); @@ -118,7 +118,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = - static_cast(reinterpret_cast(workspace_base_addr)); + static_cast(reinterpret_cast(workspace_base_addr)); fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(ext_info_addr_); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h index b26a95ac..e4d3e6fd 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
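Several hunks in this file, including the known-node path above, move a host pointer into a fixed-width uint64_t kernel field with a static_cast over a reinterpret_cast to uintptr_t, which keeps the pointer-to-integer step at the platform's pointer width before widening. A small self-contained illustration of that two-step conversion:

#include <cstdint>
#include <iostream>

int main() {
  int payload = 42;
  void *input_output_addr = &payload;

  // reinterpret_cast to uintptr_t first, then widen to the fixed-size field,
  // as done for fields such as inputOutputAddr and workspaceBaseAddr.
  uint64_t field = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_output_addr));

  std::cout << std::hex << field << "\n";
  return 0;
}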
@@ -54,8 +54,9 @@ class KernelExTaskInfo : public TaskInfo { auto ret = reinterpret_cast(dump_args_); return ret; } - bool CallSaveDumpInfo() override { return true; }; - + bool CallSaveDumpInfo() override { + return true; + }; private: Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc old mode 100644 new mode 100755 index 3964e0d5..14d38c68 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ const uint32_t kAddrLen = sizeof(void *); namespace ge { KernelTaskInfo::SuperKernelTaskInfo KernelTaskInfo::skt_info_ = { - 0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr}; + 0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr}; Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GE_CHECK_NOTNULL(davinci_model); @@ -380,9 +380,9 @@ Status KernelTaskInfo::Distribute() { } else { /* default: not skt launch */ GELOGI( - "KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s " - "stubfunc:%p blockdim:%u stream:%p", - call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); + "KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s " + "stubfunc:%p blockdim:%u stream:%p", + call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); // l1 fusion enable and env flag open (kCloseSkt for skt debug) bool open_dump = false; auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); @@ -407,9 +407,9 @@ Status KernelTaskInfo::Distribute() { // set for task_id_ UpdateTaskId(); GELOGI( - "KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " - "blockdim:%d stream:%p", - call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); + "KernelTaskInfo Distribute Success. 
sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " + "blockdim:%d stream:%p", + call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); return SUCCESS; } @@ -744,15 +744,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel } } *(reinterpret_cast(args + ctx_.argsOffset[0])) = - reinterpret_cast(reinterpret_cast(custom_info_.input_descs)); // arg 0 + reinterpret_cast(reinterpret_cast(custom_info_.input_descs)); // arg 0 *(reinterpret_cast(args + ctx_.argsOffset[1])) = - reinterpret_cast(reinterpret_cast(custom_info_.input_addrs)); // arg 1 + reinterpret_cast(reinterpret_cast(custom_info_.input_addrs)); // arg 1 *(reinterpret_cast(args + ctx_.argsOffset[2])) = - reinterpret_cast(reinterpret_cast(custom_info_.output_descs)); // arg 2 + reinterpret_cast(reinterpret_cast(custom_info_.output_descs)); // arg 2 *(reinterpret_cast(args + ctx_.argsOffset[3])) = - reinterpret_cast(reinterpret_cast(custom_info_.output_addrs)); // arg 3 + reinterpret_cast(reinterpret_cast(custom_info_.output_addrs)); // arg 3 *(reinterpret_cast(args + ctx_.argsOffset[4])) = - reinterpret_cast(reinterpret_cast(custom_info_.attr_handle)); // arg 4 + reinterpret_cast(reinterpret_cast(custom_info_.attr_handle)); // arg 4 rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { @@ -760,8 +760,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel return RT_ERROR_TO_GE_STATUS(rt_ret); } - rt_ret = - rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), + RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -831,8 +831,8 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size()) - rt_ret = - rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), + RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -859,8 +859,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k GELOGI("Do InitAicpuTask"); so_name_ = kernel_def.so_name(); kernel_name_ = kernel_def.kernel_name(); - GELOGI("node[%s] test so name %s, kernel name %s", op_desc_->GetName().c_str(), so_name_.c_str(), - kernel_name_.c_str()); + GELOGI("node[%s] test so name %s, kernel name %s", + op_desc_->GetName().c_str(), so_name_.c_str(), kernel_name_.c_str()); OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); if (op_desc == nullptr) { @@ -869,7 +869,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), + "launch cust aicpu so failed"); } // copy args to new host memory @@ -940,7 +941,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k dump_flag_ |= 
RT_KERNEL_CUSTOM_AICPU; } - davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, args_addr.get(), args_, args_size_, sizeof(aicpu::AicpuParamHead)); + davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, args_addr.get(), args_, args_size_, + sizeof(aicpu::AicpuParamHead)); return SUCCESS; } @@ -954,7 +956,8 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); return RT_ERROR_TO_GE_STATUS(rt_ret); } - rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), + ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -1085,11 +1088,11 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl uint64_t data_base_addr = reinterpret_cast(davinci_model_->MemBase()) - davinci_model_->GetRtBaseAddr(); uint64_t weight_base_addr = - reinterpret_cast(davinci_model_->WeightsMemBase()) - davinci_model_->GetRtWeightAddr(); + reinterpret_cast(davinci_model_->WeightsMemBase()) - davinci_model_->GetRtWeightAddr(); uint64_t var_base_addr = reinterpret_cast(davinci_model_->VarMemBase()) - davinci_model_->GetRtVarAddr(); Status status = - CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); + CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); if (status != SUCCESS) { GELOGE(status, "Call cce api failed"); return status; @@ -1180,8 +1183,8 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe } *(reinterpret_cast( - args + (reinterpret_cast(const_cast(context.args_offset().data())))[0])) = - reinterpret_cast(reinterpret_cast(flowtable_)); + args + (reinterpret_cast(const_cast(context.args_offset().data())))[0])) = + reinterpret_cast(reinterpret_cast(flowtable_)); } return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 8ada2082..f2945b0b 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -90,7 +90,7 @@ class KernelTaskInfo : public TaskInfo { uint32_t GetSktTaskID() override { return skt_id_; } - bool CallSaveDumpInfo() override { return call_save_dump_; }; + bool CallSaveDumpInfo() override { return call_save_dump_; }; cce::ccOpContext ctx_; FusionOpInfo fusion_op_info_; @@ -114,9 +114,9 @@ class KernelTaskInfo : public TaskInfo { Status SetContext(const domi::KernelDef &kernel_def); Status UpdateCceArgs(std::string &sm_desc, std::string &flowtable, const domi::KernelDef &kernel_def); - Status CceUpdateKernelArgs(const domi::KernelContext &context, uint64_t &data_base_addr, uint64_t &weight_base_addr, - uint64_t &var_base_addr, std::string &sm_desc, std::string &flowtable, - const domi::KernelDef &kernel_def); + Status CceUpdateKernelArgs(const domi::KernelContext &context, uint64_t &data_base_addr, + uint64_t &weight_base_addr, uint64_t &var_base_addr, std::string &sm_desc, + std::string &flowtable, const domi::KernelDef &kernel_def); Status SetFlowtable(std::string &flowtable, const domi::KernelDef &kernel_def); diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc old mode 100644 new mode 100755 index 75f6c121..393c0b31 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,8 +38,8 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da uint32_t label_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, label_index)) { - GELOGE(INTERNAL_ERROR, "LabelGotoExTaskInfo: %s attr [%s] not exist.", op_desc->GetName().c_str(), - ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "LabelGotoExTaskInfo: %s attr [%s] not exist.", + op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); return INTERNAL_ERROR; } diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h old mode 100644 new mode 100755 index c8a695c9..f83cd1d9 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc index de6a1d65..5fa96a96 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,8 +38,8 @@ Status LabelSetTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin uint32_t label_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, label_index)) { - GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: %s attr [%s] not exist.", op_desc->GetName().c_str(), - ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: %s attr [%s] not exist.", + op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); return INTERNAL_ERROR; } diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/new_model_manager/task_info/label_set_task_info.h index c68ffb98..bb02ccf0 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc index efefd3e2..0c2d63f3 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h index 4cb39c95..538b2d68 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc old mode 100644 new mode 100755 index 1f542154..40c8974b --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h index 90aad9b7..c7645b9f 100644 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc old mode 100644 new mode 100755 index 96247e7d..59831996 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h old mode 100644 new mode 100755 index 9436529d..3272b91f --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc old mode 100644 new mode 100755 index fd5f4f4c..533c459a --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h old mode 100644 new mode 100755 index ab07eb22..8989096d --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc old mode 100644 new mode 100755 index f48f64e3..33ebea3b --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -63,8 +63,8 @@ Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d active_stream_ = davinci_model->GetStreamList()[active_stream_index_list[internal_index]]; active_stream_id_ = stream_active_def.active_stream_id(); - GELOGI("InitStreamActiveTaskInfo Init Success, index:%u, activeStream:%p, activeStreamID:%u.", internal_index, - active_stream_, active_stream_id_); + GELOGI("InitStreamActiveTaskInfo Init Success, index:%u, activeStream:%p, activeStreamID:%u.", + internal_index, active_stream_, active_stream_id_); return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h old mode 100644 new mode 100755 index a75e616e..c6b263b4 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc index 45db2be5..616ba85f 100644 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,8 +56,8 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d size_t input_size = op_desc->GetInputsSize(); if (input_data_addr.size() != STREAM_SWITCH_INPUT_NUM || input_size != STREAM_SWITCH_INPUT_NUM) { - GELOGE(INTERNAL_ERROR, "Input num should be %u. inputAddr size:%zu, inputDesc size:%zu.", STREAM_SWITCH_INPUT_NUM, - input_data_addr.size(), input_size); + GELOGE(INTERNAL_ERROR, "Input num should be %u. inputAddr size:%zu, inputDesc size:%zu.", + STREAM_SWITCH_INPUT_NUM, input_data_addr.size(), input_size); return INTERNAL_ERROR; } @@ -93,8 +93,8 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d data_type_ = static_cast(data_type); } - GELOGI("InitStreamSwitchTaskInfo Init Success, cond:%d, trueStream:%p, trueStreamID:%u, datatype:%d.", cond_, - true_stream_, true_stream_id_, data_type_); + GELOGI("InitStreamSwitchTaskInfo Init Success, cond:%d, trueStream:%p, trueStreamID:%u, datatype:%d.", + cond_, true_stream_, true_stream_id_, data_type_); return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h old mode 100644 new mode 100755 index e6e8339a..89642cf8 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,7 +40,6 @@ class StreamSwitchTaskInfo : public TaskInfo { Status Distribute() override; Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; - private: void SetInputAndValuePtr(DavinciModel *davinci_model, const vector &input_data_addrs); void *input_ptr_; diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc old mode 100644 new mode 100755 index d95aefac..0c1a1d35 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h" #include #include "framework/common/debug/ge_log.h" @@ -83,7 +82,7 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * Status StreamSwitchNTaskInfo::Distribute() { GELOGI("StreamSwitchNTaskInfo Distribute Start."); rtError_t rt_ret = - rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); + rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h old mode 100644 new mode 100755 index 5a73eb1a..3d65a086 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,5 +53,5 @@ class StreamSwitchNTaskInfo : public TaskInfo { vector value_list_; int64_t args_offset_; }; -} // namespace ge +} #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index 100a4fea..09ed7458 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,8 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { reinterpret_cast(reinterpret_cast(this->GetNavTableSize()))}; rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. 
error: 0x%X", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret);) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return + RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h index b7e76af0..9c94d1a9 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index ca42b4e2..d237d56c 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -112,8 +112,8 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) // Create the necessary metadata for the super kernel - h = - std::unique_ptr(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); + h = std::unique_ptr( + new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); return SUCCESS; } } // namespace skt diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h index 7db44eec..efd61ef7 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,7 +30,7 @@ class SuperKernelFactory { void *handle_ = nullptr; std::string sk_stub_name_ = "_Z21super_kernel_templatePmm"; bool is_init_ = false; - SuperKernelFactory(){}; + SuperKernelFactory() {}; ~SuperKernelFactory() { if (handle_ != nullptr) { GELOGI("SKT: SKT LIB PATH release."); diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/new_model_manager/task_info/task_info.cc old mode 100644 new mode 100755 index 01bf0690..674d477f --- a/ge/graph/load/new_model_manager/task_info/task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h index f69511e6..e131a356 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/new_model_manager/task_info/task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/new_model_manager/task_info/task_info_factory.h index 5b220960..8feef0ac 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info_factory.h +++ b/ge/graph/load/new_model_manager/task_info/task_info_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/new_model_manager/tbe_handle_store.cc old mode 100644 new mode 100755 index 5bdf4c81..591e88d0 --- a/ge/graph/load/new_model_manager/tbe_handle_store.cc +++ b/ge/graph/load/new_model_manager/tbe_handle_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "tbe_handle_store.h" #include @@ -40,9 +39,14 @@ void TbeHandleInfo::used_dec(uint32_t num) { used_ -= num; } -uint32_t TbeHandleInfo::used_num() const { return used_; } +uint32_t TbeHandleInfo::used_num() const { + return used_; +} + +void *TbeHandleInfo::handle() const { + return handle_; +} -void *TbeHandleInfo::handle() const { return handle_; } TBEHandleStore &TBEHandleStore::GetInstance() { static TBEHandleStore instance; @@ -77,7 +81,8 @@ bool TBEHandleStore::FindTBEHandle(const std::string &name, void *&handle) { /// @param [in] kernel: TBE kernel bin to store. 
/// @return NA /// -void TBEHandleStore::StoreTBEHandle(const std::string &name, void *handle, std::shared_ptr &kernel) { +void TBEHandleStore::StoreTBEHandle(const std::string &name, void *handle, + std::shared_ptr &kernel) { std::lock_guard lock(mutex_); auto it = kernels_.find(name); if (it == kernels_.end()) { @@ -135,4 +140,4 @@ void TBEHandleStore::EraseTBEHandle(const std::map &names } } } -} // namespace ge +} // namespace ge diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/new_model_manager/tbe_handle_store.h index a8f68514..6c3ad750 100644 --- a/ge/graph/load/new_model_manager/tbe_handle_store.h +++ b/ge/graph/load/new_model_manager/tbe_handle_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc index 18b958ef..e93a7250 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,7 +67,7 @@ Status ZeroCopyOffset::InitInputDataInfo(const vector &output_size_list if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) { out_count++; uint64_t out_offset = - reinterpret_cast(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index); + reinterpret_cast(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index); int64_t real_data_size = ModelUtils::GetOutputSize(op_desc).at(kDataIndex); data_info_.emplace_back(real_data_size, reinterpret_cast(reinterpret_cast(out_offset))); relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); @@ -141,7 +141,7 @@ void ZeroCopyOffset::IsL2Fusion(const vector &fusion_basic_addrs, const } void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::vector &real_virtual_addrs) { + bool fusion_flag, std::set &real_virtual_addrs) { GELOGI("[ZCPY] Start to SetInputOutsideAddrs for virtual_addr %p.", addr); uint32_t out_count = 0; if (!fusion_flag) { @@ -150,7 +150,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l std::map> addr_mapping; addr_mapping[addr] = {}; outside_addrs_.emplace_back(addr_mapping); - real_virtual_addrs.emplace_back(addr); + real_virtual_addrs.insert(addr); } else { GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); int64_t output_offset = output_offset_list.at(index); @@ -158,11 +158,11 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l if (zero_copy_basic_offset_.at(i) == output_offset) { out_count++; void *virtual_addr = - reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); + reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); std::map> addr_mapping; addr_mapping[virtual_addr] = {}; outside_addrs_.emplace_back(addr_mapping); - real_virtual_addrs.emplace_back(virtual_addr); + real_virtual_addrs.insert(virtual_addr); GELOGI("[ZCPY] virtual_addr %p has been fusion to virtual_addr %p.", addr, virtual_addr); } } @@ -187,7 +187,7 @@ void 
ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo if (zero_copy_basic_offset_.at(i) == input_offset) { out_count++; void *virtual_addr = - reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); + reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); std::map> addr_mapping; addr_mapping[virtual_addr] = {}; outside_addrs_.emplace_back(addr_mapping); diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h index eb2cdb4d..c662032b 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ class ZeroCopyOffset { Status InitInputDataInfo(const vector &output_size_list, const vector &virtual_addr_list, const OpDescPtr &op_desc, bool &fusion_flag); void SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::vector &real_virtual_addrs); + bool fusion_flag, std::set &real_virtual_addrs); void IsL2Fusion(const vector &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); Status InitOutputDataInfo(const vector &input_size_list, const vector &virtual_addr_list, diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc old mode 100644 new mode 100755 index 7db9c459..35169726 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -154,8 +154,8 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { GE_CHECK_NOTNULL(args_addr_); rtError_t rt_err = RT_ERROR_NONE; if (async_mode) { - rt_err = - rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, stream); + rt_err = rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, + stream); } else { __builtin_prefetch(args_addr_); rt_err = rtMemcpy(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE); diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/new_model_manager/zero_copy_task.h index c83387e8..57ccdbaf 100644 --- a/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/ge/graph/load/new_model_manager/zero_copy_task.h @@ -28,8 +28,8 @@ using std::map; using std::set; -using std::string; using std::vector; +using std::string; namespace ge { class ZeroCopyTask { @@ -83,9 +83,13 @@ class ZeroCopyTask { */ ge::Status DistributeParam(bool async_mode, rtStream_t stream); - void SetBatchLabel(const string &batch_label) { batch_label_ = batch_label; } + void SetBatchLabel(const string &batch_label) { + batch_label_ = batch_label; + } - const string &GetBatchLabel() const { return batch_label_; } + const string& GetBatchLabel() const { + return batch_label_; + } protected: bool CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, uintptr_t addr); @@ -101,5 +105,5 @@ class ZeroCopyTask { //
map> task_addr_offset_; }; -} // namespace ge +} // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index 850a73e8..dc4af753 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -33,8 +33,8 @@ #include "runtime/mem.h" namespace ge { -constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes -constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold +constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes +constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold constexpr size_t kKByteSize = 1024; constexpr size_t kMByteSize = 1024 * 1024; constexpr size_t kGByteSize = 1024 * 1024 * 1024; @@ -88,6 +88,7 @@ class CachingAllocator { Status Free(uint8_t *memory_addr, uint32_t device_id = 0); private: + /// /// @ingroup ge_graph /// @brief extend cache by size @@ -130,7 +131,7 @@ class CachingAllocator { /// @param [in] block ptr /// @return void /// - void FreeBlock(Block *block); + void FreeBlock(Block* block); /// /// @ingroup ge_graph diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc old mode 100644 new mode 100755 index 82108653..db68b8fd --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -95,6 +95,7 @@ #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" #include "graph/passes/end_of_sequence_add_control_pass.h" #include "graph/passes/subexpression_migration_pass.h" +#include "graph/passes/subgraph_const_migration_pass.h" #include "graph/passes/unused_args_clean_pass.h" #include "graph/passes/global_step_insert_pass.h" #include "graph/utils/tensor_adapter.h" @@ -132,7 +133,10 @@ bool IsTailingOptimization() { namespace ge { GraphManager::GraphManager(OmgContext &omg_context) - : thread_run_flag_(false), graph_run_listener_(nullptr), init_flag_(false), omg_context_(omg_context) { + : thread_run_flag_(false), + graph_run_listener_(nullptr), + init_flag_(false), + omg_context_(omg_context) { SetLocalOmgContext(omg_context); } @@ -240,6 +244,13 @@ Status GraphManager::Finalize() { continue; } } + + // clear analyzer saved info(graph level) + auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); + GE_CHECK_NOTNULL(compute_graph); + auto session_id = compute_graph->GetSessionID(); + auto graph_id = compute_graph->GetGraphID(); + Analyzer::GetInstance()->DestroyGraphJsonObject(session_id, graph_id); } graph_map_.clear(); cache_helper_map_.clear(); @@ -268,10 +279,10 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, if (compute_graph != nullptr) { compute_graph->SetGraphID(graph_id); bool graph_has_been_added = false; - if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) && - graph_has_been_added) { - GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] same graph object can not be added again, graph_id = %u.", - graph_id); + if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) + && graph_has_been_added) { + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, + "[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id); return GE_GRAPH_GRAPH_ALREADY_EXIST; } (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); 
@@ -374,7 +385,8 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr } Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, - Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) { + Graph2SubGraphInfoList &sub_graph_map, + uint64_t session_id) { GE_CHECK_NOTNULL(compute_graph); // use default 16 multi thread const uint32_t thread_num = 16; @@ -386,7 +398,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr GELOGI("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str()); for (const auto &subgraph : root_subgraph_list) { if (!op_compile_strategy.empty()) { - (void)AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); + (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, subgraph, session_id, GetThreadLocalContext()); @@ -401,7 +413,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr auto subgraph_list = sub_graph_map[function_graph]; for (const auto &subgraph : subgraph_list) { if (!op_compile_strategy.empty()) { - (void)AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); + (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, subgraph, session_id, GetThreadLocalContext()); @@ -436,7 +448,7 @@ bool GraphManager::CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_ const auto &root_subgraph_list = sub_graph_map[compute_graph]; for (const auto &subgraph : root_subgraph_list) { bool optimize_group = true; - (void)AttrUtils::GetBool(subgraph->GetSubGraph(), ATTR_NAME_OPTIMIZE_GROUP, optimize_group); + (void) AttrUtils::GetBool(subgraph->GetSubGraph(), ATTR_NAME_OPTIMIZE_GROUP, optimize_group); if (!optimize_group) { GELOGW("Run lx optimize for subgraph:%s failed.", subgraph->GetSubGraph()->GetName().c_str()); return false; @@ -446,7 +458,7 @@ bool GraphManager::CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_ const auto &subgraph_list = sub_graph_map[function_graph]; for (const auto &subgraph : subgraph_list) { bool optimize_group = true; - (void)AttrUtils::GetBool(subgraph->GetSubGraph(), ATTR_NAME_OPTIMIZE_GROUP, optimize_group); + (void) AttrUtils::GetBool(subgraph->GetSubGraph(), ATTR_NAME_OPTIMIZE_GROUP, optimize_group); if (!optimize_group) { GELOGW("Run lx optimize for subgraph:%s failed.", subgraph->GetSubGraph()->GetName().c_str()); return false; @@ -537,7 +549,9 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr /// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize"; /// 2. run lx fusion or buffer according build_mode and build_step in fe. 
GELOGI("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", - options_.build_mode.c_str(), options_.build_step.c_str(), buffer_optimize.c_str()); + options_.build_mode.c_str(), + options_.build_step.c_str(), + buffer_optimize.c_str()); Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); if (ret != SUCCESS) { GELOGE(ret, "Multiply optimize subgraph with lx buffer"); @@ -573,19 +587,18 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, } GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); - const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); - if (unknown_shape_skip != nullptr) { - PassManager graph_pass; - GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) - GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); - } + + PassManager graph_pass; + GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) + GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); return SUCCESS; } -Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, ge::ComputeGraphPtr &compute_graph, +Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, + ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); @@ -594,7 +607,7 @@ Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, ge:: // Dump graph to tuning path if (options_.build_mode == BUILD_MODE_TUNING && options_.build_step == BUILD_STEP_AFTER_UB_MATCH) { std::string tuning_path; - (void)GetContext().GetOption(TUNING_PATH, tuning_path); + (void) GetContext().GetOption(TUNING_PATH, tuning_path); GELOGI("Dump path:%s.", tuning_path.c_str()); GraphUtils::DumpGEGraph(compute_graph, "", true, tuning_path); } @@ -607,7 +620,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); - GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", graph_optimize_.OptimizeGraphBeforeBuildForRts, compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", + graph_optimize_.OptimizeGraphBeforeBuildForRts, compute_graph); GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); GELOGI("PreRun:PreRunAfterOptimizeSubGraph success."); return SUCCESS; @@ -656,10 +670,10 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorSetRunFlag(true); ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); - GE_IF_BOOL_EXEC( - GetTrainFlag(), - GE_IF_BOOL_EXEC(compute_graph_tmp == nullptr, - GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] compute_graph_tmp is NULL, graph id = %u.", graph_id); - return GE_GRAPH_GRAPH_NODE_NULL;)) + GE_IF_BOOL_EXEC(GetTrainFlag(), + GE_IF_BOOL_EXEC(compute_graph_tmp == nullptr, + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, + "[RunGraph] compute_graph_tmp is NULL, graph id = %u.", graph_id); + return GE_GRAPH_GRAPH_NODE_NULL;)) // when set incre build, add cache helper map AddModelCacheHelperToMap(graph_id, session_id, 
compute_graph_tmp); @@ -1050,7 +1064,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const } OpsKernelInfoStorePtr kernel_info = - instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(op_desc->GetOpKernelLibName()); + instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(op_desc->GetOpKernelLibName()); if (kernel_info == nullptr) { GELOGE(FAILED, "Get op kernel info store failed"); return FAILED; @@ -1060,7 +1074,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const if (ret != SUCCESS) { GELOGE(ret, "Compile op failed, op = %s, graph_id = %u.", op_desc->GetName().c_str(), graph_id); return ret; - } + } } } @@ -1566,7 +1580,7 @@ Status GraphManager::SummaryHandle(const GraphId &graph_id, std::vector summary_output_index; GELOGI("[GraphManager] SummaryHandle, outputsSize=%zu.", outputs.size()); const std::map> &whole_summary_output_indexes = - graph_optimize_.GetSummaryOutputIndexes(); + graph_optimize_.GetSummaryOutputIndexes(); if (whole_summary_output_indexes.find(graph_id) == whole_summary_output_indexes.end()) { GELOGE(FAILED, "No Summary graph found in map."); return FAILED; @@ -1660,7 +1674,8 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap } Status GraphManager::RegisterCallBackFunc( - const std::string &key, const std::function &)> &callback) { + const std::string &key, + const std::function &)> &callback) { GELOGI("[GraphManager] RegisterCallBackFunc, key=%s.", key.c_str()); me_callback_map_[key] = callback; return SUCCESS; @@ -1899,9 +1914,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { } PassManager after_merge_passes; GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); + after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); + after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", new (std::nothrow) CommonSubexpressionEliminationPass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::PermutePass", new (std::nothrow) PermutePass)) @@ -1938,7 +1953,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { return ret; } - GraphUtils::DumpGEGraphToOnnx(*compute_graph, "OptimizeStage1_1"); + GE_DUMP(compute_graph, "OptimizeStage1_1"); NamesToPass names_to_passes; TransOpNearbyAllreduceFusionPass trans_op_nearby_allreduce_fusion_pass; @@ -1978,28 +1993,30 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { uint64_t op_constant_folding_cost = 0; for (auto &it : constant_folding_pass.GetOpConstantFoldingPerfStatistic()) { op_constant_folding_cost += it.second.second; - GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(), - it.second.second, it.second.first); + GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", + it.first.c_str(), it.second.second, it.second.first); } GEEVENT("[GEPERFTRACE] The time cost of extern constant folding is [%lu] micro second.", op_constant_folding_cost); for (auto &it : constant_folding_pass.GetGeConstantFoldingPerfStatistic()) { op_constant_folding_cost += 
it.second.second; - GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(), - it.second.second, it.second.first); + GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", + it.first.c_str(), it.second.second, it.second.first); } - GraphUtils::DumpGEGraphToOnnx(*compute_graph, "OptimizeStage1_2"); + GE_DUMP(compute_graph, "OptimizeStage1_2"); PassManager graph_pass; // the prune pass should between SwitchPass and SwitchToStreamSwitchPass + GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::Migration", new (std::nothrow) SubgraphConstMigrationPass)); + GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ArgsClean", new (std::nothrow) UnusedArgsCleanPass)); GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::NextIterationPass", new (std::nothrow) NextIterationPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ControlTriggerPass", new (std::nothrow) ControlTriggerPass)) GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::MergeToStreamMergePass", new (std::nothrow) MergeToStreamMergePass)) + graph_pass.AddPass("OptimizeStage1_3::MergeToStreamMergePass", new (std::nothrow) MergeToStreamMergePass)) GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) + graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) + graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::MultiBatchPass", new (std::nothrow) MultiBatchPass(true))) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", @@ -2009,7 +2026,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { // Priority: The GlobalStepInsertPass should work before graph partitioner. 
// Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) + graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) } GE_TIMESTAMP_START(graph_pass); ret = graph_pass.Run(compute_graph); @@ -2037,7 +2054,7 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { PassManager after_merge_passes; GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", new (std::nothrow) - LinkGenMaskNodesPass(options_.stream_max_parallel_num))); + LinkGenMaskNodesPass(options_.stream_max_parallel_num))); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); @@ -2091,19 +2108,18 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", new (std::nothrow) CompileNodesPass)) GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( - "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new (std::nothrow) MarkGraphUnknownStatusPass)) + "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass)) GE_CHK_STATUS_RET( - pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass", - new (std::nothrow) InputOutputConnectionIdentifyPass)) + pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass", + new (std::nothrow) InputOutputConnectionIdentifyPass)) // When the input node to be cleared is after a `Data` node, the atomic-clean-node should not be inserted. // So The ComputeGraph should not delete nodes after `AtomicAddrCleanPass` // to prevent unexpected deletion of nodes after a `Data` node GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)) - GE_CHK_STATUS_RET( - pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" - "EndOfSequenceAddControlPass", - new (std::nothrow) EndOfSequenceAddControlPass)) + GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" + "EndOfSequenceAddControlPass", + new (std::nothrow) EndOfSequenceAddControlPass)) // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and // graph-structure. So try not to add new pass after SubgraphPass. GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", @@ -2124,8 +2140,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { auto result = graph_optimize_.HandleMemoryRWConflict(compute_graph); if (result != SUCCESS) { GELOGW( - "Mark node rw type failed. It will take some effect on memory_assign_conflicts handling." - "Please pay attention to it."); + "Mark node rw type failed. It will take some effect on memory_assign_conflicts handling." 
+ "Please pay attention to it."); } ChangeConstTypeWhenTraining(compute_graph); @@ -2171,7 +2187,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G GE_TIMESTAMP_START(LoadGraph); GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_); Status ret = - GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); + GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync"); if (ret != SUCCESS) { GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed"); @@ -2202,9 +2218,9 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra } GELOGI( - "CheckAndReleaseMemory Graph[%u] need memory_size[%ld], weight_size[%ld]," - " Device[%u] free_memory_size[%ld]", - graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); + "CheckAndReleaseMemory Graph[%u] need memory_size[%ld], weight_size[%ld]," + " Device[%u] free_memory_size[%ld]", + graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); if (ge::CheckInt64AddOverflow(memory_size, weight_size) != SUCCESS) { GELOGE(INTERNAL_ERROR, "The sum of Memory size and weight size exceeds INT64_MAX"); return INTERNAL_ERROR; @@ -2426,8 +2442,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { if (graph_node->GetBuildFlag()) { ReturnError(graph_manager, args.callback, PARAM_INVALID, "The graph " + std::to_string(graph_node->GetGraphId()) + - " need to re-build, you should remove it" - " from GE first, then AddGraph again and rebuild it."); + " need to re-build, you should remove it" + " from GE first, then AddGraph again and rebuild it."); graph_node->Unlock(); return; } @@ -2458,7 +2474,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { } graph_manager->run_args_q_.Push( - RunArgs({graph_node, args.graph_id, args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback})); + RunArgs({graph_node, args.graph_id, args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback})); GELOGI("Loop end."); } } @@ -2536,8 +2552,8 @@ void GraphManager::ReturnError(GraphManager *graph_manager, RunAsyncCallback cal callback(ret, outputs); } -void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, - Status ret, const string &log) { +void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, + RunAsyncCallback callback, Status ret, const string &log) { std::vector outputs; auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); if (graph_manager == nullptr || compute_graph == nullptr) { @@ -2580,10 +2596,10 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_ return; } tensor.length = len * size; - tensor.data.reset(new (std::nothrow) uint8_t[tensor.length]); + tensor.data.reset(new(std::nothrow) uint8_t[tensor.length]); // To avoid global step too small and can not stop, totally set a bigger value for (int64_t i = 0; i < tensor.length; i++) { - tensor.data[i] = 0x7F; // here stands for a positive max value + tensor.data[i] = 0x7F; // here stands for a positive max value } outputs.emplace_back(std::move(tensor)); } @@ -2665,7 +2681,7 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)) { GE_TIMESTAMP_START(ConvertGraphToFile); 
std::string tuning_path; - (void)GetContext().GetOption(TUNING_PATH, tuning_path); + (void) GetContext().GetOption(TUNING_PATH, tuning_path); Status ret = ConvertGraphToFile(compute_graph, tuning_path, (options_.build_step == BUILD_STEP_AFTER_BUILDER)); if (ret != SUCCESS) { GELOGE(ret, "Convert graph[%s] to file failed", compute_graph->GetName().c_str()); @@ -2714,7 +2730,7 @@ Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, std::str auto sub_graph_map = graph_partitioner_.GetSubGraphMap(); const auto &subgraph_infos = sub_graph_map[compute_graph]; std::vector tuning_subgraphs; - for (const auto &sub_graph_info_ptr : subgraph_infos) { + for (const auto &sub_graph_info_ptr: subgraph_infos) { GE_CHECK_NOTNULL(sub_graph_info_ptr); ComputeGraphPtr sub_graph_tmp = sub_graph_info_ptr->GetSubGraph(); // need to tuning @@ -2795,11 +2811,18 @@ Status GraphManager::SaveVariables(const Graph &graph, const std::vectorfirst; + auto var_tensor = iter->second.GetTensorDesc(); + var_tensor.SetName(var_name); + iter->second.SetTensorDesc(var_tensor); var_values.emplace_back(iter->second); } } diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 9096f4a8..45c91406 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -103,6 +103,7 @@ class GraphManager { ge::Status BuildGraph(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &models, uint64_t session_id = 0, bool async = false); + Status BuildGraphForUnregisteredOp(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id); @@ -137,8 +138,8 @@ class GraphManager { /// @param [out] callback: callback while run graph async finish /// @return Status result of function /// - Status RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, uint64_t session_id, - RunAsyncCallback callback); + Status RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, + uint64_t session_id, RunAsyncCallback callback); /// /// @ingroup ge_graph @@ -148,7 +149,8 @@ class GraphManager { /// @return Status result of function /// Status RegisterCallBackFunc( - const std::string &key, const std::function &)> &callback); + const std::string &key, + const std::function &)> &callback); const bool GetTrainFlag() const { return options_.train_graph_flag; } @@ -196,8 +198,8 @@ class GraphManager { Status OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, uint64_t session_id); - Status Build(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, GeRootModelPtr &ge_root_model, - uint64_t session_id); + Status Build(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, + GeRootModelPtr &ge_root_model, uint64_t session_id); Status StartForRunGraph(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); @@ -302,27 +304,33 @@ class GraphManager { static void RunThread(GraphManager *graph_manager); static void StopQueue(GraphManager *graph_manager); static void ReturnError(GraphManager *graph_manager, RunAsyncCallback callback, Status ret, const string &log); - static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, Status ret, - const string &log); + static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, + Status ret, const string &log); void ChangeConstTypeWhenTraining(const ComputeGraphPtr 
&compute_graph); Status PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, const std::vector &inputs, ge::ComputeGraphPtr &compute_graph, uint64_t session_id); - Status PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, ge::ComputeGraphPtr &compute_graph, + Status PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, + ge::ComputeGraphPtr &compute_graph, uint64_t session_id); - Status PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, - GeRootModelPtr &ge_root_model, uint64_t session_id); + Status PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, + ComputeGraphPtr &compute_graph, + GeRootModelPtr &ge_root_model, + uint64_t session_id); - Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, Graph2SubGraphInfoList &sub_graph_map, + Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, + Graph2SubGraphInfoList &sub_graph_map, std::unordered_map ©_graphs); - Status OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, Graph2SubGraphInfoList &sub_graph_map, + Status OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, + Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id); bool CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_graph, Graph2SubGraphInfoList &sub_graph_map); - Status ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_graph, Graph2SubGraphInfoList &sub_graph_map, + Status ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_graph, + Graph2SubGraphInfoList &sub_graph_map, std::unordered_map ©_graphs); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id); diff --git a/ge/graph/manager/graph_manager_utils.cc b/ge/graph/manager/graph_manager_utils.cc index edacadb9..fe7e5b34 100644 --- a/ge/graph/manager/graph_manager_utils.cc +++ b/ge/graph/manager/graph_manager_utils.cc @@ -51,7 +51,9 @@ GraphNode::GraphNode(GraphId graph_id) GraphNode::~GraphNode() = default; -void GraphNode::Lock() { sem_.Push(0); } +void GraphNode::Lock() { + sem_.Push(0); +} void GraphNode::Unlock() { uint8_t unused; @@ -103,9 +105,9 @@ GraphModelListener::GraphModelListener(std::mutex &mutex, std::condition_variabl Status GraphModelListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, std::vector &outputs) { GELOGI( - "[GraphManager] graph compute call back, model_id:%u, task_id:%u, " - "resultCode:%u.", - model_id, task_id, result); + "[GraphManager] graph compute call back, model_id:%u, task_id:%u, " + "resultCode:%u.", + model_id, task_id, result); std::lock_guard lock(mutex_); result_code_ = result; @@ -138,7 +140,8 @@ void RunAsyncListener::SetCallback(const RunAsyncCallback &callback) { Status RunAsyncListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, std::vector &outputs) { - GELOGI("[GraphManager] run graph async call back, modelId:%u, taskId:%u, resultCode:%u.", model_id, task_id, result); + GELOGI("[GraphManager] run graph async call back, modelId:%u, taskId:%u, resultCode:%u.", + model_id, task_id, result); GE_CHECK_NOTNULL(callback_); callback_(result, outputs); uint8_t unused; diff --git a/ge/graph/manager/graph_manager_utils.h b/ge/graph/manager/graph_manager_utils.h index be39df21..aee020f2 100644 --- a/ge/graph/manager/graph_manager_utils.h +++ b/ge/graph/manager/graph_manager_utils.h @@ -167,7 +167,7 @@ class GraphNode { GeModelPtr GetGeModel() const { return ge_model_; } void SetGeRootModel(const GeRootModelPtr &ge_root_model) { ge_root_model_ = 
ge_root_model; } GeRootModelPtr GetGeRootModel() const { return ge_root_model_; } - const std::map &GetOptions() const { return options_; } + const std::map& GetOptions() const { return options_; } void SetOptions(const std::map &options) { options_ = options; } void Lock(); void Unlock(); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc old mode 100644 new mode 100755 index 20ca12ae..b832986b --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -105,9 +105,9 @@ Status MemoryAllocator::FreeMemory(const string &memory_key, uint32_t device_id) if (it == memory_base_map_.end()) { if (mem_malloced_) { GELOGW( - "MemoryAllocator::FreeMemory failed," - " memory_key[%s] was not exist, device_id = %u.", - memory_key.c_str(), device_id); + "MemoryAllocator::FreeMemory failed," + " memory_key[%s] was not exist, device_id = %u.", + memory_key.c_str(), device_id); } return ge::INTERNAL_ERROR; } @@ -139,9 +139,9 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic auto it = memory_base_map_.find(memory_key); if (it == memory_base_map_.end()) { GELOGW( - "MemoryAllocator::GetMemoryAddr failed," - " memory_key[%s] was not exist, device_id = %u.", - memory_key.c_str(), device_id); + "MemoryAllocator::GetMemoryAddr failed," + " memory_key[%s] was not exist, device_id = %u.", + memory_key.c_str(), device_id); return nullptr; } diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index bebdedb6..2723ae5c 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -109,7 +109,8 @@ class MemoryAllocator { /// @param [in] device_id device id /// @return memory address /// - uint8_t *MallocMemory(const string &purpose, const string &memory_key, size_t memory_size, uint32_t device_id = 0); + uint8_t *MallocMemory(const string &purpose, const string &memory_key, size_t memory_size, + uint32_t device_id = 0); /// /// @ingroup ge_graph diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc old mode 100644 new mode 100755 index 8633e361..0b9f1ff9 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -685,7 +685,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty return nullptr; } uint8_t *mem_addr = - logic_addr + reinterpret_cast(mem_base) - VarManager::Instance(session_id_)->GetVarMemLogicBase(); + logic_addr + reinterpret_cast(mem_base) - VarManager::Instance(session_id_)->GetVarMemLogicBase(); return mem_addr; } diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h old mode 100644 new mode 100755 index 4a038f13..b4f6aca3 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -219,7 +219,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); - ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); + ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, uint8_t *base_ptr); diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index 1d35f7af..43bc8e17 100644 --- 
a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -18,20 +18,46 @@ #include +#include "graph/ge_context.h" #include "graph/utils/tensor_utils.h" +#include "runtime/mem.h" +namespace { +const uint32_t kMallocHostMemFlag = 1; +} // namespace namespace ge { -Status HostMemoryAllocator::Allocate(std::size_t memory_size, uint8_t *memory_addr) { - GELOGI("HostMemoryAllocator::MallocMemory size= %zu.", memory_size); +Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { + auto device_id = GetContext().DeviceId(); + GELOGD("SharedMemAllocator::Malloc host mem size= %zu for devid:[%u].", mem_info.mem_size, device_id); + + auto dev_id = static_cast(device_id); + GE_CHK_RT_RET(rtSetDevice(dev_id)); + // DeviceReset before memory finished! + GE_MAKE_GUARD(not_used_var, [&] { GE_CHK_RT(rtDeviceReset(dev_id)); }); + + rtMallocHostSharedMemoryIn input_para = {mem_info.shm_name.c_str(), mem_info.mem_size, kMallocHostMemFlag}; + rtMallocHostSharedMemoryOut output_para; + rtError_t rt_ret = rtMallocHostSharedMemory(&input_para, &output_para); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMallocHostSharedMemory) failed, devid:[%u].", device_id); + return GE_GRAPH_MEMORY_ALLOC_FAILED; + } + mem_info.fd = output_para.fd; + mem_info.host_address = reinterpret_cast(output_para.ptr); + mem_info.device_address = reinterpret_cast(output_para.devPtr); return SUCCESS; } -Status HostMemoryAllocator::DeAllocate(uint8_t *memory_addr) { - if (rtFreeHost(memory_addr) != RT_ERROR_NONE) { - GELOGE(GE_GRAPH_FREE_FAILED, "MemoryAllocator::Free memory failed."); - return GE_GRAPH_FREE_FAILED; +Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { + GELOGD("SharedMemAllocator::DeAllocate"); + rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, + mem_info.host_address, mem_info.device_address}; + + rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); + return RT_FAILED; } - memory_addr = nullptr; return ge::SUCCESS; } @@ -42,9 +68,9 @@ HostMemManager &HostMemManager::Instance() { Status HostMemManager::Initialize() { std::lock_guard lock(mutex_); - allocator_ = std::unique_ptr(new (std::nothrow) HostMemoryAllocator()); + allocator_ = std::unique_ptr(new (std::nothrow) SharedMemAllocator()); if (allocator_ == nullptr) { - GELOGE(GE_GRAPH_MALLOC_FAILED, "Host mem allocator init failed!"); + GELOGE(GE_GRAPH_MALLOC_FAILED, "Shared memory allocator init failed!"); return GE_GRAPH_MALLOC_FAILED; } return SUCCESS; @@ -52,25 +78,25 @@ Status HostMemManager::Initialize() { void HostMemManager::Finalize() noexcept { std::lock_guard lock(mutex_); - - for (const auto &it : var_memory_base_map_) { - if (allocator_->DeAllocate(it.second.address) != SUCCESS) { - GELOGW("Host %s mem deAllocator failed!", it.first.c_str()); + for (auto &it : var_memory_base_map_) { + if (allocator_->DeAllocate(it.second) != SUCCESS) { + GELOGW("Host %s mem release failed!", it.first.c_str()); } } var_memory_base_map_.clear(); } -Status HostMemManager::MallocMemoryForHostVar(const string &op_name, uint64_t tensor_size, uint8_t *&var_addr) { +Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) { std::lock_guard lock(mutex_); - if (var_memory_base_map_.find(op_name) != var_memory_base_map_.end()) { - GELOGI("Host mem for variable %s has been malloced", op_name.c_str()); - return SUCCESS; + auto iter = 
var_memory_base_map_.find(mem_info.op_name); + if (iter != var_memory_base_map_.end()) { + GELOGE(FAILED, "Host shared memory for op %s has been malloced", mem_info.op_name.c_str()); + return FAILED; } + mem_info.shm_name = OpNameToShmName(mem_info.op_name); GE_CHECK_NOTNULL(allocator_); - GE_CHK_STATUS(allocator_->Allocate(tensor_size, var_addr)); - HostMemInfo info(var_addr, tensor_size); - var_memory_base_map_[op_name] = info; + GE_CHK_STATUS_RET(allocator_->Allocate(mem_info)); + var_memory_base_map_[mem_info.op_name] = mem_info; return SUCCESS; } @@ -79,8 +105,15 @@ Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_add GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str()); return INTERNAL_ERROR; } - base_addr = reinterpret_cast(reinterpret_cast(var_memory_base_map_[op_name].address)); - data_size = var_memory_base_map_[op_name].data_size; + base_addr = reinterpret_cast(reinterpret_cast(var_memory_base_map_[op_name].device_address)); + data_size = var_memory_base_map_[op_name].mem_size; return SUCCESS; } + +string HostMemManager::OpNameToShmName(const string &op_name) { + string sh_name("Ascend_"); + std::hash hash_str; + sh_name.append(std::to_string(hash_str(op_name))); + return sh_name; +} } // namespace ge diff --git a/ge/graph/manager/host_mem_manager.h b/ge/graph/manager/host_mem_manager.h index 3a5a0602..66bd5826 100644 --- a/ge/graph/manager/host_mem_manager.h +++ b/ge/graph/manager/host_mem_manager.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "framework/common/ge_inner_error_codes.h" @@ -35,19 +36,23 @@ #include "runtime/mem.h" namespace ge { -class HostMemoryAllocator { - public: - ~HostMemoryAllocator() = default; - - Status Allocate(std::size_t size, uint8_t *memory_addr); - Status DeAllocate(uint8_t *memory_addr); +struct SharedMemInfo { + string op_name; + string shm_name; + uint64_t mem_size = 0; + int fd = 0; + uint8_t *device_address = nullptr; + uint8_t *host_address = nullptr; + SharedMemInfo() = default; + SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} }; +class SharedMemAllocator { + public: + SharedMemAllocator() = default; + ~SharedMemAllocator() = default; -struct HostMemInfo { - uint8_t *address; - uint64_t data_size; - HostMemInfo() : address(nullptr), data_size(0) {} - HostMemInfo(uint8_t *addr, uint64_t size) : address(addr), data_size(size) {} + Status Allocate(SharedMemInfo &mem_info); + Status DeAllocate(SharedMemInfo &mem_info); }; class HostMemManager { @@ -60,12 +65,13 @@ class HostMemManager { static HostMemManager &Instance(); Status Initialize(); void Finalize() noexcept; - Status MallocMemoryForHostVar(const string &op_name, uint64_t tensor_size, uint8_t *&var_addr); + Status MallocSharedMemory(SharedMemInfo &mem_nfo); Status QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size); private: - std::unordered_map var_memory_base_map_; - std::unique_ptr allocator_; + static string OpNameToShmName(const string &op_name); + std::unordered_map var_memory_base_map_; + std::unique_ptr allocator_; mutable std::recursive_mutex mutex_; }; } // namespace ge diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 0a98e983..116a4b86 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 
(the "License"); * you may not use this file except in compliance with the License. @@ -18,11 +18,13 @@ #include +#include "common/ge/plugin_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/rdma_pool_allocator.h" +#include "graph/utils/type_utils.h" #include "hccl/base.h" -#include "hccl/hcom.h" +#include "hccl/hccl_types.h" namespace ge { Status InitRdmaPool(size_t size, rtMemType_t mem_type) { @@ -38,8 +40,36 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t return SUCCESS; } +Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size) { + GELOGD("MallocSharedMemory in"); + uint32_t type_size = 0; + bool result = TypeUtils::GetDataTypeLength(tensor_info.data_type, type_size); + if (!result) { + GELOGE(GRAPH_FAILED, "GetDataTypeLength failed, data_type=(%s).", + TypeUtils::DataTypeToSerialString(tensor_info.data_type).c_str()); + return GRAPH_FAILED; + } + memory_size = type_size; + for (auto dim : tensor_info.dims) { + if (dim <= 0) { + GELOGE(GRAPH_FAILED, "Tensor dims should be positive"); + return GRAPH_FAILED; + } + memory_size *= dim; + } + SharedMemInfo mem_info(tensor_info.var_name, memory_size); + Status ret = HostMemManager::Instance().MallocSharedMemory(mem_info); + if (ret != SUCCESS) { + GELOGE(GRAPH_FAILED, "MallocSharedMemory failed op name [%s]", tensor_info.var_name.c_str()); + return GRAPH_FAILED; + } + dev_addr = reinterpret_cast(reinterpret_cast(mem_info.device_address)); + GELOGD("MallocSharedMemory Succeeded"); + return SUCCESS; +} + Status GetVarBaseAddrAndSize(const string &var_name, uint64_t &base_addr, uint64_t &var_size) { GELOGD("GetVarBaseAddrAndSize in"); return HostMemManager::Instance().QueryVarMemInfo(var_name, base_addr, var_size); } -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/ge/graph/manager/model_manager/event_manager.cc b/ge/graph/manager/model_manager/event_manager.cc index 686eb3d8..69a946f9 100644 --- a/ge/graph/manager/model_manager/event_manager.cc +++ b/ge/graph/manager/model_manager/event_manager.cc @@ -67,7 +67,7 @@ Status EventManager::EventElapsedTime(size_t start_event_idx, size_t stop_event_ GE_CHK_BOOL_RET_STATUS_NOLOG(this->inited_, INTERNAL_ERROR); GE_CHK_BOOL_RET_STATUS_NOLOG(start_event_idx < this->event_list_.size() && - stop_event_idx < this->event_list_.size() && start_event_idx <= stop_event_idx, + stop_event_idx < this->event_list_.size() && start_event_idx <= stop_event_idx, PARAM_INVALID); GE_CHK_RT_RET(rtEventElapsedTime(&time, this->event_list_[start_event_idx], this->event_list_[stop_event_idx])); diff --git a/ge/graph/manager/model_manager/event_manager.h b/ge/graph/manager/model_manager/event_manager.h index a20afead..a7464e0c 100644 --- a/ge/graph/manager/model_manager/event_manager.h +++ b/ge/graph/manager/model_manager/event_manager.h @@ -17,6 +17,7 @@ #ifndef GE_GRAPH_MANAGER_MODEL_MANAGER_EVENT_MANAGER_H_ #define GE_GRAPH_MANAGER_MODEL_MANAGER_EVENT_MANAGER_H_ + #include #include "common/fmk_error_codes.h" diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc index ef82deff..03e01bd2 100644 --- a/ge/graph/manager/rdma_pool_allocator.cc +++ b/ge/graph/manager/rdma_pool_allocator.cc @@ -126,7 +126,7 @@ uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) { if (ShouldSplit(block, aligned_size)) { GELOGD("Block will be splited block size = %zu, aligned_size:%zu", block->size, aligned_size); auto 
*new_block = - new (std::nothrow) Block(device_id, block->size - aligned_size, nullptr, block->ptr + aligned_size); + new (std::nothrow) Block(device_id, block->size - aligned_size, nullptr, block->ptr + aligned_size); if (new_block == nullptr) { GELOGW("Block split failed"); return block->ptr; @@ -140,8 +140,8 @@ uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) { block->size = aligned_size; block_bin_.insert(new_block); } - return block->ptr; GELOGD("Find block size = %zu", block->size); + return block->ptr; } GELOGW("Memory block not founded."); return nullptr; diff --git a/ge/graph/manager/trans_var_data_utils.cc b/ge/graph/manager/trans_var_data_utils.cc index 60a0d0db..d6865716 100644 --- a/ge/graph/manager/trans_var_data_utils.cc +++ b/ge/graph/manager/trans_var_data_utils.cc @@ -122,7 +122,7 @@ Status CopyVarFromDevice(uint64_t session_id, const NodePtr &var, std::unique_pt return INTERNAL_ERROR; } - std::unique_ptr var_host(new (std::nothrow) uint8_t[var_size_bytes]); + std::unique_ptr var_host(new(std::nothrow) uint8_t[var_size_bytes]); if (var_host == nullptr) { GELOGE(OUT_OF_MEMORY, "Failed to malloc rt-host memory, size %ld", var_size_bytes); return OUT_OF_MEMORY; @@ -220,7 +220,9 @@ Status TransVarOnHost(uint8_t *var_data, const VarTransRoad &trans_road, formats /// @param var_size_bytes /// @param var_device /// @return -Status ReAssignVarAddr(uint64_t session_id, const std::string &var_name, const GeTensorDesc &tensor_desc, +Status ReAssignVarAddr(uint64_t session_id, + const std::string &var_name, + const GeTensorDesc &tensor_desc, void **var_device) { uint8_t *var_logic = nullptr; Status ret = VarManager::Instance(session_id)->GetVarAddr(var_name, tensor_desc, &var_logic); @@ -308,17 +310,19 @@ Status TransTensor(uint8_t *var_data, const NodePtr &var_src, const NodePtr &var auto src_data_datatype = var_src->GetOpDesc()->GetOutputDesc(0).GetDataType(); auto dst_data_datatype = var_dst->GetOpDesc()->GetOutputDesc(0).GetDataType(); GE_IF_BOOL_EXEC( - src_data_datatype != dst_data_datatype, - auto ret = formats::TransDataType( - {var_data, static_cast(src_data_shape_size), src_data_datatype, dst_data_datatype}, result); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "trans var data on host failed"); - return ret; - }); + src_data_datatype != dst_data_datatype, + auto ret = formats::TransDataType( + {var_data, static_cast(src_data_shape_size), src_data_datatype, dst_data_datatype}, result); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "trans var data on host failed"); + return ret; + }); return SUCCESS; } -Status CopyTensorFromSrcVarNode(const NodePtr &var_src, const NodePtr &var_dst, uint64_t session_id, +Status CopyTensorFromSrcVarNode(const NodePtr &var_src, + const NodePtr &var_dst, + uint64_t session_id, uint32_t device_id) { /// after FE fusion pass, input num of applymomentum op was changed, 0th input is var_fp32, 6th input is /// var_fp16(new). @@ -361,7 +365,7 @@ Status CopyTensorFromSrcVarNode(const NodePtr &var_src, const NodePtr &var_dst, GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Failed to send var data to device"); return ret); return SUCCESS; } -} // namespace +} // namespace Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, uint8_t *dst_addr, int64_t dst_addr_size, uint64_t session_id) { GE_CHK_BOOL_RET_STATUS(dst_addr != nullptr, FAILED, "dst addr is null. 
"); @@ -386,7 +390,7 @@ Status TransVarDataUtils::SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_a GE_CHK_RT_RET(rtMemcpy(host_addr, src_addr_size, src_addr, src_addr_size, RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_STATUS_RET( - SyncTensorToDevice(var_name, reinterpret_cast(host_addr), src_addr_size, dst_tensor_desc, session_id)); + SyncTensorToDevice(var_name, reinterpret_cast(host_addr), src_addr_size, dst_tensor_desc, session_id)); return SUCCESS; } @@ -398,10 +402,10 @@ Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeT uint8_t *src_addr = nullptr; GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); uint8_t *mem_addr = - src_addr - - static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + - static_cast( - reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); + src_addr - + static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + + static_cast( + reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); GE_CHK_RT_RET(rtMallocHost(reinterpret_cast(host_addr), src_tensor_size)); GE_CHK_RT_RET(rtMemcpy(*host_addr, src_tensor_size, mem_addr, src_tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); @@ -415,10 +419,10 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8 uint8_t *dst_addr = nullptr; GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); uint8_t *mem_addr = - dst_addr - - static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + - static_cast( - reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); + dst_addr - + static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + + static_cast( + reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("SyncTensorToDevice var_name %s, addr_size %u", var_name.c_str(), addr_size); @@ -426,8 +430,11 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8 return SUCCESS; } -Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, uint64_t session_id, - rtContext_t context, uint32_t graph_id, uint32_t thread_num) { +Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, + uint64_t session_id, + rtContext_t context, + uint32_t graph_id, + uint32_t thread_num) { ThreadPool executor(thread_num); std::vector> vector_future; for (auto &node : variable_nodes) { @@ -440,40 +447,40 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, } std::future f = executor.commit( - [](const ge::NodePtr &node, uint64_t session_id, rtContext_t ctx, uint32_t graph_id) -> Status { - rtError_t rt_ret = rtCtxSetCurrent(ctx); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - uint32_t allocated_graph_id = 0; - Status ret = VarManager::Instance(session_id)->GetAllocatedGraphId(node->GetName(), allocated_graph_id); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "var has not been allocated, node:%s, graph_id:%u.", node->GetName().c_str(), - graph_id); - return INTERNAL_ERROR; - } - uint32_t changed_graph_id = 0; - ret = VarManager::Instance(session_id)->GetChangedGraphId(node->GetName(), 
changed_graph_id); - bool call_trans_var = - (ret == SUCCESS && changed_graph_id == graph_id && changed_graph_id != allocated_graph_id); - if (call_trans_var) { - GELOGI("VarManager::GetChangedGraphId() success, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); - VarTransRoad *trans_road = VarManager::Instance(session_id)->GetTransRoad(node->GetName()); - if (trans_road == nullptr) { - GELOGI("The variable %s does not have any trans road", node->GetName().c_str()); - return SUCCESS; + [](const ge::NodePtr &node, uint64_t session_id, rtContext_t ctx, uint32_t graph_id) -> Status { + rtError_t rt_ret = rtCtxSetCurrent(ctx); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } - ret = TransVarData(node, *trans_road, session_id); + uint32_t allocated_graph_id = 0; + Status ret = VarManager::Instance(session_id)->GetAllocatedGraphId(node->GetName(), allocated_graph_id); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "TransVarData failed, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); + GELOGE(INTERNAL_ERROR, "var has not been allocated, node:%s, graph_id:%u.", node->GetName().c_str(), + graph_id); return INTERNAL_ERROR; } - VarManager::Instance(session_id)->RemoveChangedGraphId(node->GetName()); - } - return SUCCESS; - }, - node, session_id, context, graph_id); + uint32_t changed_graph_id = 0; + ret = VarManager::Instance(session_id)->GetChangedGraphId(node->GetName(), changed_graph_id); + bool call_trans_var = + (ret == SUCCESS && changed_graph_id == graph_id && changed_graph_id != allocated_graph_id); + if (call_trans_var) { + GELOGI("VarManager::GetChangedGraphId() success, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); + VarTransRoad *trans_road = VarManager::Instance(session_id)->GetTransRoad(node->GetName()); + if (trans_road == nullptr) { + GELOGI("The variable %s does not have any trans road", node->GetName().c_str()); + return SUCCESS; + } + ret = TransVarData(node, *trans_road, session_id); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "TransVarData failed, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); + return INTERNAL_ERROR; + } + VarManager::Instance(session_id)->RemoveChangedGraphId(node->GetName()); + } + return SUCCESS; + }, + node, session_id, context, graph_id); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); return FAILED; @@ -507,7 +514,7 @@ Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint GE_IF_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), "_copy_from_var_node", cp_from_node), GELOGI("Get original type of cp_from_node")); if (cp_from_node.length() != 0) { - (void)ge::AttrUtils::GetBool(node->GetOpDesc(), "_copy_value", copy_value); // no need to check value + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), "_copy_value", copy_value); // no need to check value if (!copy_value) { auto src_node = compute_graph->FindNode(cp_from_node); GE_CHECK_NOTNULL(src_node); @@ -516,7 +523,7 @@ Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint auto ret = CopyTensorFromSrcVarNode(src_node, node, session_id, device_id); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(FAILED, "copy tensor failed!"); return FAILED); // only copy once - (void)ge::AttrUtils::SetBool(node->GetOpDesc(), "_copy_value", true); // no need to check value + (void) ge::AttrUtils::SetBool(node->GetOpDesc(), "_copy_value", true); // no need to check value } } } diff --git 
a/ge/graph/manager/trans_var_data_utils.h b/ge/graph/manager/trans_var_data_utils.h old mode 100644 new mode 100755 index efdfa51f..95ebd09a --- a/ge/graph/manager/trans_var_data_utils.h +++ b/ge/graph/manager/trans_var_data_utils.h @@ -34,8 +34,11 @@ class TransVarDataUtils { static ge::Status SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_addr_size, const string &var_name, const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id_); - static ge::Status TransAllVarData(const std::vector &variable_nodes, uint64_t session_id, - rtContext_t context, uint32_t graph_id, uint32_t thread_num = 16); + static ge::Status TransAllVarData(const std::vector &variable_nodes, + uint64_t session_id, + rtContext_t context, + uint32_t graph_id, + uint32_t thread_num = 16); static ge::Status CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id); diff --git a/ge/graph/manager/util/debug.h b/ge/graph/manager/util/debug.h old mode 100644 new mode 100755 diff --git a/ge/graph/manager/util/hcom_util.cc b/ge/graph/manager/util/hcom_util.cc index 614f8527..d865b40e 100644 --- a/ge/graph/manager/util/hcom_util.cc +++ b/ge/graph/manager/util/hcom_util.cc @@ -108,19 +108,19 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType } GE_IF_BOOL_EXEC( - op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0; - GE_CHK_BOOL_RET_STATUS(ge::AttrUtils::GetInt(op_desc, HCOM_ATTR_RANK_SIZE, rank_size), PARAM_INVALID, - "get HCOM_ATTR_RANK_SIZE failed"); - GE_CHK_BOOL_RET_STATUS(rank_size != 0, PARAM_INVALID, "rank size is zero"); - int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); GE_CHK_STATUS_RET( - ge::CheckInt64Uint32MulOverflow(shape_size, size), "Product of shape size and size beyond INT64_MAX"); - block_size = (shape_size * size) / rank_size; - GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), "Total size is beyond the INT64_MAX"); - total_size = total_size + block_size; continue;); + op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0; + GE_CHK_BOOL_RET_STATUS(ge::AttrUtils::GetInt(op_desc, HCOM_ATTR_RANK_SIZE, rank_size), PARAM_INVALID, + "get HCOM_ATTR_RANK_SIZE failed"); + GE_CHK_BOOL_RET_STATUS(rank_size != 0, PARAM_INVALID, "rank size is zero"); + int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); GE_CHK_STATUS_RET( + ge::CheckInt64Uint32MulOverflow(shape_size, size), "Product of shape size and size beyond INT64_MAX"); + block_size = (shape_size * size) / rank_size; + GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), "Total size is beyond the INT64_MAX"); + total_size = total_size + block_size; continue;); int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); - GELOGD("hcom util node %s inputsize %ld, shapesize %ld, datasize %d.", op_desc->GetName().c_str(), input_size, - shape_size, size); + GELOGD("hcom util node %s inputsize %ld, shapesize %ld, datasize %d.", + op_desc->GetName().c_str(), input_size, shape_size, size); GE_CHK_STATUS_RET(ge::CheckInt64Int32MulOverflow(shape_size, size), "Product of shape size and size beyond INT64_MAX"); GE_IF_BOOL_EXEC(is_allgather, block_size = shape_size * size;); diff --git a/ge/graph/manager/util/hcom_util.h b/ge/graph/manager/util/hcom_util.h index 064058f8..448ed611 100644 --- a/ge/graph/manager/util/hcom_util.h +++ b/ge/graph/manager/util/hcom_util.h @@ -35,23 +35,23 @@ using std::string; using std::vector; static std::map kConstOpHcclDataType = { - {ge::DT_FLOAT, 
HCCL_DATA_TYPE_FP32}, - {ge::DT_FLOAT16, HCCL_DATA_TYPE_FP16}, - {ge::DT_INT8, HCCL_DATA_TYPE_INT8}, - {ge::DT_INT32, HCCL_DATA_TYPE_INT32}, + {ge::DT_FLOAT, HCCL_DATA_TYPE_FP32}, + {ge::DT_FLOAT16, HCCL_DATA_TYPE_FP16}, + {ge::DT_INT8, HCCL_DATA_TYPE_INT8}, + {ge::DT_INT32, HCCL_DATA_TYPE_INT32}, }; static std::map kConstOpHcclDataTypeSize = { - {HCCL_DATA_TYPE_FP32, sizeof(float)}, - {HCCL_DATA_TYPE_FP16, sizeof(float) / 2}, - {HCCL_DATA_TYPE_INT8, sizeof(int8_t)}, - {HCCL_DATA_TYPE_INT32, sizeof(int32_t)}, + {HCCL_DATA_TYPE_FP32, sizeof(float)}, + {HCCL_DATA_TYPE_FP16, sizeof(float) / 2}, + {HCCL_DATA_TYPE_INT8, sizeof(int8_t)}, + {HCCL_DATA_TYPE_INT32, sizeof(int32_t)}, }; static std::map kHorovodRedOpToHcclRedOp = { - {HOROVOD_REDUCE_SUM, HCCL_REDUCE_SUM}, {HOROVOD_REDUCE_MIN, HCCL_REDUCE_MIN}, - {HOROVOD_REDUCE_MAX, HCCL_REDUCE_MAX}, {HOROVOD_REDUCE_PROD, HCCL_REDUCE_PROD}, - {HOROVOD_REDUCE_RESERVED, HCCL_REDUCE_RESERVED}, + {HOROVOD_REDUCE_SUM, HCCL_REDUCE_SUM}, {HOROVOD_REDUCE_MIN, HCCL_REDUCE_MIN}, + {HOROVOD_REDUCE_MAX, HCCL_REDUCE_MAX}, {HOROVOD_REDUCE_PROD, HCCL_REDUCE_PROD}, + {HOROVOD_REDUCE_RESERVED, HCCL_REDUCE_RESERVED}, }; class HcomOmeUtil { @@ -150,7 +150,8 @@ class HcomOmeUtil { /// @return SUCCESS /// @return FAIL /// - static Status GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType data_type, bool is_allgather, int &count); + static Status GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType data_type, bool is_allgather, + int &count); private: /// diff --git a/ge/graph/manager/util/rt_context_util.cc b/ge/graph/manager/util/rt_context_util.cc index 75b25740..a8aad574 100644 --- a/ge/graph/manager/util/rt_context_util.cc +++ b/ge/graph/manager/util/rt_context_util.cc @@ -20,7 +20,7 @@ namespace ge { namespace { -const int64_t kDefaultGraphId = -1; + const int64_t kDefaultGraphId = -1; } void RtContextUtil::AddRtContext(uint64_t session_id, rtContext_t context) { diff --git a/ge/graph/manager/util/rt_context_util.h b/ge/graph/manager/util/rt_context_util.h index 50f0fbed..5fbd52be 100644 --- a/ge/graph/manager/util/rt_context_util.h +++ b/ge/graph/manager/util/rt_context_util.h @@ -52,3 +52,4 @@ class RtContextUtil { } // namespace ge #endif // GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_ + diff --git a/ge/graph/manager/util/variable_accelerate_ctrl.cc b/ge/graph/manager/util/variable_accelerate_ctrl.cc index b62be02c..522b5ee3 100644 --- a/ge/graph/manager/util/variable_accelerate_ctrl.cc +++ b/ge/graph/manager/util/variable_accelerate_ctrl.cc @@ -25,7 +25,7 @@ namespace { inline bool IsVariable(const std::string &node_type) { return node_type == VARIABLE || node_type == VARIABLEV2 || node_type == VARHANDLEOP; } -} // namespace +} bool VarAccelerateCtrl::IsVarPermitToChangeFormats(const std::string &var_name) { auto iter = var_names_to_change_times_.find(var_name); @@ -39,10 +39,9 @@ void VarAccelerateCtrl::SetVarChanged(const std::string &var_name) { auto times = ++var_names_to_change_times_[var_name]; for (auto &graph_id_to_var_names : graph_ids_to_var_names_) { if (graph_id_to_var_names.second.count(var_name) > 0) { - GELOGI( - "The format of var %s has been changed, total changed times %d, " - "the graph %u contains which should be re-build before next run", - var_name.c_str(), times, graph_id_to_var_names.first); + GELOGI("The format of var %s has been changed, total changed times %d, " + "the graph %u contains which should be re-build before next run", + var_name.c_str(), times, graph_id_to_var_names.first); /// The graph being compiled right 
now is also added to the rebuild-list /// and can be deleted by calling `SetGraphBuildEnd` at the end of compilation. graph_ids_need_rebuild_.insert(graph_id_to_var_names.first); diff --git a/ge/graph/optimize/common/params.h b/ge/graph/optimize/common/params.h index ee2a735b..c174a4d1 100644 --- a/ge/graph/optimize/common/params.h +++ b/ge/graph/optimize/common/params.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index 214f68eb..53695fba 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,7 +107,7 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph)); if (ret != SUCCESS) { - GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphStage2]: graph optimize failed, ret:%d", ret); + GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret); return ret; } } @@ -247,8 +247,8 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr graph_optimizer.size()); Status ret = SUCCESS; string exclude_core_Type = (core_type_ == kVectorCore) ? 
kAicoreEngine : kVectorEngine; - GELOGI("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", exclude_core_Type.c_str(), - core_type_.c_str()); + GELOGI("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", + exclude_core_Type.c_str(), core_type_.c_str()); if (graph_optimizer.size() != 0) { for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { if (iter->first == exclude_core_Type || iter->second == nullptr) { @@ -320,20 +320,20 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { auto input_desc = op_desc->GetInputDesc(name_index.second); input_desc.SetRefPortByIndex({name_index.second}); op_desc->UpdateInputDesc(name_index.second, input_desc); - GELOGI("SetRefPort: set op[%s] input desc[%u-%s] ref.", op_desc->GetName().c_str(), name_index.second, - name_index.first.c_str()); + GELOGI("SetRefPort: set op[%s] input desc[%u-%s] ref.", + op_desc->GetName().c_str(), name_index.second, name_index.first.c_str()); auto output_desc = op_desc->GetOutputDesc(static_cast(out_index)); output_desc.SetRefPortByIndex({name_index.second}); op_desc->UpdateOutputDesc(static_cast(out_index), output_desc); - GELOGI("SetRefPort: set op[%s] output desc[%u-%s] ref.", op_desc->GetName().c_str(), out_index, - name_index.first.c_str()); + GELOGI("SetRefPort: set op[%s] output desc[%u-%s] ref.", + op_desc->GetName().c_str(), out_index, name_index.first.c_str()); is_ref = true; } } if (is_ref) { AttrUtils::SetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); - GELOGI("param [node] %s is reference node, set attribute %s to be true.", node->GetName().c_str(), - ATTR_NAME_REFERENCE.c_str()); + GELOGI("param [node] %s is reference node, set attribute %s to be true.", + node->GetName().c_str(), ATTR_NAME_REFERENCE.c_str()); } } return SUCCESS; diff --git a/ge/graph/optimize/graph_optimize.h b/ge/graph/optimize/graph_optimize.h old mode 100644 new mode 100755 index 3d2db782..19bf1b4a --- a/ge/graph/optimize/graph_optimize.h +++ b/ge/graph/optimize/graph_optimize.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 9c166f4d..226ebbed 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -1,12 +1,11 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
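On the IdentifyReference hunk above: once a matching input/output name pair is found, both tensor descs get SetRefPortByIndex and the op is tagged with ATTR_NAME_REFERENCE=true. A hedged sketch of how a later pass could detect such a node (op_desc stands for the node's OpDescPtr; which passes actually consume the attribute is not shown in this diff):

bool is_ref = false;
// ATTR_NAME_REFERENCE is the attribute set by IdentifyReference; when true, the op's
// matching input and output are treated as sharing one buffer.
if (ge::AttrUtils::GetBool(op_desc, ge::ATTR_NAME_REFERENCE, is_ref) && is_ref) {
  // handle the node as a reference (in-place) op
}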
@@ -87,8 +86,8 @@ OutputRWType GetSingleNodeOutputRWTypeByIndex(const Node &node, uint32_t index) } // check if it is ref switch std::string type; - if ((node.GetType() == FRAMEWORK_OP_TYPE) && AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type) && - (type == REFSWITCH)) { + if ((node.GetType() == FRAMEWORK_OP_TYPE) && AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type) + && (type == REFSWITCH)) { return OutputRWType::kWriteable; } @@ -218,8 +217,8 @@ InputRWType GetSingleNodeInputRWTypeByIndex(const Node &node, uint32_t index) { if (op_desc == nullptr) { return InputRWType::kInvalidRWType; } - if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HCOMALLGATHER || - op_desc->GetType() == HCOMREDUCESCATTER) { + if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HCOMALLGATHER + || op_desc->GetType() == HCOMREDUCESCATTER) { return InputRWType::kScopeWriteable; } // check if it is ref input @@ -231,8 +230,8 @@ InputRWType GetSingleNodeInputRWTypeByIndex(const Node &node, uint32_t index) { } // check if it is ref switch std::string type; - if ((node.GetType() == FRAMEWORK_OP_TYPE) && (AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type)) && - (type == REFSWITCH) && (index == 0)) { + if ((node.GetType() == FRAMEWORK_OP_TYPE) && (AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type)) + && (type == REFSWITCH) && (index == 0)) { return InputRWType::kWriteable; } @@ -491,8 +490,8 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); GE_CHECK_NOTNULL(new_identity); - if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS && - GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { + if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS + || GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), peer_in_data_anchor->GetOwnerNode()->GetName().c_str()); @@ -510,8 +509,8 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I peer_in_data_anchor->GetOwnerNode()->GetName().c_str()); } else { // copy control edge to pre and peer node - if (GraphUtils::CopyInCtrlEdges(old_identity, peer_in_data_node) != SUCCESS || - GraphUtils::CopyOutCtrlEdges(old_identity, pre_node) != SUCCESS) { + if (GraphUtils::CopyInCtrlEdges(old_identity, peer_in_data_node) != SUCCESS + || GraphUtils::CopyOutCtrlEdges(old_identity, pre_node) != SUCCESS) { GELOGW("Fail to copy control edge from node %s.", old_identity->GetName().c_str()); return FAILED; } diff --git a/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc b/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc index be025730..34c3a0de 100644 --- a/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc +++ b/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the 
License. diff --git a/ge/graph/optimize/optimizer/allreduce_fusion_pass.h b/ge/graph/optimize/optimizer/allreduce_fusion_pass.h index 2701ba16..8b2168d9 100644 --- a/ge/graph/optimize/optimizer/allreduce_fusion_pass.h +++ b/ge/graph/optimize/optimizer/allreduce_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,10 +45,11 @@ class AllReducePass : public GraphPass { vector &peerInControlFromOutControlVec, ge::NodePtr &srcNodePtr); Status GetPeerOutDataToInData(std::unordered_set &anchorSet, - std::vector &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr); + std::vector &peerOutDataAnchorVec, + ge::NodePtr &srcNodePtr); Status GetPeerInAnchorToOutData(std::unordered_set &anchorSet, std::vector> &fusionOpPeerInDataAnchor, - std::vector> &fusionOpPeerInControlFromOutData, + std::vector>&fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr); }; } // namespace ge diff --git a/ge/graph/optimize/summary_optimize.cc b/ge/graph/optimize/summary_optimize.cc index a8325da3..077ab1b0 100644 --- a/ge/graph/optimize/summary_optimize.cc +++ b/ge/graph/optimize/summary_optimize.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,8 +80,8 @@ Status GraphOptimize::HandleSummaryOp(ComputeGraphPtr &compute_graph) { del_nodes.emplace_back(node_ptr); } } - GE_IF_BOOL_EXEC(!summary_output_indexes.empty(), - summary_output_indexes_.insert({compute_graph->GetGraphID(), summary_output_indexes})); + GE_IF_BOOL_EXEC(!summary_output_indexes.empty(), summary_output_indexes_.insert({compute_graph->GetGraphID(), + summary_output_indexes})); // add output nodes for summary std::vector> out_nodes_info; diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc old mode 100644 new mode 100755 index d1b00f12..87fac994 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -282,7 +282,7 @@ static std::string ToString(const std::vector &clusters) { ss << (*iter)->Id() << ")."; return ss.str(); } -} // namespace +} void DynamicShapePartitioner::MergeClustersUnknownShape() { // Merge unknown shape clusters @@ -354,19 +354,34 @@ Status DynamicShapePartitioner::MergeClusters() { return SUCCESS; } +bool DynamicShapePartitioner::JudgeUnknowShapeWithAttr(const OpDescPtr &opdesc) { + bool is_forced_unknown = false; + if (AttrUtils::GetBool(opdesc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_forced_unknown) && is_forced_unknown) { + GELOGD("Collect node %s as unknown as it was marked unknown forcibly.", opdesc->GetName().c_str()); + return true; + } + + bool forced_unknown = false; + if (AttrUtils::GetBool(opdesc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, forced_unknown) && forced_unknown) { + GELOGD("Collect node %s as unknown as it was marked force unknown node forcibly.", opdesc->GetName().c_str()); + return true; + } + return false; +} + Status DynamicShapePartitioner::CollectSpreadUnknownShapeNodes(NodePtr node) { if (unknown_shape_nodes_.count(node) > 0) { return SUCCESS; } auto opdesc = node->GetOpDesc(); + REQUIRE_NOT_NULL(opdesc, "Opdesc is nullptr."); // One can set 'ATTR_NAME_IS_UNKNOWN_SHAPE=true' on node so as to 
forcing the node flow into the unknown subgraph, // ignore the actual shape. - bool is_forced_unknown = false; - if (AttrUtils::GetBool(opdesc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_forced_unknown) && is_forced_unknown) { - GELOGD("Collect node %s as unknown as it was marked unknown forcibly.", node->GetName().c_str()); + if (JudgeUnknowShapeWithAttr(opdesc)) { unknown_shape_nodes_.insert(node); return SUCCESS; } + size_t anchor_index = 0; bool is_unknown = false; for (auto &out_tensor : opdesc->GetAllOutputsDesc()) { @@ -675,10 +690,10 @@ Status Cluster::BuildFrame() { auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()]; if (src_cluster->id_ != id_) { REQUIRE_GRAPH_SUCCESS( - GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), - "Failed remove edge from node %s index %d to node %s index %d.", - peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(peer_out_control_anchor), - in_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(in_control_anchor)); + GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), + "Failed remove edge from node %s index %d to node %s index %d.", + peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(peer_out_control_anchor), + in_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(in_control_anchor)); control_inputs_.insert(src_cluster); src_cluster->control_outputs_.insert(peer_out_control_anchor); } @@ -741,9 +756,9 @@ Status Cluster::BuildPartitionFrame() { auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()]; if (src_cluster->id_ != id_) { REQUIRE_GRAPH_SUCCESS( - GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), - "Failed remove edge from %s:%d to %s:%d.", peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), - peer_out_control_anchor->GetIdx(), node->GetName().c_str(), in_control_anchor->GetIdx()); + GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), + "Failed remove edge from %s:%d to %s:%d.", peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), + peer_out_control_anchor->GetIdx(), node->GetName().c_str(), in_control_anchor->GetIdx()); control_inputs_.insert(src_cluster); src_cluster->control_outputs_.insert(peer_out_control_anchor); } @@ -806,7 +821,7 @@ Status Cluster::BuildPartitionSubgraph() { int64_t parent_node_index = 0; for (auto anchor : inputs_) { auto data_op = - MakeShared(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA); + MakeShared(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA); REQUIRE_NOT_NULL(data_op, "Failed new memory for data op."); auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx()); REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc."); diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index 06a94833..b0477ae8 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
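As the comment in CollectSpreadUnknownShapeNodes above states, a node can be routed into the unknown-shape subgraph regardless of its actual shapes, and the new JudgeUnknowShapeWithAttr accepts either attribute. A minimal sketch of forcing a node that way before partitioning (op_desc stands for the node's OpDescPtr):

// Force this node into the unknown-shape partition; JudgeUnknowShapeWithAttr also
// honours ATTR_NAME_FORCE_UNKNOWN_SHAPE for the same purpose.
(void)ge::AttrUtils::SetBool(op_desc, ge::ATTR_NAME_IS_UNKNOWN_SHAPE, true);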
@@ -145,6 +145,7 @@ class DynamicShapePartitioner { // Debug functions void DumpGraph(const std::string &suffix); std::string DebugString() const; + bool JudgeUnknowShapeWithAttr(const OpDescPtr &opdesc); // Util functions Status CollectSpreadUnknownShapeNodes(NodePtr node); Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); diff --git a/ge/graph/partition/engine_place.cc b/ge/graph/partition/engine_place.cc old mode 100644 new mode 100755 index 80ac355f..cdf29e56 --- a/ge/graph/partition/engine_place.cc +++ b/ge/graph/partition/engine_place.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,13 +65,15 @@ Status EnginePlacer::Run() { std::string kernel_name; // Check if this node has assigned engine bool has_engine_attr = - AttrUtils::GetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, engine_name) && !engine_name.empty(); + AttrUtils::GetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, engine_name) && !engine_name.empty(); bool has_kernel_attr = - AttrUtils::GetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name) && !kernel_name.empty(); + AttrUtils::GetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name) && !kernel_name.empty(); bool use_exist_engine_name = !op_desc->GetOpKernelLibName().empty() || (has_kernel_attr && has_engine_attr); if (use_exist_engine_name) { if (op_desc->GetOpEngineName().empty()) { - GELOGI("Op %s set engine_name %s engine_name %s from attrs", op_desc->GetName().c_str(), engine_name.c_str(), + GELOGI("Op %s set engine_name %s engine_name %s from attrs", + op_desc->GetName().c_str(), + engine_name.c_str(), kernel_name.c_str()); op_desc->SetOpEngineName(engine_name); op_desc->SetOpKernelLibName(kernel_name); @@ -83,8 +85,8 @@ Status EnginePlacer::Run() { // If can't get op's engine name, keep check support finish and return failed if (engine_name.empty()) { is_check_support_success = false; - ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"}, - {op_desc->GetName(), op_desc->GetType()}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()}); GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Can not find engine of op type %s", node_ptr->GetOpDesc()->GetType().c_str()); continue; @@ -119,3 +121,4 @@ Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::st return SUCCESS; } } // namespace ge + diff --git a/ge/graph/partition/engine_place.h b/ge/graph/partition/engine_place.h old mode 100644 new mode 100755 index 1672df0d..5dc3e6a0 --- a/ge/graph/partition/engine_place.h +++ b/ge/graph/partition/engine_place.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
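Note on the dynamic_shape_partition change above: CollectSpreadUnknownShapeNodes now delegates to the new JudgeUnknowShapeWithAttr helper, which treats a node as unknown-shape whenever either ATTR_NAME_IS_UNKNOWN_SHAPE or ATTR_NAME_FORCE_UNKNOWN_SHAPE is set to true on its OpDesc, regardless of the actual output shapes, and the OpDesc is now null-checked before use. A minimal caller-side sketch of forcing a node into the unknown partition; the helper name and include paths here are illustrative, not part of the patch:

#include "graph/debug/ge_attr_define.h"   // ATTR_NAME_IS_UNKNOWN_SHAPE (include path assumed)
#include "graph/utils/attr_utils.h"
#include "graph/node.h"

// Illustrative helper: mark a node so that JudgeUnknowShapeWithAttr() returns true
// for it, even when every output shape on the node is fully static.
void MarkNodeForcedUnknown(const ge::NodePtr &node) {
  if (node == nullptr || node->GetOpDesc() == nullptr) {
    return;  // the partitioner also guards against a null OpDesc (REQUIRE_NOT_NULL above)
  }
  (void)ge::AttrUtils::SetBool(node->GetOpDesc(), ge::ATTR_NAME_IS_UNKNOWN_SHAPE, true);
}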
diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc old mode 100644 new mode 100755 index b280074e..07ab4198 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -15,11 +15,14 @@ */ #include "graph/partition/graph_partition.h" + #include #include #include #include #include + +#include "analyzer/analyzer.h" #include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "framework/common/types.h" @@ -149,18 +152,22 @@ Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr &output_merged_compute_graph, const ge::ComputeGraphPtr &original_compute_graph) { + Status real_ret = SUCCESS; auto ret = MergeSubGraph(output_merged_compute_graph, original_compute_graph); if (ret != SUCCESS) { + // even though failed, ensure all op do finish check support + real_ret = FAILED; GELOGE(ret, "Graph merging Failed"); - return ret; } + GE_CHECK_NOTNULL(original_compute_graph); // partition sub graph for (const auto &sub_graph : original_compute_graph->GetAllSubgraphs()) { ComputeGraphPtr merged_sub_graph = nullptr; ret = MergeSubGraph(merged_sub_graph, sub_graph); if (ret != SUCCESS) { + real_ret = FAILED; GELOGE(ret, "Sub graph merging Failed"); - return ret; + continue; } // add sub graph output_merged_compute_graph->SetName(original_compute_graph->GetName()); @@ -182,18 +189,22 @@ Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr GELOGE(FAILED, "Find corresponding node failed, parent node name is %s", parent_node->GetName().c_str()); return FAILED;) auto corresponding_node = graph_info.corresponding_node_in_partitions_[parent_node]; - GE_IF_BOOL_EXEC(corresponding_node == nullptr, - GELOGE(FAILED, "Get null node, node name is %s", parent_node->GetName().c_str()); - return FAILED;); + GE_IF_BOOL_EXEC(corresponding_node == nullptr, GELOGE(FAILED, "Get null node, node name is %s", + parent_node->GetName().c_str()); return FAILED;); merged_sub_graph->SetParentNode(corresponding_node); auto subgraph_parent_graph = corresponding_node->GetOwnerComputeGraph(); merged_sub_graph->SetParentGraph(subgraph_parent_graph); ret = output_merged_compute_graph->AddSubgraph(sub_graph->GetName(), merged_sub_graph); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, return ret;) } - graph_2_graph_partition_info_.clear(); - graph_2_subgraph_list_.clear(); - return SUCCESS; + ClearAllPartitionData(); + if (real_ret != SUCCESS) { + auto root_graph = ge::GraphUtils::FindRootGraph(original_compute_graph); + GE_CHECK_NOTNULL(root_graph); + (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), + root_graph->GetGraphID()); + } + return real_ret; } Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_compute_graph, @@ -264,10 +275,10 @@ Status ge::GraphPartitioner::UpdatePldOpDesc(const NodePtr &dst_node, int input_ } const auto &input_desc = dst_node->GetOpDesc()->GetInputDesc(static_cast(input_index)); GE_IF_BOOL_EXEC(pld_op_desc->AddOutputDesc(input_desc) != GRAPH_SUCCESS, GELOGE(FAILED, "AddOutputDesc failed"); - return FAILED;) + return FAILED;) if (pld_op_desc->MutableOutputDesc(0) != nullptr) { ge::TensorUtils::SetRealDimCnt(*(pld_op_desc->MutableOutputDesc(0).get()), - static_cast(input_desc.GetShape().GetDims().size())); + static_cast(input_desc.GetShape().GetDims().size())); } else { GELOGE(GE_GRAPH_ADD_PLC_END_FAILED, "[GraphPartitioner]: pld_op_desc is null."); 
return FAILED; @@ -282,10 +293,10 @@ Status ge::GraphPartitioner::UpdateEndOpDesc(const NodePtr &src_node, int output } const auto &output_desc = src_node->GetOpDesc()->GetOutputDesc(static_cast(output_index)); GE_IF_BOOL_EXEC(end_op_desc->AddInputDesc(output_desc) != GRAPH_SUCCESS, GELOGE(FAILED, "AddInputDesc failed"); - return FAILED;) + return FAILED;) if (end_op_desc->MutableInputDesc(0) != nullptr) { ge::TensorUtils::SetRealDimCnt(*(end_op_desc->MutableInputDesc(0).get()), - static_cast(output_desc.GetShape().GetDims().size())); + static_cast(output_desc.GetShape().GetDims().size())); } else { GELOGE(GE_GRAPH_ADD_PLC_END_FAILED, "[GraphPartitioner]: pld_op_desc is null."); return FAILED; @@ -314,12 +325,12 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr GELOGW("SetInt peerIndex failed");) GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, "parentOpType", dst_node->GetType()), GELOGW("SetStr parentOpType failed");) - GE_IF_BOOL_EXEC(!end_op_desc->SetExtAttr("parentNode", dst_node), GELOGW("SetEndExtAttr parentNode failed");) + GE_IF_BOOL_EXEC(!end_op_desc->SetExtAttr("parentNode", dst_node), + GELOGW("SetEndExtAttr parentNode failed");) OpDescPtr dst_node_op_desc = dst_node->GetOpDesc(); GE_CHECK_NOTNULL(dst_node_op_desc); - GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(end_op_desc, ATTR_NAME_END_REAR_NODE_ENGINE_NAME, dst_node_op_desc->GetOpEngineName()), - GELOGW("SetStr rearNodeEngineName failed");) + GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, ATTR_NAME_END_REAR_NODE_ENGINE_NAME, + dst_node_op_desc->GetOpEngineName()), GELOGW("SetStr rearNodeEngineName failed");) // replace input_desc of end with owner node's desc int output_index = ge::AnchorUtils::GetIdx(out_anchor); bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); @@ -372,13 +383,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr GELOGW("SetStr parentId failed");) GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "anchorIndex", AnchorUtils::GetIdx(out_anchor)), GELOGW("SetInt anchorIndex failed");) - GE_IF_BOOL_EXEC(!pld_op_desc->SetExtAttr("parentNode", src_node), GELOGW("SetPldExtAttr parentNode failed");) + GE_IF_BOOL_EXEC(!pld_op_desc->SetExtAttr("parentNode", src_node), + GELOGW("SetPldExtAttr parentNode failed");) OpDescPtr src_node_op_desc = src_node->GetOpDesc(); GE_CHECK_NOTNULL(src_node_op_desc); - GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(pld_op_desc, ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME, src_node_op_desc->GetOpEngineName()), - GELOGW("SetStr frontNodeEngineName failed");) + GE_IF_BOOL_EXEC(!AttrUtils::SetStr(pld_op_desc, ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME, + src_node_op_desc->GetOpEngineName()), GELOGW("SetStr frontNodeEngineName failed");) // do not care over flow graph_info_.num_of_pld_end_++; // replace output_desc of pld with input node's output desc @@ -585,30 +596,32 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorSetParentNode(compute_graph->GetParentNode()); - (void)AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); - auto sgi = MakeShared(); - if (sgi == nullptr) { - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); - return FAILED; - } - // set engine name - sgi->SetEngineName(engine_name); - // set stream label - string sub_graph_stream; - if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { - sgi->SetStreamLabel(sub_graph_stream); - } - /// for now inputFlag 
is the same before and after partition. It should - /// be changed according to the real partition - std::vector sub_graph_input(graph_info_.input_size_, true); - std::vector sub_graph_output(graph_info_.output_size_, true); - sgi->SetSubGraph(sub_graph); - sgi->SetOutputFlag(sub_graph_output); - sgi->SetInputFlag(sub_graph_input); - sgi->SetOutputContext(graph_info_.output_name_); - AddEndPldInformationToSubGraphInfo(sgi); - GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", engine_name.c_str(), - sub_graph->GetName().c_str(), sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str()); + (void) AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); + auto sgi = MakeShared(); + if (sgi == nullptr) { + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); + return FAILED; + } + // set engine name + sgi->SetEngineName(engine_name); + // set stream label + string sub_graph_stream; + if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { + sgi->SetStreamLabel(sub_graph_stream); + } + /// for now inputFlag is the same before and after partition. It should + /// be changed according to the real partition + std::vector sub_graph_input(graph_info_.input_size_, true); + std::vector sub_graph_output(graph_info_.output_size_, true); + sgi->SetSubGraph(sub_graph); + sgi->SetOutputFlag(sub_graph_output); + sgi->SetInputFlag(sub_graph_input); + sgi->SetOutputContext(graph_info_.output_name_); + AddEndPldInformationToSubGraphInfo(sgi); + GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", + engine_name.c_str(), + sub_graph->GetName().c_str(), + sgi->GetStreamLabel().empty() ? 
"null" : sgi->GetStreamLabel().c_str()); if (engine_name != input_subgraph_name) { // do not add Data subGraph into SubGraphInfo output_subgraphs.push_back(sgi); } else { @@ -834,22 +847,29 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo } Status ge::GraphPartitioner::Partition(ge::ComputeGraphPtr compute_graph, Mode mode) { - graph_2_graph_partition_info_.clear(); - graph_2_subgraph_list_.clear(); + ClearAllPartitionData(); + auto real_ret = SUCCESS; auto ret = PartitionSubGraph(compute_graph, mode); if (ret != SUCCESS) { GELOGE(ret, "Sub graph partition Failed"); - return ret; + real_ret = ret; } + GE_CHECK_NOTNULL(compute_graph); // partition sub graph for (const auto &sub_graph : compute_graph->GetAllSubgraphs()) { ret = PartitionSubGraph(sub_graph, mode); if (ret != SUCCESS) { GELOGE(ret, "Sub graph partition Failed"); - return ret; + real_ret = ret; } } - return SUCCESS; + if (real_ret != SUCCESS) { + auto root_graph = ge::GraphUtils::FindRootGraph(compute_graph); + GE_CHECK_NOTNULL(root_graph); + (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), + root_graph->GetGraphID()); + } + return real_ret; } Status ge::GraphPartitioner::PartitionSubGraph(ge::ComputeGraphPtr compute_graph, Mode mode) { @@ -1037,4 +1057,12 @@ void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPt } const Graph2SubGraphInfoList &ge::GraphPartitioner::GetSubGraphMap() { return graph_2_subgraph_list_; } + +void ge::GraphPartitioner::ClearAllPartitionData() { + graph_2_graph_partition_info_.clear(); + graph_2_subgraph_list_.clear(); + graph_2_input_subgraph_.clear(); + GELOGD("Clear all partition data success."); + return; +} } // namespace ge diff --git a/ge/graph/partition/graph_partition.h b/ge/graph/partition/graph_partition.h index a363bd9d..703a1570 100644 --- a/ge/graph/partition/graph_partition.h +++ b/ge/graph/partition/graph_partition.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -131,7 +131,7 @@ class GraphPartitioner { Status UpdatePldOpDesc(const NodePtr &dst_node, int input_index, OpDescPtr &end_op_desc); // Clear partition data - void ClearAllPartitionData(Mode mode); + void ClearAllPartitionData(); void SetMergedGraphId(ComputeGraphPtr &output_merged_compute_graph); struct GraphPartitionInfo { diff --git a/ge/graph/passes/addn_pass.cc b/ge/graph/passes/addn_pass.cc index c0592965..c8f820fc 100644 --- a/ge/graph/passes/addn_pass.cc +++ b/ge/graph/passes/addn_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/addn_pass.h b/ge/graph/passes/addn_pass.h index dd44e3cd..373d1842 100644 --- a/ge/graph/passes/addn_pass.h +++ b/ge/graph/passes/addn_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/aicpu_constant_folding_pass.cc b/ge/graph/passes/aicpu_constant_folding_pass.cc index 4157b5d6..ddc31079 100644 --- a/ge/graph/passes/aicpu_constant_folding_pass.cc +++ b/ge/graph/passes/aicpu_constant_folding_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ #include "init/gelib.h" namespace { -const char *const kKernelLibName = "aicpu_kernel"; +const char *const kKernelLibName = "aicpu_tf_kernel"; const char *const kNotSupported = "0"; const uint64_t kReleaseFlag = 1; const uint64_t kOpsFlag = 1; diff --git a/ge/graph/passes/aicpu_constant_folding_pass.h b/ge/graph/passes/aicpu_constant_folding_pass.h old mode 100644 new mode 100755 index 02babd8e..d584c392 --- a/ge/graph/passes/aicpu_constant_folding_pass.h +++ b/ge/graph/passes/aicpu_constant_folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/assert_pass.cc b/ge/graph/passes/assert_pass.cc index 725016a9..79f75f53 100644 --- a/ge/graph/passes/assert_pass.cc +++ b/ge/graph/passes/assert_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/assert_pass.h b/ge/graph/passes/assert_pass.h old mode 100644 new mode 100755 index 79955348..7d8546f2 --- a/ge/graph/passes/assert_pass.h +++ b/ge/graph/passes/assert_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_ASSERT_PASS_H_ #define GE_GRAPH_PASSES_ASSERT_PASS_H_ @@ -33,7 +34,7 @@ class AssertPass : public BaseNodePass { /// @param nodes_unused nodes to be deleted /// @return void /// - void CollectUnusedNode(const NodePtr& assert_node, std::vector& nodes_unused); + void CollectUnusedNode(const NodePtr &assert_node, std::vector& nodes_unused); /// /// remove unused nodes from graph diff --git a/ge/graph/passes/assign_pass.cc b/ge/graph/passes/assign_pass.cc index fe287f90..bb7a0f04 100644 --- a/ge/graph/passes/assign_pass.cc +++ b/ge/graph/passes/assign_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,7 +25,7 @@ namespace { const uint32_t kValidInputNodeOutputNum = 1; const int32_t kAssignRefInputIndex = 0; const int32_t kAssignValueInputIndex = 1; -} // namespace +} namespace ge { Status AssignPass::Run(NodePtr &node) { diff --git a/ge/graph/passes/assign_pass.h b/ge/graph/passes/assign_pass.h index d7dc5138..11cf1073 100644 --- a/ge/graph/passes/assign_pass.h +++ b/ge/graph/passes/assign_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc old mode 100644 new mode 100755 index 2c7fb9bb..690dee27 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,8 +50,8 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { return SUCCESS; } - bool is_known_graph = graph->GetGraphUnknownFlag(); - if (is_known_graph) { + bool is_unknown_graph = graph->GetGraphUnknownFlag(); + if (is_unknown_graph) { GELOGD("Graph[%s] is unknown graph. It will call fe interface to compile op.", graph->GetName().c_str()); GE_CHK_STATUS_RET(CompileUnknownGraphOp(atomic_node_vec)); return SUCCESS; @@ -196,7 +196,7 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { GELOGW("Get graph session_graph_id attr failed."); } if (!session_graph_id.empty()) { - (void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); + (void) AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); } string node_name = op_desc->GetName(); // Only flush subgraph name @@ -214,18 +214,20 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node) { GE_IF_BOOL_EXEC(atomic_node == nullptr || atomic_clean_node == nullptr, - DOMI_LOGE("param [atomic_node][atomic_clean_node] must not be null."); - return PARAM_INVALID); + DOMI_LOGE("param [atomic_node][atomic_clean_node] must not be null."); return PARAM_INVALID); InControlAnchorPtr in_ctrl_anchor = atomic_node->GetInControlAnchor(); OutControlAnchorPtr out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); if (in_ctrl_anchor == nullptr || out_ctrl_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "Get control anchor faild, dst node: %s.", atomic_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, + "Get control anchor faild, dst node: %s.", + atomic_node->GetName().c_str()); return INTERNAL_ERROR; } graphStatus status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor); if (status != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Graph add cleanAddrNode op out ctrl edge fail, dst node: %s.", + GELOGE(INTERNAL_ERROR, + "Graph add cleanAddrNode op out ctrl edge fail, dst node: %s.", atomic_node->GetName().c_str()); return INTERNAL_ERROR; } @@ -307,7 +309,7 @@ Status AtomicAddrCleanPass::CompileUnknownGraphOp(const vector &atomic_ return ge::GE_CLI_GE_NOT_INITIALIZED; } - for (auto &atomic_node : atomic_node_vec) { + for (auto &atomic_node: atomic_node_vec) { auto op_desc = atomic_node->GetOpDesc(); 
if (op_desc == nullptr) { GELOGW("op desc is nullptr."); diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h old mode 100644 new mode 100755 index e22c1792..ad60b7b5 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,10 +41,10 @@ class AtomicAddrCleanPass : public GraphPass { private: /** - * HandleLoopGraph - * @param graph - * @return - */ + * HandleLoopGraph + * @param graph + * @return + */ Status HandleLoopGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec); /** * HandleNormalGraph @@ -84,6 +84,7 @@ class AtomicAddrCleanPass : public GraphPass { Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, std::vector &common_atomic_nodes); + vector hcom_node_vec_; bool is_loop_graph_ = false; }; diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index b8065325..06c32e7d 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -283,8 +283,8 @@ Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_ } std::string out_type = out_desc->GetType(); bool need_skip = - out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) || - (((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) && (out_type == STREAMACTIVE)); + out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) || + (((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) && (out_type == STREAMACTIVE)); if (need_skip) { continue; } diff --git a/ge/graph/passes/attach_stream_label_pass.h b/ge/graph/passes/attach_stream_label_pass.h old mode 100644 new mode 100755 index 5820480d..d228134f --- a/ge/graph/passes/attach_stream_label_pass.h +++ b/ge/graph/passes/attach_stream_label_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc old mode 100644 new mode 100755 index 4da51ab0..8c808e46 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -134,7 +134,8 @@ Status BaseNodePass::IsolateAndDeleteNode(NodePtr &node, const std::vector GELOGE(FAILED, "parameter is null."); return FAILED; } - GELOGI("Prepare to isolate and delete node, name:%s, type:%s.", node->GetName().c_str(), node->GetType().c_str()); + GELOGI("Prepare to isolate and delete node, name:%s, type:%s.", node->GetName().c_str(), + node->GetType().c_str()); ComputeGraphPtr graph = node->GetOwnerComputeGraph(); if (graph == nullptr) { GELOGE(FAILED, "[%s] The owner graph must not be null.", node->GetName().c_str()); @@ -169,9 +170,9 @@ Status GEPass::Run(const NamesToPass &names_to_passes) { if (depth_ > kMaxRecursiveDepth) { GELOGE(PARAM_INVALID, - "The pass for root graph %s will be terminated because too many nesting" - " levels(%d) of subgraphs, last subgraph is %s", - root_graph_->GetName().c_str(), depth_, graph_->GetName().c_str()); + "The pass for root graph %s will be terminated because too many nesting" + " levels(%d) of subgraphs, last subgraph is %s", + root_graph_->GetName().c_str(), depth_, graph_->GetName().c_str()); return PARAM_INVALID; } @@ -211,8 +212,8 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); if (ret != SUCCESS) { - GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", node->GetName().c_str(), - node->GetType().c_str(), ret); + GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", + node->GetName().c_str(), node->GetType().c_str(), ret); return ret; } @@ -228,8 +229,8 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { SetFlagOption(kOptimizeAfterSubGraph, names_to_passes); ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); if (ret != SUCCESS) { - GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", node->GetName().c_str(), - node->GetType().c_str(), ret); + GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", + node->GetName().c_str(), node->GetType().c_str(), ret); return ret; } @@ -262,8 +263,8 @@ Status GEPass::RunPassesOnSubGraph(const NodePtr &node, const NamesToPass &names for (const auto &name : sub_graph_names) { auto graph = root_graph_->GetSubgraph(name); if (graph == nullptr) { - GELOGW("Can not find the sub graph %s from node %s, the pass-process will skip it", name.c_str(), - node->GetName().c_str()); + GELOGW("Can not find the sub graph %s from node %s, the pass-process will skip it", + name.c_str(), node->GetName().c_str()); continue; } has_sub_graph = true; diff --git a/ge/graph/passes/base_pass.h b/ge/graph/passes/base_pass.h index 6e7b292e..bb41691d 100644 --- a/ge/graph/passes/base_pass.h +++ b/ge/graph/passes/base_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/bitcast_pass.cc b/ge/graph/passes/bitcast_pass.cc index e8e1f84f..8388b21a 100644 --- a/ge/graph/passes/bitcast_pass.cc +++ b/ge/graph/passes/bitcast_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,12 +56,14 @@ Status BitcastPass::Run(NodePtr &node) { } Status BitcastPass::CheckDstDataType(const OpDescPtr op_desc, ge::DataType &dst_data_type) { + if (!ge::AttrUtils::GetDataType(op_desc, kAttrNameType, dst_data_type)) { GELOGE(PARAM_INVALID, "Node failed to get attribute type."); return PARAM_INVALID; } if (dst_data_type >= ge::DT_UNDEFINED) { - GELOGE(PARAM_INVALID, "dst_data_type[%s] is not valid.", TypeUtils::DataTypeToSerialString(dst_data_type).c_str()); + GELOGE(PARAM_INVALID, "dst_data_type[%s] is not valid.", + TypeUtils::DataTypeToSerialString(dst_data_type).c_str()); return PARAM_INVALID; } @@ -89,7 +91,8 @@ Status BitcastPass::CheckOutputShape(const OpDescPtr op_desc, const ge::DataType // get origin data_type and shape ge::DataType ori_data_type = input_tensor_desc->GetDataType(); if (ori_data_type >= ge::DT_UNDEFINED) { - GELOGE(PARAM_INVALID, "ori_data_type[%s] is not valid.", TypeUtils::DataTypeToSerialString(ori_data_type).c_str()); + GELOGE(PARAM_INVALID, "ori_data_type[%s] is not valid.", + TypeUtils::DataTypeToSerialString(ori_data_type).c_str()); return PARAM_INVALID; } @@ -145,4 +148,4 @@ Status BitcastPass::CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::Data return SUCCESS; } -} // namespace ge +} // namespace ge diff --git a/ge/graph/passes/bitcast_pass.h b/ge/graph/passes/bitcast_pass.h index 4a9e2e1b..34acaf57 100644 --- a/ge/graph/passes/bitcast_pass.h +++ b/ge/graph/passes/bitcast_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,8 @@ class BitcastPass : public BaseNodePass { private: Status CheckDstDataType(const OpDescPtr op_desc, ge::DataType &dst_data_type); Status CheckOutputShape(const OpDescPtr op_desc, const ge::DataType dst_data_type); - Status CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::DataType ori_data_type, ge::DataType dst_data_type); + Status CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::DataType ori_data_type, + ge::DataType dst_data_type); }; } // namespace ge diff --git a/ge/graph/passes/cast_remove_pass.cc b/ge/graph/passes/cast_remove_pass.cc index ab4f2098..62c92866 100644 --- a/ge/graph/passes/cast_remove_pass.cc +++ b/ge/graph/passes/cast_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cast_remove_pass.h b/ge/graph/passes/cast_remove_pass.h index 67fa697e..0ee52998 100644 --- a/ge/graph/passes/cast_remove_pass.h +++ b/ge/graph/passes/cast_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
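Note on the kernel library rename: aicpu_constant_folding_pass.cc above (and compile_nodes_pass.cc and constant_folding_pass.cc further below) now resolve the AICPU kernel info store under "aicpu_tf_kernel" instead of "aicpu_kernel". The string matters because the store is looked up by name at runtime and a stale name silently yields a null store. A hedged sketch of that lookup, mirroring the calls used later in this patch; the wrapper function itself is illustrative:

#include <memory>
#include <string>
#include "init/gelib.h"

// Resolve the AICPU TF kernel info store by its registered library name.
// Returns nullptr when GE is not initialized (or already finalized) or when
// no store is registered under that name.
ge::OpsKernelInfoStorePtr GetAicpuTfKernelStore() {
  const std::string kKernelLibName = "aicpu_tf_kernel";
  std::shared_ptr<ge::GELib> instance_ptr = ge::GELib::GetInstance();
  if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
    return nullptr;
  }
  return instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName);
}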
diff --git a/ge/graph/passes/cast_translate_pass.cc b/ge/graph/passes/cast_translate_pass.cc index ee67e93d..01b5c96b 100644 --- a/ge/graph/passes/cast_translate_pass.cc +++ b/ge/graph/passes/cast_translate_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,13 +52,15 @@ bool CastTranslatePass::CheckInAndOutDataAnchor(NodePtr &node) const { bool CastTranslatePass::IsCastNode(NodePtr &node) const { std::string original_type; - GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, GELOGW("get original type failed"); return false); + GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, + GELOGW("get original type failed"); return false); return (original_type == CAST); } bool CastTranslatePass::IsTranslateNode(NodePtr &node) const { std::string original_type; - GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, GELOGW("get original type failed"); return false); + GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, + GELOGW("get original type failed"); return false); return (original_type == TRANSLATE); } @@ -176,14 +178,13 @@ bool CastTranslatePass::IsOpSupportedOptimize(NodePtr &cast_node, NodePtr &trans } if (is_src_cast) { - GE_IF_BOOL_EXEC(!AttrUtils::SetInt(trans_op_desc, ATTR_NAME_INPUT_DATATYPE, static_cast(cast_in_datatype)), - GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); - return false); + GE_IF_BOOL_EXEC( + !AttrUtils::SetInt(trans_op_desc, ATTR_NAME_INPUT_DATATYPE, static_cast(cast_in_datatype)), + GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); return false); } else { GE_IF_BOOL_EXEC( - !AttrUtils::SetInt(trans_op_desc, ATTR_NAME_OUTPUT_DATATYPE, static_cast(cast_out_datatype)), - GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); - return false); + !AttrUtils::SetInt(trans_op_desc, ATTR_NAME_OUTPUT_DATATYPE, static_cast(cast_out_datatype)), + GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); return false); } GELOGI("CastTranslatePass, translate in %d out %d.", trans_op_indesc->GetDataType(), trans_op_outdesc->GetDataType()); return true; diff --git a/ge/graph/passes/cast_translate_pass.h b/ge/graph/passes/cast_translate_pass.h old mode 100644 new mode 100755 index a802fe9e..04c03d42 --- a/ge/graph/passes/cast_translate_pass.h +++ b/ge/graph/passes/cast_translate_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index 4415d144..a4662d5d 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "common_subexpression_elimination_pass.h" #include @@ -35,7 +34,9 @@ std::string GetCseKey(const NodePtr &node) { if (src_anchor == nullptr) { ss << in_anchor->GetIdx() << "-null-"; } else { - ss << in_anchor->GetIdx() << "-" << src_anchor->GetOwnerNode()->GetName() << "-" << src_anchor->GetIdx() << "-"; + ss << in_anchor->GetIdx() << "-" + << src_anchor->GetOwnerNode()->GetName() << "-" + << src_anchor->GetIdx() << "-"; } } @@ -74,13 +75,13 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { bool is_unknown = false; auto ret = NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown); if (ret != GRAPH_SUCCESS) { - GELOGW("Get node unknown status failed, node name:%s, type:%s.", node->GetName().c_str(), - node->GetType().c_str()); + GELOGW("Get node unknown status failed, node name:%s, type:%s.", + node->GetName().c_str(), node->GetType().c_str()); continue; } if (is_unknown) { - GELOGI("Current node %s, type %s is unknown shape which should be skip.", node->GetName().c_str(), - node->GetType().c_str()); + GELOGI("Current node %s, type %s is unknown shape which should be skip.", + node->GetName().c_str(), node->GetType().c_str()); continue; } auto key = GetCseKey(node); @@ -93,7 +94,7 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { if (node->GetAllOutDataAnchorsSize() != iter->second->GetAllOutDataAnchorsSize()) { GELOGW("The node %s and %s have the same CSE key, but different output anchor count, skip to fusion them", - iter->second->GetName().c_str(), node->GetName().c_str()); + iter->second->GetName().c_str(), node->GetName().c_str()); continue; } @@ -104,8 +105,8 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s error node %u", node->GetName().c_str(), - iter->second->GetName().c_str(), ret); + GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s error node %u", + node->GetName().c_str(), iter->second->GetName().c_str(), ret); return INTERNAL_ERROR; } @@ -117,9 +118,11 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } - GELOGI("Remove node %s by the CSE process, replace it with node %s", node->GetName().c_str(), - iter->second->GetName().c_str()); + GELOGI("Remove node %s by the CSE process, replace it with node %s", + node->GetName().c_str(), iter->second->GetName().c_str()); } return SUCCESS; } } // namespace ge + + diff --git a/ge/graph/passes/common_subexpression_elimination_pass.h b/ge/graph/passes/common_subexpression_elimination_pass.h index b5aecf6b..83bfbace 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.h +++ b/ge/graph/passes/common_subexpression_elimination_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ #define GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ @@ -23,7 +22,7 @@ namespace ge { class CommonSubexpressionEliminationPass : public GraphPass { public: - Status Run(ge::ComputeGraphPtr graph) override; + Status Run(ge::ComputeGraphPtr graph) override ; }; } // namespace ge -#endif // GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ +#endif //GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ diff --git a/ge/graph/passes/compile_nodes_pass.cc b/ge/graph/passes/compile_nodes_pass.cc old mode 100644 new mode 100755 index a93671c7..9faa35ae --- a/ge/graph/passes/compile_nodes_pass.cc +++ b/ge/graph/passes/compile_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "graph/passes/compile_nodes_pass.h" #include @@ -30,7 +29,7 @@ using domi::ImplyType; namespace { const char *const kAICPUEngineName = "DNN_VM_AICPU"; -const char *const kAICPUKernelLibName = "aicpu_kernel"; +const char *const kAICPUKernelLibName = "aicpu_tf_kernel"; } // namespace namespace ge { diff --git a/ge/graph/passes/compile_nodes_pass.h b/ge/graph/passes/compile_nodes_pass.h index 70f8cbf5..e2fb59c2 100644 --- a/ge/graph/passes/compile_nodes_pass.h +++ b/ge/graph/passes/compile_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cond_pass.cc b/ge/graph/passes/cond_pass.cc index c3a421b1..a2d77a1b 100644 --- a/ge/graph/passes/cond_pass.cc +++ b/ge/graph/passes/cond_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "graph/passes/cond_pass.h" #include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" @@ -21,9 +20,9 @@ #include "graph/utils/node_utils.h" namespace { -const std::string kStringLength = "StringLength"; -const size_t kScalarDimNum = 1; -} // namespace + const std::string kStringLength = "StringLength"; + const size_t kScalarDimNum = 1; +} namespace ge { Status CondPass::Run(NodePtr &node) { @@ -172,8 +171,8 @@ Status CondPass::GetCondInfoForWhile(const NodePtr &node, ComputeGraphPtr &graph // cond_graph has and only has one output uint32_t output_num = net_output_node->GetAllInDataAnchorsSize(); if (output_num != 1) { - GELOGE(FAILED, "output size of cond_graph is invalid, expect 1 but %u exactly, while_node:%s.", output_num, - node->GetName().c_str()); + GELOGE(FAILED, "output size of cond_graph is invalid, expect 1 but %u exactly, while_node:%s.", + output_num, node->GetName().c_str()); return FAILED; } @@ -234,9 +233,10 @@ Status CondPass::HandleScalarCond(const ComputeGraphPtr &graph, const OutDataAnc return FAILED; } - if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, cast_node) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Insert Cast node %s between %s->%s failed.", cast_node->GetName().c_str(), - out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); + if (GraphUtils::InsertNodeAfter(out_anchor, { in_anchor }, cast_node) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Insert Cast node %s between %s->%s failed.", + cast_node->GetName().c_str(), out_anchor->GetOwnerNode()->GetName().c_str(), + in_anchor->GetOwnerNode()->GetName().c_str()); return FAILED; } @@ -279,9 +279,10 @@ Status CondPass::InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr } AddRePassNode(new_node); - if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, new_node) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Insert %s node %s between %s->%s failed.", type.c_str(), new_node->GetName().c_str(), - out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); + if (GraphUtils::InsertNodeAfter(out_anchor, { in_anchor }, new_node) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Insert %s node %s between %s->%s failed.", type.c_str(), + new_node->GetName().c_str(), out_anchor->GetOwnerNode()->GetName().c_str(), + in_anchor->GetOwnerNode()->GetName().c_str()); return FAILED; } @@ -313,7 +314,8 @@ NodePtr CondPass::AddCastNode(const ComputeGraphPtr &graph, const std::string &n GELOGE(FAILED, "Create cast op_desc failed, name: %s.", name.c_str()); return nullptr; } - if (!(AttrUtils::SetInt(cast_desc, CAST_ATTR_SRCT, src) && AttrUtils::SetInt(cast_desc, CAST_ATTR_DSTT, dst) && + if (!(AttrUtils::SetInt(cast_desc, CAST_ATTR_SRCT, src) && + AttrUtils::SetInt(cast_desc, CAST_ATTR_DSTT, dst) && AttrUtils::SetInt(cast_desc, CAST_ATTR_DST_TYPE, dst) && AttrUtils::SetBool(cast_desc, CAST_ATTR_TRUNCATE, false))) { GELOGE(FAILED, "Set CAST_ATTR failed, node: %s.", name.c_str()); diff --git a/ge/graph/passes/cond_pass.h b/ge/graph/passes/cond_pass.h index fead8474..5c0c83bc 100644 --- a/ge/graph/passes/cond_pass.h +++ b/ge/graph/passes/cond_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef GE_GRAPH_PASSES_COND_PASS_H #define GE_GRAPH_PASSES_COND_PASS_H @@ -34,7 +33,7 @@ class CondPass : public BaseNodePass { /// @return Status /// static Status GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, OutDataAnchorPtr &cond_out_anchor, - InDataAnchorPtr &cond_in_anchor); + InDataAnchorPtr &cond_in_anchor); /// /// @brief Get cond info for if node @@ -45,7 +44,7 @@ class CondPass : public BaseNodePass { /// @return Status /// static Status GetCondInfoForIf(const NodePtr &node, ComputeGraphPtr &graph, OutDataAnchorPtr &cond_out_anchor, - InDataAnchorPtr &cond_in_anchor); + InDataAnchorPtr &cond_in_anchor); /// /// @brief Get cond info for while node @@ -56,7 +55,7 @@ class CondPass : public BaseNodePass { /// @return Status /// static Status GetCondInfoForWhile(const NodePtr &node, ComputeGraphPtr &graph, OutDataAnchorPtr &cond_out_anchor, - InDataAnchorPtr &cond_in_anchor); + InDataAnchorPtr &cond_in_anchor); /// /// @brief Process Cond Op with non-scalar cond_input @@ -97,8 +96,8 @@ class CondPass : public BaseNodePass { /// @param [in] type /// @return Status /// - Status InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, const InDataAnchorPtr &in_anchor, - const std::string &type); + Status InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, + const InDataAnchorPtr &in_anchor, const std::string &type); /// /// @brief Add cast node @@ -109,8 +108,8 @@ class CondPass : public BaseNodePass { /// @param [in] dst /// @return NodePtr /// - NodePtr AddCastNode(const ComputeGraphPtr &graph, const std::string &name, const GeTensorDesc &tensor, DataType src, - DataType dst); + NodePtr AddCastNode(const ComputeGraphPtr &graph, const std::string &name, const GeTensorDesc &tensor, + DataType src, DataType dst); }; } // namespace ge -#endif // GE_GRAPH_PASSES_COND_PASS_H +#endif //GE_GRAPH_PASSES_COND_PASS_H diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc index 1650be92..ec26ba3e 100644 --- a/ge/graph/passes/cond_remove_pass.cc +++ b/ge/graph/passes/cond_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "graph/passes/cond_remove_pass.h" #include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" @@ -29,7 +28,7 @@ const uint32_t kFalseIndex = 0; /// Extra 1 byte store '\0' const int32_t kStrHeadLen = 9; const int32_t kInvalidRetVal = -1; -} // namespace +} namespace ge { Status CondRemovePass::Run(NodePtr &node) { @@ -229,16 +228,17 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c const auto &output_desc_size = node->GetOpDesc()->GetOutputsSize(); // Create subgraph opdesc & node auto partitioncall_opdesc = - CreateSubgraphOpDesc(save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size); + CreateSubgraphOpDesc(save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size); auto partitioncall_node = node->GetOwnerComputeGraph()->AddNode(partitioncall_opdesc); // Link node's peerout anchors to new node's inanchors for (const auto &input_anchor : node->GetAllInAnchors()) { for (const auto &peerout_anchor : input_anchor->GetPeerAnchors()) { if (GraphUtils::AddEdge(peerout_anchor, partitioncall_node->GetInAnchor( - input_anchor->GetIdx() - kConditionIndexNum)) != ge::GRAPH_SUCCESS) { + input_anchor->GetIdx() - kConditionIndexNum)) != ge::GRAPH_SUCCESS) { GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d", peerout_anchor->GetOwnerNode()->GetName().c_str(), peerout_anchor->GetIdx(), - partitioncall_node->GetName().c_str(), input_anchor->GetIdx(), input_desc_size, output_desc_size); + partitioncall_node->GetName().c_str(), input_anchor->GetIdx(), input_desc_size, + output_desc_size); return FAILED; } } @@ -332,4 +332,4 @@ Status CondRemovePass::GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, return SUCCESS; } -} // namespace ge +} diff --git a/ge/graph/passes/cond_remove_pass.h b/ge/graph/passes/cond_remove_pass.h index 69dd7195..72ca64b8 100644 --- a/ge/graph/passes/cond_remove_pass.h +++ b/ge/graph/passes/cond_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_GRAPH_PASSES_COND_REMOVE_PASS_H #define GE_GRAPH_PASSES_COND_REMOVE_PASS_H diff --git a/ge/graph/passes/constant_folding_pass.cc b/ge/graph/passes/constant_folding_pass.cc index 80bf7867..4db14fc3 100644 --- a/ge/graph/passes/constant_folding_pass.cc +++ b/ge/graph/passes/constant_folding_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,27 +17,46 @@ #include "graph/passes/constant_folding_pass.h" #include - -#include "common/debug/log.h" -#include "common/types.h" -#include "framework/common/debug/ge_log.h" #include "graph/operator_factory.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/node_utils.h" -#include "graph/utils/op_desc_utils.h" #include "graph/utils/type_utils.h" -#include "inc/kernel.h" +#include "init/gelib.h" namespace ge { const int64_t kStartCallNum = 1; +const std::string kKernelLibName = "aicpu_tf_kernel"; +// tf_kernel.json opsFlag config +const std::string kOpsFlagClose = "0"; + +Status RunOpKernelWithCheck(NodePtr &node, + const vector &inputs, + std::vector &outputs) { + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized or is finalized."); + return UNSUPPORTED; + } + OpsKernelInfoStorePtr kernel_info = instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName); + if (kernel_info == nullptr) { + GELOGE(FAILED, "Get op kernel info store %s failed", kKernelLibName.c_str()); + return UNSUPPORTED; + } + + std::string ops_flag; + kernel_info->opsFlagCheck(*node, ops_flag); + if (ops_flag == kOpsFlagClose) { + return UNSUPPORTED; + } + return FoldingPass::RunOpKernel(node, inputs, outputs); +} const std::unordered_map> - &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { + &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { return statistic_of_ge_constant_folding_; } const std::unordered_map> - &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { + &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { return statistic_of_op_constant_folding_; } @@ -63,8 +82,8 @@ Status ConstantFoldingPass::Run(ge::NodePtr &node) { auto inputs = OpDescUtils::GetInputData(input_nodes); vector outputs; // Statistic of ge constant folding kernel - uint64_t start_time = GetCurrentTimestap(); - auto ret = RunOpKernel(node, inputs, outputs); + uint64_t start_time = GetCurrentTimestamp(); + auto ret = RunOpKernelWithCheck(node, inputs, outputs); if (ret != SUCCESS) { auto op_kernel = folding_pass::GetKernelByType(node); if (op_kernel == nullptr) { @@ -74,9 +93,9 @@ Status ConstantFoldingPass::Run(ge::NodePtr &node) { } // Statistic of op and fe constant folding kernel - start_time = GetCurrentTimestap(); + start_time = GetCurrentTimestamp(); ret = op_kernel->Compute(node_desc, inputs, outputs); - uint64_t cost_time = GetCurrentTimestap() - start_time; + uint64_t cost_time = GetCurrentTimestamp() - start_time; if (statistic_of_ge_constant_folding_.find(node->GetType()) != statistic_of_ge_constant_folding_.end()) { uint64_t &cnt = statistic_of_ge_constant_folding_[node->GetType()].first; uint64_t &cur_cost_time = statistic_of_ge_constant_folding_[node->GetType()].second; @@ -100,10 +119,10 @@ Status ConstantFoldingPass::Run(ge::NodePtr &node) { uint64_t &cnt = statistic_of_op_constant_folding_[node->GetType()].first; uint64_t &cost_time = statistic_of_op_constant_folding_[node->GetType()].second; cnt++; - cost_time += GetCurrentTimestap() - start_time; + cost_time += GetCurrentTimestamp() - start_time; } else { statistic_of_op_constant_folding_[node->GetType()] = - std::pair(kStartCallNum, GetCurrentTimestap() - start_time); + std::pair(kStartCallNum, GetCurrentTimestamp() - start_time); } } diff --git a/ge/graph/passes/constant_folding_pass.h b/ge/graph/passes/constant_folding_pass.h index 683b66f1..c977157e 
100644 --- a/ge/graph/passes/constant_folding_pass.h +++ b/ge/graph/passes/constant_folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,6 @@ class ConstantFoldingPass : public FoldingPass { Status Run(ge::NodePtr &node) override; const std::unordered_map> &GetGeConstantFoldingPerfStatistic() const; const std::unordered_map> &GetOpConstantFoldingPerfStatistic() const; - private: std::unordered_map> statistic_of_op_constant_folding_; std::unordered_map> statistic_of_ge_constant_folding_; diff --git a/ge/graph/passes/constant_fuse_same_pass.cc b/ge/graph/passes/constant_fuse_same_pass.cc index 4197f429..d0970c59 100644 --- a/ge/graph/passes/constant_fuse_same_pass.cc +++ b/ge/graph/passes/constant_fuse_same_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -132,11 +132,11 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, fuse_nodes[map_key].emplace_back(node); GELOGD("ConstantFuseSamePass, format %s, datatype %s, data_size %d, shape_size %zu. node name %s", TypeUtils::FormatToSerialString(map_key.format).c_str(), - TypeUtils::DataTypeToSerialString(map_key.data_type).c_str(), map_key.data_size, map_key.shape.size(), - node->GetName().c_str()); + TypeUtils::DataTypeToSerialString(map_key.data_type).c_str(), + map_key.data_size, map_key.shape.size(), node->GetName().c_str()); } - GELOGI("ConstantFuseSamePass, total_const_nums %d, insert_const_nums %d, fuse_nodes size is %zu.", total_const_nums, - insert_const_nums, fuse_nodes.size()); + GELOGI("ConstantFuseSamePass, total_const_nums %d, insert_const_nums %d, fuse_nodes size is %zu.", + total_const_nums, insert_const_nums, fuse_nodes.size()); } Status ConstantFuseSamePass::MoveOutDataEdges(NodePtr &src_node, NodePtr &dst_node) { diff --git a/ge/graph/passes/constant_fuse_same_pass.h b/ge/graph/passes/constant_fuse_same_pass.h old mode 100644 new mode 100755 index fffb784c..4935da84 --- a/ge/graph/passes/constant_fuse_same_pass.h +++ b/ge/graph/passes/constant_fuse_same_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
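Note on the constant_folding_pass.cc change above: folding now goes through RunOpKernelWithCheck, which queries the "aicpu_tf_kernel" info store for the node's ops flag via opsFlagCheck and returns UNSUPPORTED when the flag is "0" (kOpsFlagClose), so the caller falls back to the kernel returned by folding_pass::GetKernelByType instead of folding on AICPU; the timing helper was also renamed from GetCurrentTimestap to GetCurrentTimestamp. A generic sketch of the gate, with callables standing in for the GE-specific pieces; the wrapper name and Status values here are illustrative:

#include <functional>
#include <string>

enum Status { SUCCESS = 0, UNSUPPORTED = 2 };

// Consult a per-node ops flag before running the AICPU folding kernel; a "0"
// flag means the op is closed for this path, so report UNSUPPORTED and let the
// caller fall back to the host kernel registered for the node type.
Status RunKernelWithOpsFlagCheck(const std::function<std::string()> &query_ops_flag,
                                 const std::function<Status()> &run_aicpu_kernel) {
  const std::string kOpsFlagClose = "0";
  if (query_ops_flag() == kOpsFlagClose) {
    return UNSUPPORTED;
  }
  return run_aicpu_kernel();
}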
@@ -34,7 +34,7 @@ struct SameConstKey { std::vector shape; public: - bool operator<(const SameConstKey &key) const { + bool operator< (const SameConstKey &key) const { if (data_size != key.data_size) { return data_size < key.data_size; } @@ -66,9 +66,11 @@ class ConstantFuseSamePass : public GraphPass { Status Run(ge::ComputeGraphPtr graph) override; private: - void GetFuseConstNodes(ComputeGraphPtr &graph, std::map> &fuse_nodes); + void GetFuseConstNodes(ComputeGraphPtr &graph, + std::map> &fuse_nodes); Status MoveOutDataEdges(NodePtr &src_node, NodePtr &dst_node); - Status FuseConstNodes(ComputeGraphPtr &graph, std::map> &fuse_nodes); + Status FuseConstNodes(ComputeGraphPtr &graph, + std::map> &fuse_nodes); }; -} // namespace ge -#endif // GE_GRAPH_PASSES_CONSTANT_FUSE_SAME_PASS_H_ +} // namespace ge +#endif // GE_GRAPH_PASSES_CONSTANT_FUSE_SAME_PASS_H_ diff --git a/ge/graph/passes/control_trigger_pass.cc b/ge/graph/passes/control_trigger_pass.cc index 0c00d553..e179c64e 100644 --- a/ge/graph/passes/control_trigger_pass.cc +++ b/ge/graph/passes/control_trigger_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/control_trigger_pass.h b/ge/graph/passes/control_trigger_pass.h old mode 100644 new mode 100755 index 44d11cad..03ddbbd2 --- a/ge/graph/passes/control_trigger_pass.h +++ b/ge/graph/passes/control_trigger_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,15 @@ #include "inc/graph_pass.h" namespace ge { -enum ControlNodeType { kNotControlOp, kCondSwitch, kCondMerge, kLoopSwitchT, kLoopSwitchF, kEnter, kInvalidType }; +enum ControlNodeType { + kNotControlOp, + kCondSwitch, + kCondMerge, + kLoopSwitchT, + kLoopSwitchF, + kEnter, + kInvalidType +}; class ControlTriggerPass : public GraphPass { public: diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.cc b/ge/graph/passes/ctrl_edge_transfer_pass.cc old mode 100644 new mode 100755 index 9454c00d..f53dc7be --- a/ge/graph/passes/ctrl_edge_transfer_pass.cc +++ b/ge/graph/passes/ctrl_edge_transfer_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/util.h" #include "graph/utils/graph_utils.h" +#include "graph/debug/ge_attr_define.h" namespace ge { /* Pass Explaination: @@ -42,6 +43,12 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { GELOGD("CtrlEdgeTransferPass start running"); GE_CHECK_NOTNULL(graph); + bool is_dynamic_shape = false; + (void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); + if (!is_dynamic_shape) { + return SUCCESS; + } + for (ge::NodePtr &n : graph->GetDirectNode()) { auto op_desc = n->GetOpDesc(); if (op_desc == nullptr) { @@ -58,15 +65,14 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { for (auto &in_control_node : n->GetInControlNodes()) { GE_CHECK_NOTNULL(in_control_node); - GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), n->GetInControlAnchor()), - "remove edge failed"); + GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), + n->GetInControlAnchor()), "remove edge failed"); for (auto &out_node : n->GetOutNodes()) { if (out_node == nullptr) { continue; } - GE_CHK_STATUS_RET( - ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), out_node->GetInControlAnchor()), - "add edge failed."); + GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), + out_node->GetInControlAnchor()), "add edge failed."); } } } diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.h b/ge/graph/passes/ctrl_edge_transfer_pass.h old mode 100644 new mode 100755 index ee981012..1b6a624c --- a/ge/graph/passes/ctrl_edge_transfer_pass.h +++ b/ge/graph/passes/ctrl_edge_transfer_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_CTRL_EDGE_TRANSFER_PASS_H_ #define GE_GRAPH_PASSES_CTRL_EDGE_TRANSFER_PASS_H_ diff --git a/ge/graph/passes/data_pass.cc b/ge/graph/passes/data_pass.cc index 517e7737..38688848 100644 --- a/ge/graph/passes/data_pass.cc +++ b/ge/graph/passes/data_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace ge { Status DataPass::Run(ComputeGraphPtr compute_graph) { GE_CHECK_NOTNULL(compute_graph); - if (compute_graph->GetParentNode() == nullptr) { // for subgraph post process. + if (compute_graph->GetParentNode() == nullptr) { // for subgraph post process. return SUCCESS; } @@ -34,10 +34,10 @@ Status DataPass::Run(ComputeGraphPtr compute_graph) { if (node->GetType() == DATA) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - break; // parent_index not set, Graph from IR. + break; // parent_index not set, Graph from IR. } - return SUCCESS; // Graph from Parser. + return SUCCESS; // Graph from Parser. 
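Editorial note on the DataPass::Run hunk just above: the pass tells a parser-generated subgraph apart from a graph built directly from IR by whether its Data nodes carry ATTR_NAME_PARENT_NODE_INDEX (the index of the parent-node input they are bound to). The following is a minimal C++ sketch of that check, not the pass itself; the helper name and the header paths are assumptions for illustration, and only the AttrUtils::GetInt call and the attribute name are taken from the diff.

    // Sketch, assuming the GE graph headers are available at these (assumed) paths.
    #include "graph/debug/ge_attr_define.h"   // assumed location of ATTR_NAME_PARENT_NODE_INDEX
    #include "graph/utils/attr_utils.h"       // assumed location of AttrUtils

    // Returns true when a subgraph Data node records the parent-node input index
    // it maps to (parser-generated graphs do; graphs built straight from IR do not).
    static bool HasParentIndex(const ge::NodePtr &data_node, uint32_t &parent_index) {
      if (data_node == nullptr || data_node->GetOpDesc() == nullptr) {
        return false;
      }
      // GetInt returns false when the attribute was never set on the op desc.
      return ge::AttrUtils::GetInt(data_node->GetOpDesc(), ge::ATTR_NAME_PARENT_NODE_INDEX, parent_index);
    }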
} } @@ -65,16 +65,16 @@ Status DataPass::Run(ComputeGraphPtr compute_graph) { auto post_func = domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(parent_node->GetType()); if (post_func == nullptr) { - GELOGW("The subgraph post func for node %s type %s is null.", parent_node->GetName().c_str(), - parent_node->GetType().c_str()); + GELOGW("The subgraph post func for node %s type %s is null.", + parent_node->GetName().c_str(), parent_node->GetType().c_str()); return SUCCESS; } auto graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); auto ret = post_func(subgraph_name, graph); if (ret != SUCCESS) { - GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", graph.GetName().c_str(), - parent_node->GetName().c_str(), parent_node->GetType().c_str()); + GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", + graph.GetName().c_str(), parent_node->GetName().c_str(), parent_node->GetType().c_str()); return FAILED; } diff --git a/ge/graph/passes/data_pass.h b/ge/graph/passes/data_pass.h index 1f6d0f0b..bce2fd5a 100644 --- a/ge/graph/passes/data_pass.h +++ b/ge/graph/passes/data_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dimension_adjust_pass.cc b/ge/graph/passes/dimension_adjust_pass.cc old mode 100644 new mode 100755 index a734ddc3..fc5fe69f --- a/ge/graph/passes/dimension_adjust_pass.cc +++ b/ge/graph/passes/dimension_adjust_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,8 +58,8 @@ Status DimensionAdjustPass::Run(ge::NodePtr &node) { return INTERNAL_ERROR; } if (is_unknown) { - GELOGI("Current node %s, type %s is unknown shape which should be skip.", node->GetName().c_str(), - node->GetType().c_str()); + GELOGI("Current node %s, type %s is unknown shape which should be skip.", + node->GetName().c_str(), node->GetType().c_str()); return SUCCESS; } diff --git a/ge/graph/passes/dimension_adjust_pass.h b/ge/graph/passes/dimension_adjust_pass.h old mode 100644 new mode 100755 index fa9d2320..685d9694 --- a/ge/graph/passes/dimension_adjust_pass.h +++ b/ge/graph/passes/dimension_adjust_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dimension_compute_pass.cc b/ge/graph/passes/dimension_compute_pass.cc old mode 100644 new mode 100755 index a429e69d..dfa2d404 --- a/ge/graph/passes/dimension_compute_pass.cc +++ b/ge/graph/passes/dimension_compute_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. 
*/ + #include "graph/passes/dimension_compute_pass.h" #include diff --git a/ge/graph/passes/dimension_compute_pass.h b/ge/graph/passes/dimension_compute_pass.h index 40110757..ba1a057c 100644 --- a/ge/graph/passes/dimension_compute_pass.h +++ b/ge/graph/passes/dimension_compute_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dropout_pass.cc b/ge/graph/passes/dropout_pass.cc index ab88aa23..09c297a6 100644 --- a/ge/graph/passes/dropout_pass.cc +++ b/ge/graph/passes/dropout_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dropout_pass.h b/ge/graph/passes/dropout_pass.h old mode 100644 new mode 100755 index 506ee5d6..f127224e --- a/ge/graph/passes/dropout_pass.h +++ b/ge/graph/passes/dropout_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/end_of_sequence_add_control_pass.cc b/ge/graph/passes/end_of_sequence_add_control_pass.cc old mode 100644 new mode 100755 index 90c0841c..d6503d0d --- a/ge/graph/passes/end_of_sequence_add_control_pass.cc +++ b/ge/graph/passes/end_of_sequence_add_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/end_of_sequence_add_control_pass.h b/ge/graph/passes/end_of_sequence_add_control_pass.h index 2540a988..dcc65848 100644 --- a/ge/graph/passes/end_of_sequence_add_control_pass.h +++ b/ge/graph/passes/end_of_sequence_add_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,22 +33,22 @@ class EndOfSequenceAddControlPass : public GraphPass { private: /** - * Get EndOfSequence node in graph, nullptr if not exist. - * @param graph - * @return EndOfSequence node - */ + * Get EndOfSequence node in graph, nullptr if not exist. + * @param graph + * @return EndOfSequence node + */ inline NodePtr GetEndOfSequence(const ComputeGraphPtr &graph) const; /** - * Check whether this node is a data-like node. - * @param node - * @return - */ + * Check whether this node is a data-like node. + * @param node + * @return + */ bool IsDataLikeNode(const NodePtr &node); /** - * Check whether this node is a data-like node. - * @param node - * @return - */ + * Check whether this node is a data-like node. 
+ * @param node + * @return + */ Status AddControlEdge(NodePtr &end_of_sequence, std::vector &target_nodes); }; } // namespace ge diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index 84621689..f19223f1 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,16 +47,17 @@ Status EnterPass::Run(NodePtr &node) { return SUCCESS; } - bool need_remove_flag = - in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty() && node->GetOutDataNodes().empty(); + bool need_remove_flag = in_node->GetInControlNodes().empty() && + node->GetInControlNodes().empty() && + node->GetOutDataNodes().empty(); if (need_remove_flag) { for (auto &out_ctrl_node : node->GetOutControlNodes()) { if (out_ctrl_node == nullptr) { continue; } if (GraphUtils::RemoveEdge(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", node->GetName().c_str(), - out_ctrl_node->GetName().c_str()); + GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", + node->GetName().c_str(), out_ctrl_node->GetName().c_str()); return FAILED; } } diff --git a/ge/graph/passes/enter_pass.h b/ge/graph/passes/enter_pass.h index 04ac62ee..dc6bffb1 100644 --- a/ge/graph/passes/enter_pass.h +++ b/ge/graph/passes/enter_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc old mode 100644 new mode 100755 index 430cf86d..23e14b43 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -122,11 +122,10 @@ NodePtr FlowCtrlPass::InsertOp(ComputeGraphPtr &compute_graph, const string &nod NodePtr FlowCtrlPass::InsertStreamSwitchOp(ComputeGraphPtr &compute_graph, const string &switch_name, const NodePtr &loop_cond, const NodePtr &iter_per_loop) { - GE_IF_BOOL_EXEC(loop_cond == nullptr || loop_cond->GetOpDesc() == nullptr, GELOGE(FAILED, "loop_cond is null"); - return nullptr); + GE_IF_BOOL_EXEC(loop_cond == nullptr || loop_cond->GetOpDesc() == nullptr, + GELOGE(FAILED, "loop_cond is null"); return nullptr); GE_IF_BOOL_EXEC(iter_per_loop == nullptr || iter_per_loop->GetOpDesc() == nullptr, - GELOGE(FAILED, "iter_per_loop is nullptr"); - return nullptr); + GELOGE(FAILED, "iter_per_loop is nullptr"); return nullptr); std::vector input_desc_list = {loop_cond->GetOpDesc()->GetOutputDesc(0), iter_per_loop->GetOpDesc()->GetOutputDesc(0)}; std::vector output_desc_list; @@ -151,10 +150,9 @@ NodePtr FlowCtrlPass::InsertStreamSwitchOp(ComputeGraphPtr &compute_graph, const } // stream switch op need switch cond by attr. 
- GE_IF_BOOL_EXEC( - !AttrUtils::SetInt(stream_switch->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, static_cast(RT_LESS)), - DOMI_LOGE("set ATTR_NAME_STREAM_SWITCH_COND failed"); - return nullptr); + GE_IF_BOOL_EXEC(!AttrUtils::SetInt(stream_switch->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, + static_cast(RT_LESS)), + DOMI_LOGE("set ATTR_NAME_STREAM_SWITCH_COND failed"); return nullptr); return stream_switch; } @@ -204,7 +202,8 @@ Status FlowCtrlPass::AddGlobalStepVariableNode(ComputeGraphPtr &compute_graph) { GeTensorDesc tensor_desc(GeShape({1}), FORMAT_ND, DT_UINT64); std::vector input_desc_list = {}; std::vector output_desc_list = {tensor_desc}; - NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, input_desc_list, output_desc_list); + NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, + input_desc_list, output_desc_list); if (global_step == nullptr) { GELOGE(FAILED, "Add global_step node failed, global_step is null."); return FAILED; @@ -222,8 +221,8 @@ Status FlowCtrlPass::AddGlobalStepVariableNode(ComputeGraphPtr &compute_graph) { NodePtr FlowCtrlPass::InsertAssignOp(ge::ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, const NodePtr &ref_node, const NodePtr &value_node) { - GE_IF_BOOL_EXEC(ref_node == nullptr || value_node == nullptr || ref_node->GetOpDesc() == nullptr || - value_node->GetOpDesc() == nullptr, + GE_IF_BOOL_EXEC(ref_node == nullptr || value_node == nullptr || + ref_node->GetOpDesc() == nullptr || value_node->GetOpDesc() == nullptr, GELOGE(FAILED, "ref node or value node is null"); return nullptr); GeTensorDesc ref_tensor_desc = ref_node->GetOpDesc()->GetOutputDesc(0); @@ -265,7 +264,7 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co */ // Insert AssignAdd node NodePtr assign_add_node = - InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); + InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); if (assign_add_node == nullptr || switch_node == nullptr) { GELOGE(PARAM_INVALID, "assign add node or switch node is null"); return FAILED; @@ -276,7 +275,7 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node, active_name), "set stream label failed"); // used for stream assign to find true branch - GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, {active_name}), "set active label list failed"); + GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); // 2. 
Insert active node NodePtr active_node = InsertOp(compute_graph, STREAMACTIVE, active_name, {}, {}); @@ -286,8 +285,7 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co } GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed"); GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); - return FAILED); + DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); // add ctrl edges graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_add_node->GetInControlAnchor()); @@ -319,7 +317,7 @@ Status FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c */ // Insert Assign node NodePtr assign_node = - InsertAssignOp(compute_graph, ASSIGN, NODE_NAME_FLOWCTRL_LOOP_ASSIGN, loop_cond_node, loop_reset_node); + InsertAssignOp(compute_graph, ASSIGN, NODE_NAME_FLOWCTRL_LOOP_ASSIGN, loop_cond_node, loop_reset_node); if (assign_node == nullptr || switch_node == nullptr) { GELOGE(PARAM_INVALID, "assign_node or switch node is null"); return FAILED; @@ -415,8 +413,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, * itersPerLoop loopCond */ GE_IF_BOOL_EXEC(loop_after_node == nullptr || compute_graph == nullptr, - DOMI_LOGE("loop after node or compute graph is null"); - return FAILED); + DOMI_LOGE("loop after node or compute graph is null"); return FAILED); InDataAnchorPtr in_anchor = loop_after_node->GetInDataAnchor(0); if (in_anchor == nullptr || in_anchor->GetPeerOutAnchor() == nullptr) { GELOGE(FAILED, "Find %s in data anchor failed.", loop_after_node->GetName().c_str()); @@ -471,8 +468,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed"); GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); - return FAILED); + DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), active_node->GetInControlAnchor()); if (add_ret != GRAPH_SUCCESS) { @@ -482,9 +478,9 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, } // used for stream assign to find true branch - GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, {active_name}), "set active label list failed"); + GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); // used for stream assign to find active stream - GE_CHK_STATUS_RET(SetActiveLabelList(active_node, {loop_pre_node->GetName()}), "set active label list failed"); + GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed"); return SUCCESS; } } // namespace ge diff --git a/ge/graph/passes/flow_ctrl_pass.h b/ge/graph/passes/flow_ctrl_pass.h old mode 100644 new mode 100755 index a928aaa7..d01dcd44 --- a/ge/graph/passes/flow_ctrl_pass.h +++ b/ge/graph/passes/flow_ctrl_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/folding_pass.cc b/ge/graph/passes/folding_pass.cc old mode 100644 new mode 100755 index b52a3226..93dc2c40 --- a/ge/graph/passes/folding_pass.cc +++ b/ge/graph/passes/folding_pass.cc @@ -30,6 +30,7 @@ #include "graph/debug/ge_attr_define.h" #include "ge_local_engine/engine/host_cpu_engine.h" + namespace ge { namespace folding_pass { shared_ptr GetKernelByType(const NodePtr &node) { @@ -83,7 +84,7 @@ NodePtr AddConstNodeToGraph(GeTensorPtr &tensor, ComputeGraphPtr &graph) { } GE_IF_BOOL_EXEC(graph == nullptr, GELOGW("input param graph is null"); return nullptr); - (void)AttrUtils::SetListStr(const_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())); + (void) AttrUtils::SetListStr(const_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())); return graph->AddNodeFront(const_desc); } @@ -112,7 +113,8 @@ NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tens } } // namespace -Status FoldingPass::RunOpKernel(NodePtr &node, const vector &inputs, +Status FoldingPass::RunOpKernel(NodePtr &node, + const vector &inputs, std::vector &outputs) { return HostCpuEngine::GetInstance().Run(node, inputs, outputs); } @@ -135,8 +137,8 @@ Status FoldingPass::Folding(NodePtr &node, vector &outputs) { auto in_data_nodes = node->GetInDataNodes(); std::unordered_set in_data_nodes_set(in_data_nodes.begin(), in_data_nodes.end()); if (IsolateAndDeleteNode(node, {}) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to isolate and delete node %s, type %s.", node->GetName().c_str(), - node->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "Failed to isolate and delete node %s, type %s.", + node->GetName().c_str(), node->GetType().c_str()); return INTERNAL_ERROR; } for (auto iter = in_data_nodes_set.begin(); iter != in_data_nodes_set.end(); ++iter) { @@ -147,8 +149,8 @@ Status FoldingPass::Folding(NodePtr &node, vector &outputs) { continue; } if (IsolateAndDeleteNode(pre_node, {}) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to isolate and delete in data node %s, type %s.", pre_node->GetName().c_str(), - pre_node->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "Failed to isolate and delete in data node %s, type %s.", + pre_node->GetName().c_str(), pre_node->GetType().c_str()); return INTERNAL_ERROR; } } @@ -186,7 +188,7 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) { node->GetName().c_str()); auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx()); auto identity = - AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); + AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); if (identity == nullptr) { GELOGE(INTERNAL_ERROR, "Failed to add identity node to graph."); return INTERNAL_ERROR; @@ -235,8 +237,8 @@ Status FoldingPass::AddConstNode(NodePtr &node, IndexsToAnchors indexes_to_ancho auto const_node = AddConstNodeToGraph(weight, graph); if (const_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to add dynamic const node, node name:%s, index:%zu.", node->GetName().c_str(), - index); + GELOGE(INTERNAL_ERROR, "Failed to add dynamic const node, node name:%s, index:%zu.", + node->GetName().c_str(), index); return INTERNAL_ERROR; } GELOGI("add const_node:%s, replace node %s, type %s, index %zu.", const_node->GetName().c_str(), diff --git a/ge/graph/passes/folding_pass.h b/ge/graph/passes/folding_pass.h old mode 100644 new mode 100755 index 0ffd2eb2..745cffd7 --- 
a/ge/graph/passes/folding_pass.h +++ b/ge/graph/passes/folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_FOLDING_PASS_H_ #define GE_GRAPH_PASSES_FOLDING_PASS_H_ @@ -28,19 +29,19 @@ namespace ge { namespace folding_pass { shared_ptr GetKernelByType(const NodePtr &node); bool IsNoNeedConstantFolding(const NodePtr &node); -} // namespace folding_pass +} using IndexsToAnchors = std::map>; class FoldingPass : public BaseNodePass { public: static Status RunOpKernel(NodePtr &node, const vector &inputs, vector &outputs); - protected: Status Folding(NodePtr &node, vector &outputs); - private: - Status AddConstNode(NodePtr &node, IndexsToAnchors indexes_to_anchors, std::vector &v_weight); + Status AddConstNode(NodePtr &node, + IndexsToAnchors indexes_to_anchors, + std::vector &v_weight); Status DealWithInNodes(NodePtr &node); Status RemoveNodeKeepingCtrlEdges(NodePtr &node); Status ConnectNodeToInAnchor(InDataAnchorPtr &in_anchor, NodePtr &node, int node_index); diff --git a/ge/graph/passes/for_pass.cc b/ge/graph/passes/for_pass.cc index e913985b..f3caea35 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,17 +28,17 @@ #include "graph/utils/op_desc_utils.h" namespace { -const uint32_t kWhileIInputIndex = 0; -const uint32_t kWhileAbsDeltaInputIndex = 1; -const uint32_t kWhileRangeInputIndex = 2; -const uint32_t kWhileStartInputIndex = 3; -const uint32_t kWhileDeltaInputIndex = 4; -const uint32_t kWhileDataInputIndex = 5; -const uint32_t kSubgraphLoopVarInputIndex = 0; -const uint32_t kSubgraphInputIndex = 1; -const uint32_t kWhileOutputIndex = 5; -const std::string kAbs = "Abs"; -} // namespace + const uint32_t kWhileIInputIndex = 0; + const uint32_t kWhileAbsDeltaInputIndex = 1; + const uint32_t kWhileRangeInputIndex = 2; + const uint32_t kWhileStartInputIndex = 3; + const uint32_t kWhileDeltaInputIndex = 4; + const uint32_t kWhileDataInputIndex = 5; + const uint32_t kSubgraphLoopVarInputIndex = 0; + const uint32_t kSubgraphInputIndex = 1; + const uint32_t kWhileOutputIndex = 5; + const std::string kAbs = "Abs"; +} namespace ge { Status ForPass::Run(NodePtr &node) { @@ -54,12 +54,12 @@ Status ForPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(root_graph); ForInfo for_info; - GE_CHK_STATUS_RET(BuildForInfo(root_graph, node, for_info), "Build ForInfo failed, node:%s.", - node->GetName().c_str()); + GE_CHK_STATUS_RET(BuildForInfo(root_graph, node, for_info), + "Build ForInfo failed, node:%s.", node->GetName().c_str()); WhileInfo while_info; - GE_CHK_STATUS_RET(TranWhileInfo(graph, for_info, while_info), "Transfer WhileInfo from ForInfo failed, node:%s.", - node->GetName().c_str()); + GE_CHK_STATUS_RET(TranWhileInfo(graph, for_info, while_info), + "Transfer WhileInfo from ForInfo failed, node:%s.", node->GetName().c_str()); ComputeGraphPtr cond_graph = BuildCondGraph(while_info); if ((cond_graph == nullptr) || (root_graph->AddSubgraph(cond_graph) != GRAPH_SUCCESS)) { @@ -73,8 +73,8 @@ Status ForPass::Run(NodePtr &node) { return 
FAILED; } - GE_CHK_STATUS_RET(UpdateForBodyInputMapping(while_info), "Update InputMapping for for-body-graph failed, node:%s.", - node->GetName().c_str()); + GE_CHK_STATUS_RET(UpdateForBodyInputMapping(while_info), + "Update InputMapping for for-body-graph failed, node:%s.", node->GetName().c_str()); // for node has and only has one subgraph GE_CHECK_NOTNULL(node->GetOpDesc()); @@ -190,10 +190,10 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vectorGetName().c_str(), index); return FAILED; } - GE_IF_BOOL_EXEC( - in_data_anchor->GetPeerOutAnchor() == nullptr, - GELOGW("Get null input by index %d from node %s ", in_data_anchor->GetIdx(), node->GetName().c_str()); - continue); + GE_IF_BOOL_EXEC(in_data_anchor->GetPeerOutAnchor() == nullptr, + GELOGW("Get null input by index %d from node %s ", + in_data_anchor->GetIdx(), node->GetName().c_str()); + continue); data_inputs.emplace_back(in_data_anchor->GetPeerOutAnchor()); } @@ -270,8 +270,8 @@ Status ForPass::TranWhileInfo(const ComputeGraphPtr &graph, const ForInfo &for_i return FAILED; } - GELOGI("Transfer for_info to while_info succ, for_node:%s, while_node:%s.", for_name.c_str(), - while_info.while_node->GetName().c_str()); + GELOGI("Transfer for_info to while_info succ, for_node:%s, while_node:%s.", + for_name.c_str(), while_info.while_node->GetName().c_str()); return SUCCESS; } @@ -316,8 +316,8 @@ OpDescPtr ForPass::CreateConstDesc(const std::string &name, int32_t value) { /// @param [out] abs_delta_input /// @return Status /// -Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for_info, OutDataAnchorPtr &range_input, - OutDataAnchorPtr &abs_delta_input) { +Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for_info, + OutDataAnchorPtr &range_input, OutDataAnchorPtr &abs_delta_input) { std::string for_name = for_info.for_node->GetName(); GELOGD("Begin to create loop_count input, node:%s", for_name.c_str()); @@ -332,16 +332,16 @@ Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for // i * |delta| < |limit-start| PartialGraphBuilder graph_builder; graph_builder.SetOwnerGraph(graph) - .AddExistNode(for_info.start->GetOwnerNode()) - .AddExistNode(for_info.limit->GetOwnerNode()) - .AddExistNode(for_info.delta->GetOwnerNode()) - .AddNode(CreateOpDesc(sub_name_0, SUB, false)) - .AddNode(CreateOpDesc(abs_name_0, kAbs, true)) - .AddNode(CreateOpDesc(abs_name_1, kAbs, true)) - .AddDataLink(delta->GetOwnerNode()->GetName(), delta->GetIdx(), abs_name_0, 0) - .AddDataLink(limit->GetOwnerNode()->GetName(), limit->GetIdx(), sub_name_0, 0) - .AddDataLink(start->GetOwnerNode()->GetName(), start->GetIdx(), sub_name_0, 1) - .AddDataLink(sub_name_0, 0, abs_name_1, 0); + .AddExistNode(for_info.start->GetOwnerNode()) + .AddExistNode(for_info.limit->GetOwnerNode()) + .AddExistNode(for_info.delta->GetOwnerNode()) + .AddNode(CreateOpDesc(sub_name_0, SUB, false)) + .AddNode(CreateOpDesc(abs_name_0, kAbs, true)) + .AddNode(CreateOpDesc(abs_name_1, kAbs, true)) + .AddDataLink(delta->GetOwnerNode()->GetName(), delta->GetIdx(), abs_name_0, 0) + .AddDataLink(limit->GetOwnerNode()->GetName(), limit->GetIdx(), sub_name_0, 0) + .AddDataLink(start->GetOwnerNode()->GetName(), start->GetIdx(), sub_name_0, 1) + .AddDataLink(sub_name_0, 0, abs_name_1, 0); graphStatus error_code = GRAPH_SUCCESS; std::string error_msg; @@ -380,9 +380,12 @@ Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for OpDescPtr ForPass::CreateOpDesc(const std::string &name, const 
std::string &type, bool io_equal_flag) { OpDescBuilder op_desc_builder(name, type); if (io_equal_flag) { - op_desc_builder.AddInput("x").AddOutput("y"); + op_desc_builder.AddInput("x") + .AddOutput("y"); } else { - op_desc_builder.AddInput("x1").AddInput("x2").AddOutput("y"); + op_desc_builder.AddInput("x1") + .AddInput("x2") + .AddOutput("y"); } return op_desc_builder.Build(); @@ -477,7 +480,8 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { if (peer_out_anchor == nullptr) { continue; } - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_data_anchor), "Add data-edge %s:%d->%s:%d failed.", + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_data_anchor), + "Add data-edge %s:%d->%s:%d failed.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), peer_out_anchor->GetIdx(), while_node->GetName().c_str(), i); } @@ -488,16 +492,17 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { GE_CHECK_NOTNULL(out_data_anchor); for (auto &peer_in_anchor : while_info.data_outputs[i]) { GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_data_anchor, peer_in_anchor), - "Add data-edge %s:%d->%s:%d failed.", while_node->GetName().c_str(), - i + kWhileOutputIndex, peer_in_anchor->GetOwnerNode()->GetName().c_str(), - peer_in_anchor->GetIdx()); + "Add data-edge %s:%d->%s:%d failed.", + while_node->GetName().c_str(), i + kWhileOutputIndex, + peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); } } InControlAnchorPtr in_ctrl_anchor = while_node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_ctrl_anchor); for (auto &peer_out_anchor : while_info.ctrl_inputs) { - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_ctrl_anchor), "Add ctrl-edge %s->%s failed.", + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_ctrl_anchor), + "Add ctrl-edge %s->%s failed.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); } @@ -505,7 +510,8 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { OutControlAnchorPtr out_ctrl_anchor = while_node->GetOutControlAnchor(); GE_CHECK_NOTNULL(out_ctrl_anchor); for (auto &peer_in_anchor : while_info.ctrl_outputs) { - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_ctrl_anchor, peer_in_anchor), "Add ctrl-edge %s->%s failed.", + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_ctrl_anchor, peer_in_anchor), + "Add ctrl-edge %s->%s failed.", out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetOwnerNode()->GetName().c_str()); } @@ -532,11 +538,11 @@ ComputeGraphPtr ForPass::BuildCondGraph(WhileInfo &while_info) { graph_builder.AddNode(CreateOpDesc(less_name, LESS, false)); // Set Input - graph_builder.SetInput(kWhileIInputIndex, {mul_name}, {0}) - .SetInput(kWhileAbsDeltaInputIndex, {mul_name}, {1}) - .SetInput(kWhileRangeInputIndex, {less_name}, {1}) - .SetUselessInput(kWhileStartInputIndex) - .SetUselessInput(kWhileDeltaInputIndex); + graph_builder.SetInput(kWhileIInputIndex, { mul_name }, { 0 }) + .SetInput(kWhileAbsDeltaInputIndex, { mul_name }, { 1 }) + .SetInput(kWhileRangeInputIndex, { less_name }, { 1 }) + .SetUselessInput(kWhileStartInputIndex) + .SetUselessInput(kWhileDeltaInputIndex); size_t input_num = while_info.data_inputs.size(); for (size_t i = kWhileDataInputIndex; i < input_num; i++) { graph_builder.SetUselessInput(i); @@ -588,9 +594,9 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { std::string mul_name = "Mul"; std::string add_name_1 = "Add_1"; 
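A short aside on the builder pattern the ForPass hunks above rely on: OpDescBuilder is a fluent interface, so AddInput/AddOutput calls chain and Build() yields a null OpDescPtr on failure. The sketch below only illustrates that pattern as it appears in ForPass::CreateOpDesc; the op name and type strings are placeholders, not values the pass actually uses.

    // Illustration only: build a two-input, one-output op description.
    ge::OpDescBuilder op_desc_builder("example_mul", "Mul");   // placeholder name and type
    ge::OpDescPtr op_desc = op_desc_builder.AddInput("x1")
                                           .AddInput("x2")
                                           .AddOutput("y")
                                           .Build();
    if (op_desc == nullptr) {
      // Build() failed; callers in the pass bail out with a null return in this case.
    }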
graph_builder.AddNode(CreateConstDesc(const_name, 1)) - .AddNode(CreateOpDesc(add_name_0, ADD, false)) - .AddNode(CreateOpDesc(mul_name, MUL, false)) - .AddNode(CreateOpDesc(add_name_1, ADD, false)); + .AddNode(CreateOpDesc(add_name_0, ADD, false)) + .AddNode(CreateOpDesc(mul_name, MUL, false)) + .AddNode(CreateOpDesc(add_name_1, ADD, false)); // Add Subgraph node auto input_num = static_cast(while_info.data_inputs.size()); @@ -600,13 +606,13 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { graph_builder.AddNode(CreateSubgraphOpDesc(sub_graph_node_name, sub_graph_input_num, sub_graph_output_num)); // Set Input - graph_builder.SetInput(kWhileIInputIndex, {add_name_0, mul_name}, {0, 0}) - .SetUselessInput(kWhileAbsDeltaInputIndex) - .SetUselessInput(kWhileRangeInputIndex) - .SetInput(kWhileStartInputIndex, {add_name_1}, {0}) - .SetInput(kWhileDeltaInputIndex, {mul_name}, {1}); + graph_builder.SetInput(kWhileIInputIndex, { add_name_0, mul_name }, { 0, 0 }) + .SetUselessInput(kWhileAbsDeltaInputIndex) + .SetUselessInput(kWhileRangeInputIndex) + .SetInput(kWhileStartInputIndex, { add_name_1 }, { 0 }) + .SetInput(kWhileDeltaInputIndex, { mul_name }, { 1 }); for (uint32_t i = 0; i < input_num - kWhileDataInputIndex; i++) { - graph_builder.SetInput(i + kWhileDataInputIndex, {sub_graph_node_name}, {i + kSubgraphInputIndex}); + graph_builder.SetInput(i + kWhileDataInputIndex, { sub_graph_node_name }, { i + kSubgraphInputIndex }); } // Add Outputs @@ -620,8 +626,8 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { // Add Edges graph_builder.AddDataLink(const_name, 0, add_name_0, 1) - .AddDataLink(mul_name, 0, add_name_1, 1) - .AddDataLink(add_name_1, 0, sub_graph_node_name, kSubgraphLoopVarInputIndex); + .AddDataLink(mul_name, 0, add_name_1, 1) + .AddDataLink(add_name_1, 0, sub_graph_node_name, kSubgraphLoopVarInputIndex); // Add Input-Mapping std::map input_mapping; @@ -668,7 +674,8 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { /// OpDescPtr ForPass::CreateSubgraphOpDesc(const std::string &name, uint32_t input_num, uint32_t output_num) { OpDescBuilder op_desc_builder(name, PARTITIONEDCALL); - op_desc_builder.AddDynamicInput("args", input_num).AddDynamicOutput("output", output_num); + op_desc_builder.AddDynamicInput("args", input_num) + .AddDynamicOutput("output", output_num); OpDescPtr op_desc = op_desc_builder.Build(); if (op_desc == nullptr) { @@ -710,3 +717,4 @@ Status ForPass::UpdateForBodyInputMapping(const WhileInfo &while_info) { return SUCCESS; } } // namespace ge + diff --git a/ge/graph/passes/for_pass.h b/ge/graph/passes/for_pass.h index f25655f8..d6f307d1 100644 --- a/ge/graph/passes/for_pass.h +++ b/ge/graph/passes/for_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef GE_GRAPH_PASSES_FOR_PASS_H #define GE_GRAPH_PASSES_FOR_PASS_H @@ -35,16 +34,8 @@ struct ForInfo { struct WhileInfo { WhileInfo() - : while_node(nullptr), - sub_graph_node(nullptr), - i(nullptr), - abs_delta(nullptr), - range(nullptr), - start(nullptr), - delta(nullptr), - for_body(nullptr), - while_cond(nullptr), - while_body(nullptr) {} + : while_node(nullptr), sub_graph_node(nullptr), i(nullptr), abs_delta(nullptr), range(nullptr), + start(nullptr), delta(nullptr), for_body(nullptr), while_cond(nullptr), while_body(nullptr) {} ge::NodePtr while_node; ge::NodePtr sub_graph_node; ge::OutDataAnchorPtr i; @@ -196,4 +187,4 @@ class ForPass : public BaseNodePass { static OpDescPtr CreateSubgraphOpDesc(const std::string &name, uint32_t input_num, uint32_t output_num); }; } // namespace ge -#endif // GE_GRAPH_PASSES_FOR_PASS_H +#endif //GE_GRAPH_PASSES_FOR_PASS_H diff --git a/ge/graph/passes/get_original_format_pass.cc b/ge/graph/passes/get_original_format_pass.cc index 8c3c84f9..e743f190 100644 --- a/ge/graph/passes/get_original_format_pass.cc +++ b/ge/graph/passes/get_original_format_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -97,9 +97,9 @@ Status GetOriginalFormatPass::SetOriginalFormat(const ge::ComputeGraphPtr &graph OpDescPtr tmpSecondOpPtr = bias_node_ptr->GetInDataNodes().at(1)->GetOpDesc(); GE_CHECK_NOTNULL(tmpSecondOpPtr); GE_IF_BOOL_EXEC( - !AttrUtils::GetInt(tmp_first_op_ptr, ATTR_NAME_FORMAT, first_input_format), continue_flag = true; break); + !AttrUtils::GetInt(tmp_first_op_ptr, ATTR_NAME_FORMAT, first_input_format), continue_flag = true; break); GE_IF_BOOL_EXEC( - !AttrUtils::GetInt(tmpSecondOpPtr, ATTR_NAME_FORMAT, second_input_format), continue_flag = true; break); + !AttrUtils::GetInt(tmpSecondOpPtr, ATTR_NAME_FORMAT, second_input_format), continue_flag = true; break); if (first_input_format != second_input_format) { GELOGW("biasadd node is followed two nodes with different format, get original format failed"); diff --git a/ge/graph/passes/get_original_format_pass.h b/ge/graph/passes/get_original_format_pass.h old mode 100644 new mode 100755 index 813fb2bf..66e0222e --- a/ge/graph/passes/get_original_format_pass.h +++ b/ge/graph/passes/get_original_format_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/global_step_insert_pass.cc b/ge/graph/passes/global_step_insert_pass.cc old mode 100644 new mode 100755 index 460f6ad6..4431fc3d --- a/ge/graph/passes/global_step_insert_pass.cc +++ b/ge/graph/passes/global_step_insert_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,11 +28,13 @@ #include "graph/passes/pass_utils.h" namespace ge { -NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, +NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, + const string &node_type, + const string &node_name, const std::vector &input_list, const std::vector &output_list) { OpDescPtr op_desc = MakeShared(node_name, node_type); - GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(FAILED, "Make OpDesc failed"); return nullptr); + GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(FAILED,"Make OpDesc failed"); return nullptr); for (auto &input_desc : input_list) { graphStatus graph_status = op_desc->AddInputDesc(input_desc); @@ -50,11 +52,11 @@ NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, const str } } - GE_IF_BOOL_EXEC(compute_graph == nullptr, GELOGE(FAILED, "compute_graph is nullptr"); return nullptr); + GE_IF_BOOL_EXEC(compute_graph == nullptr, GELOGE(FAILED,"compute_graph is nullptr"); return nullptr); NodePtr node = compute_graph->AddNode(op_desc); GE_IF_BOOL_EXEC(node == nullptr, - GELOGE(FAILED, "add node failed, name:%s, type:%s.", node_name.c_str(), node_type.c_str()); - return nullptr); + GELOGE(FAILED, "add node failed, name:%s, type:%s.", node_name.c_str(), node_type.c_str()); + return nullptr); GELOGI("Insert op success, name:%s, type:%s.", node_name.c_str(), node_type.c_str()); return node; @@ -81,7 +83,8 @@ Status GlobalStepInsertPass::Run(ComputeGraphPtr compute_graph) { GeTensorDesc tensor_desc(GeShape({1}), FORMAT_ND, DT_UINT64); std::vector input_desc_list = {}; std::vector output_desc_list = {tensor_desc}; - NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, input_desc_list, output_desc_list); + NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, + input_desc_list, output_desc_list); if (global_step == nullptr) { GELOGE(FAILED, "Add global_step node failed, global_step is null."); return FAILED; diff --git a/ge/graph/passes/global_step_insert_pass.h b/ge/graph/passes/global_step_insert_pass.h old mode 100644 new mode 100755 index 46bc85d6..da83e93a --- a/ge/graph/passes/global_step_insert_pass.h +++ b/ge/graph/passes/global_step_insert_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ namespace ge { /// in order to make the global step variable place in known subgraph /// class GlobalStepInsertPass : public GraphPass { - public: +public: /// /// @param compute_graph graph /// @return SUCCESS: do success @@ -38,8 +38,7 @@ class GlobalStepInsertPass : public GraphPass { /// Other: failed /// Status Run(ComputeGraphPtr compute_graph) override; - - private: +private: /// /// Universal insert node to graph. /// @param compute_graph graph @@ -49,9 +48,12 @@ class GlobalStepInsertPass : public GraphPass { /// @param output_list output desc list /// @return the inserted node. if insert failed return nullptr. 
/// - NodePtr InsertOp(ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, - const std::vector &input_list, const std::vector &output_list); + NodePtr InsertOp(ComputeGraphPtr &compute_graph, + const string &node_type, + const string &node_name, + const std::vector &input_list, + const std::vector &output_list); }; -} // namespace ge +} // namespace ge #endif // GE_GRAPH_PASSES_GLOBAL_STEP_INSERT_PASS_H_ \ No newline at end of file diff --git a/ge/graph/passes/guarantee_const_pass.cc b/ge/graph/passes/guarantee_const_pass.cc index f099c01d..a2d8f262 100644 --- a/ge/graph/passes/guarantee_const_pass.cc +++ b/ge/graph/passes/guarantee_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/guarantee_const_pass.h b/ge/graph/passes/guarantee_const_pass.h old mode 100644 new mode 100755 index 7f289a10..1f297944 --- a/ge/graph/passes/guarantee_const_pass.h +++ b/ge/graph/passes/guarantee_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/hccl_group_pass.cc b/ge/graph/passes/hccl_group_pass.cc index d8f11434..bbfd9b56 100644 --- a/ge/graph/passes/hccl_group_pass.cc +++ b/ge/graph/passes/hccl_group_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/hccl_group_pass.h b/ge/graph/passes/hccl_group_pass.h index 059710ce..dbe15e96 100644 --- a/ge/graph/passes/hccl_group_pass.h +++ b/ge/graph/passes/hccl_group_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ namespace ge { class HcclGroupPass : public BaseNodePass { public: Status Run(NodePtr &node) override; - private: Status MarkGroupForFusedNode(NodePtr &fused_node); }; diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc old mode 100644 new mode 100755 index b8787476..21747f42 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,8 +43,7 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { } GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); - return FAILED); + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); if (!node_input_mutable) { continue; } diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h old mode 100644 new mode 100755 index 44b40241..e73a5483 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ class HcclMemcpyPass : public GraphPass { std::string CheckDuplicateName(const std::string &node_name); Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor); + const InDataAnchorPtr &hccl_in_anchor); std::unordered_map node_num_map_; }; diff --git a/ge/graph/passes/identity_pass.cc b/ge/graph/passes/identity_pass.cc old mode 100644 new mode 100755 index 57b7c46d..5a54e391 --- a/ge/graph/passes/identity_pass.cc +++ b/ge/graph/passes/identity_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,14 +47,14 @@ Status CheckIdentityUsable(const NodePtr &node, bool &usable) { auto in_node_opdesc = in_node->GetOpDesc(); GE_CHECK_NOTNULL(in_node_opdesc); // near entrance of subgraph || near subgraph - if ((in_node->GetType() == DATA && NodeUtils::IsSubgraphInput(in_node)) || - !in_node_opdesc->GetSubgraphInstanceNames().empty()) { + if ((in_node->GetType() == DATA && NodeUtils::IsSubgraphInput(in_node)) + || !in_node_opdesc->GetSubgraphInstanceNames().empty()) { usable = true; return SUCCESS; } - GE_CHK_STATUS_RET(GetOriginalType(in_node, node_type), "Failed to get node type from node %s", - node->GetName().c_str()); + GE_CHK_STATUS_RET(GetOriginalType(in_node, node_type), + "Failed to get node type from node %s", node->GetName().c_str()); bool need_skip = (node_type != SWITCH) && (node_type != REFSWITCH) && (node_type != SWITCHN); if (need_skip) { GELOGD("skip identity %s connected to switch", node->GetName().c_str()); @@ -70,12 +70,13 @@ Status CheckIdentityUsable(const NodePtr &node, bool &usable) { auto out_node_opdesc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(out_node_opdesc); // near output of subgraph || near subgraph - if (NodeUtils::IsSubgraphOutput(out_node) || !out_node_opdesc->GetSubgraphInstanceNames().empty()) { + if (NodeUtils::IsSubgraphOutput(out_node) + || !out_node_opdesc->GetSubgraphInstanceNames().empty()) { usable = true; return SUCCESS; } - GE_CHK_STATUS_RET(GetOriginalType(out_node, node_type), "Failed to get node type from node %s", - node->GetName().c_str()); + GE_CHK_STATUS_RET(GetOriginalType(out_node, node_type), + "Failed to get node type from node %s", node->GetName().c_str()); if ((node_type != MERGE) && (node_type != REFMERGE)) { GELOGD("skip identity %s connected to merge", node->GetName().c_str()); break; diff --git a/ge/graph/passes/identity_pass.h 
b/ge/graph/passes/identity_pass.h index a4a80efc..a0d3f032 100644 --- a/ge/graph/passes/identity_pass.h +++ b/ge/graph/passes/identity_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/infershape_pass.cc b/ge/graph/passes/infershape_pass.cc old mode 100644 new mode 100755 index cacca584..7b8f7b50 --- a/ge/graph/passes/infershape_pass.cc +++ b/ge/graph/passes/infershape_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include "analyzer/analyzer.h" #include "framework/common/util.h" #include "graph/shape_refiner.h" +#include "graph/utils/graph_utils.h" namespace ge { Status InferShapePass::Run(NodePtr &node) { @@ -29,9 +30,13 @@ Status InferShapePass::Run(NodePtr &node) { // select INFERSHAPE failed info auto graph = node->GetOwnerComputeGraph(); GE_CHECK_NOTNULL(graph); - analyzer::DataInfo analyze_info{graph->GetSessionID(), graph->GetGraphID(), analyzer::INFER_SHAPE, node, - "InferShapeFailed!"}; + auto root_graph = ge::GraphUtils::FindRootGraph(graph); + GE_CHECK_NOTNULL(root_graph); + analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), + analyzer::INFER_SHAPE, node, "InferShapeFailed!"}; (void)Analyzer::GetInstance()->DoAnalyze(analyze_info); + (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), + root_graph->GetGraphID()); GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str()); return GE_GRAPH_INFERSHAPE_FAILED; diff --git a/ge/graph/passes/infershape_pass.h b/ge/graph/passes/infershape_pass.h index 9e4df9a6..30cf0472 100644 --- a/ge/graph/passes/infershape_pass.h +++ b/ge/graph/passes/infershape_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/input_output_connection_identify_pass.cc b/ge/graph/passes/input_output_connection_identify_pass.cc index 45560bf5..0d198dfb 100644 --- a/ge/graph/passes/input_output_connection_identify_pass.cc +++ b/ge/graph/passes/input_output_connection_identify_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -165,8 +165,8 @@ Status InputOutputConnectionIdentifyPass::ProcessOutputNode(const NodePtr &node, } Status InputOutputConnectionIdentifyPass::SetNodeAttrOfConnectingInputOutput( - const map> &connect_input_node_idx, - const map> &connect_output_node_idx) { + const map> &connect_input_node_idx, + const map> &connect_output_node_idx) { for (const auto &iter : connect_input_node_idx) { GE_CHECK_NOTNULL(iter.first); if (iter.first->GetOpDesc() != nullptr) { diff --git a/ge/graph/passes/input_output_connection_identify_pass.h b/ge/graph/passes/input_output_connection_identify_pass.h old mode 100644 new mode 100755 index 0dd32102..97ed315d --- a/ge/graph/passes/input_output_connection_identify_pass.h +++ b/ge/graph/passes/input_output_connection_identify_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/isolated_op_remove_pass.cc b/ge/graph/passes/isolated_op_remove_pass.cc index 152104eb..5c9093e9 100644 --- a/ge/graph/passes/isolated_op_remove_pass.cc +++ b/ge/graph/passes/isolated_op_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/isolated_op_remove_pass.h b/ge/graph/passes/isolated_op_remove_pass.h old mode 100644 new mode 100755 index f17df21a..3b7fe7d1 --- a/ge/graph/passes/isolated_op_remove_pass.h +++ b/ge/graph/passes/isolated_op_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/iterator_op_pass.cc b/ge/graph/passes/iterator_op_pass.cc index 656ed390..1ec2bba9 100644 --- a/ge/graph/passes/iterator_op_pass.cc +++ b/ge/graph/passes/iterator_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,8 +68,8 @@ Status IteratorOpPass::Run(ge::ComputeGraphPtr graph) { int64_t loop_per_iter = 0; ge::GeTensorDesc ge_tensor_desc; - Status status = - VarManager::Instance(graph->GetSessionID())->GetCurVarDesc(NODE_NAME_FLOWCTRL_LOOP_PER_ITER, ge_tensor_desc); + Status status = VarManager::Instance(graph->GetSessionID())->GetCurVarDesc(NODE_NAME_FLOWCTRL_LOOP_PER_ITER, + ge_tensor_desc); GE_IF_BOOL_EXEC(status != SUCCESS, GELOGW("Fail to Get var_desc of NODE_NAME_FLOWCTRL_LOOP_PER_ITER failed."); continue); Status ret; @@ -78,8 +78,8 @@ Status IteratorOpPass::Run(ge::ComputeGraphPtr graph) { // EOS will not be considered if ret is not SUCCESS. 
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGW("Set rt context RT_CTX_NORMAL_MODE failed."); continue); - status = - GetVariableValue(graph->GetSessionID(), ge_tensor_desc, NODE_NAME_FLOWCTRL_LOOP_PER_ITER, &loop_per_iter); + status = GetVariableValue(graph->GetSessionID(), ge_tensor_desc, NODE_NAME_FLOWCTRL_LOOP_PER_ITER, + &loop_per_iter); ret = SetRtContext(graph->GetSessionID(), graph->GetGraphID(), rtContext_t(), RT_CTX_GEN_MODE); // The following process will be affected if ret is not SUCCESS. @@ -144,7 +144,8 @@ ge::NodePtr IteratorOpPass::InsertEndOfSequenceNode(const ge::NodePtr &pre_node, auto out_anchor = pre_node->GetOutDataAnchor(0); ge::graphStatus status; status = GraphUtils::AddEdge(out_anchor, end_of_seq_node->GetInDataAnchor(0)); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Graph add EndOfSequence op input edge fail, dst node: %s.", + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, + "Graph add EndOfSequence op input edge fail, dst node: %s.", end_of_seq_node->GetName().c_str()); // EOS(control) --> subsequent of memcpy OutControlAnchorPtr out_ctrl_anchor = end_of_seq_node->GetOutControlAnchor(); @@ -157,8 +158,10 @@ ge::NodePtr IteratorOpPass::InsertEndOfSequenceNode(const ge::NodePtr &pre_node, } status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor); GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, - "Graph add EndOfSequence op out ctrl edge fail, dst node: %s.", out_node->GetName().c_str()); - GELOGI("Graph add EndOfSequence op out ctrl edge, dst node: %s.", out_node->GetName().c_str()); + "Graph add EndOfSequence op out ctrl edge fail, dst node: %s.", + out_node->GetName().c_str()); + GELOGI("Graph add EndOfSequence op out ctrl edge, dst node: %s.", + out_node->GetName().c_str()); } return end_of_seq_node; @@ -229,19 +232,18 @@ ge::NodePtr IteratorOpPass::InsertMemcpyAsyncNode(const ge::NodePtr &pre_node, c } // Control out OutControlAnchorPtr out_ctrl_anchor = pre_node->GetOutControlAnchor(); - GE_IF_BOOL_EXEC( - out_ctrl_anchor != nullptr, for (auto &peer_in_ctrl_anchor - : out_ctrl_anchor->GetPeerInControlAnchors()) { - ge::graphStatus status = GraphUtils::RemoveEdge(out_ctrl_anchor, peer_in_ctrl_anchor); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Remove edge failed, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - status = GraphUtils::AddEdge(memcpy_async_node->GetOutControlAnchor(), peer_in_ctrl_anchor); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, - "Graph add memcpyAsync op out ctrl edge fail, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - GELOGI("Graph add memcpyAsync op out ctrl edge, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - }); + GE_IF_BOOL_EXEC(out_ctrl_anchor != nullptr, + for (auto &peer_in_ctrl_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { + ge::graphStatus status = GraphUtils::RemoveEdge(out_ctrl_anchor, peer_in_ctrl_anchor); + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Remove edge failed, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + status = GraphUtils::AddEdge(memcpy_async_node->GetOutControlAnchor(), peer_in_ctrl_anchor); + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, + "Graph add memcpyAsync op out ctrl edge fail, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + GELOGI("Graph add memcpyAsync op out ctrl edge, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + }); 
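Editorial note on the control-edge rewiring in the IteratorOpPass hunk above: when the MemcpyAsync node is inserted behind the iterator, every outgoing control edge of the original node is detached and re-attached to the new node so downstream ordering is preserved. Below is a hedged sketch of that rewiring loop; pre_node and new_node are placeholder variables, and only the GraphUtils::RemoveEdge/AddEdge calls and the anchor getters come from the code shown in the diff.

    // Sketch only: move all outgoing control edges of pre_node onto new_node.
    ge::OutControlAnchorPtr out_ctrl_anchor = pre_node->GetOutControlAnchor();
    if (out_ctrl_anchor != nullptr) {
      for (auto &peer_in_ctrl_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) {
        if (ge::GraphUtils::RemoveEdge(out_ctrl_anchor, peer_in_ctrl_anchor) != ge::GRAPH_SUCCESS) {
          return nullptr;   // abandon the insertion on failure, as the pass does
        }
        if (ge::GraphUtils::AddEdge(new_node->GetOutControlAnchor(), peer_in_ctrl_anchor) != ge::GRAPH_SUCCESS) {
          return nullptr;
        }
      }
    }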
GELOGI("Insert memcpyAsync op success."); return memcpy_async_node; @@ -280,8 +282,8 @@ ge::OpDescPtr IteratorOpPass::CreateMemcpyAsyncOp(const ge::NodePtr &pre_node) { } Status IteratorOpPass::SetRtContext(uint64_t session_id, uint32_t graph_id, rtContext_t rt_context, rtCtxMode_t mode) { - GELOGI("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, - static_cast(mode), ge::GetContext().DeviceId()); + GELOGI("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, + graph_id, static_cast(mode), ge::GetContext().DeviceId()); GE_CHK_RT_RET(rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId())); GE_CHK_RT_RET(rtCtxSetCurrent(rt_context)); diff --git a/ge/graph/passes/iterator_op_pass.h b/ge/graph/passes/iterator_op_pass.h index 77e80600..d9303358 100644 --- a/ge/graph/passes/iterator_op_pass.h +++ b/ge/graph/passes/iterator_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/link_gen_mask_nodes_pass.cc b/ge/graph/passes/link_gen_mask_nodes_pass.cc old mode 100644 new mode 100755 index 4f122fb2..9bd991aa --- a/ge/graph/passes/link_gen_mask_nodes_pass.cc +++ b/ge/graph/passes/link_gen_mask_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/link_gen_mask_nodes_pass.h b/ge/graph/passes/link_gen_mask_nodes_pass.h index f9979ab1..12d68f1b 100644 --- a/ge/graph/passes/link_gen_mask_nodes_pass.h +++ b/ge/graph/passes/link_gen_mask_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_agnostic_pass.cc b/ge/graph/passes/mark_agnostic_pass.cc index 6f520dd8..0275bc9f 100644 --- a/ge/graph/passes/mark_agnostic_pass.cc +++ b/ge/graph/passes/mark_agnostic_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "graph/passes/mark_agnostic_pass.h" #include "utils/node_utils.h" @@ -37,4 +36,4 @@ Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { } return SUCCESS; } -} // namespace ge \ No newline at end of file +} \ No newline at end of file diff --git a/ge/graph/passes/mark_agnostic_pass.h b/ge/graph/passes/mark_agnostic_pass.h index 7fd3189d..9c581abe 100644 --- a/ge/graph/passes/mark_agnostic_pass.h +++ b/ge/graph/passes/mark_agnostic_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_MARK_AGNOSTIC_PASS_H_ #define GE_MARK_AGNOSTIC_PASS_H_ @@ -24,6 +23,6 @@ class MarkAgnosticPass : public GraphPass { public: Status Run(ComputeGraphPtr graph) override; }; -} // namespace ge +} -#endif // GE_MARK_AGNOSTIC_PASS_H_ +#endif //GE_MARK_AGNOSTIC_PASS_H_ diff --git a/ge/graph/passes/mark_graph_unknown_status_pass.cc b/ge/graph/passes/mark_graph_unknown_status_pass.cc index 7106e58c..d8f5feff 100644 --- a/ge/graph/passes/mark_graph_unknown_status_pass.cc +++ b/ge/graph/passes/mark_graph_unknown_status_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,17 +16,24 @@ #include "graph/passes/mark_graph_unknown_status_pass.h" #include "graph/utils/node_utils.h" +#include "graph/debug/ge_attr_define.h" namespace ge { Status MarkGraphUnknownStatusPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); bool is_unknown_shape = false; + bool forced_unknown = false; for (const auto &node : graph->GetDirectNode()) { GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), "Get node[%s] shape status failed!", node->GetName().c_str()); if (is_unknown_shape) { break; } + if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, forced_unknown) && forced_unknown) { + GELOGD("node %s was marked as unknown shape.", node->GetName().c_str()); + is_unknown_shape = true; + break; + } } graph->SetGraphUnknownFlag(is_unknown_shape); GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); diff --git a/ge/graph/passes/mark_graph_unknown_status_pass.h b/ge/graph/passes/mark_graph_unknown_status_pass.h index 662e321c..a1148c6e 100644 --- a/ge/graph/passes/mark_graph_unknown_status_pass.h +++ b/ge/graph/passes/mark_graph_unknown_status_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_same_addr_pass.cc b/ge/graph/passes/mark_same_addr_pass.cc index 0ed151d3..2441d0bd 100644 --- a/ge/graph/passes/mark_same_addr_pass.cc +++ b/ge/graph/passes/mark_same_addr_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_same_addr_pass.h b/ge/graph/passes/mark_same_addr_pass.h index ebfcf6b2..518fe418 100644 --- a/ge/graph/passes/mark_same_addr_pass.h +++ b/ge/graph/passes/mark_same_addr_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
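[Annotation] The mark_graph_unknown_status_pass.cc hunk above adds a second trigger for unknown-shape graphs: besides a node whose inferred shape status is unknown (NodeUtils::GetNodeUnknownShapeStatus), a node carrying the ATTR_NAME_FORCE_UNKNOWN_SHAPE attribute (read with AttrUtils::GetBool) now also flips the whole graph into unknown-shape mode. The sketch below shows the producer side, i.e. how an upstream pass could tag a node so this pass picks it up; it assumes only the AttrUtils::SetBool helper and the attribute name pulled in through the graph/debug/ge_attr_define.h include added in the hunk, and the helper name is illustrative.

#include "graph/debug/ge_attr_define.h"
#include "graph/utils/attr_utils.h"

namespace ge {
// Hedged sketch: mark a single node so that MarkGraphUnknownStatusPass later
// treats its owning graph as unknown-shape.
Status ForceUnknownShape(const NodePtr &node) {
  GE_CHECK_NOTNULL(node);
  GE_CHECK_NOTNULL(node->GetOpDesc());
  if (!AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true)) {
    GELOGE(FAILED, "Set %s on node %s failed.", ATTR_NAME_FORCE_UNKNOWN_SHAPE.c_str(), node->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
}  // namespace ge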
diff --git a/ge/graph/passes/memcpy_addr_async_pass.cc b/ge/graph/passes/memcpy_addr_async_pass.cc old mode 100644 new mode 100755 index 934f4737..3ede39a7 --- a/ge/graph/passes/memcpy_addr_async_pass.cc +++ b/ge/graph/passes/memcpy_addr_async_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,13 +99,13 @@ Status MemcpyAddrAsyncPass::AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, GELOGI("Insert memcpy_addr_async for known graph."); auto sub_graph = user_data_for_known_->GetOwnerComputeGraph(); NodePtr memcpy_addr_async_node = - CreateMemcpyAddrAsyncNode(sub_graph, peer_out_anchor_for_known_, out_of_user_data_for_known_); + CreateMemcpyAddrAsyncNode(sub_graph, peer_out_anchor_for_known_, out_of_user_data_for_known_); GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr, GELOGE(INTERNAL_ERROR, "CreateMemcpyAddrAsyncNode for known failed."); return INTERNAL_ERROR); Status ret = - InsertMemcpyAddrAsyncNode(peer_out_anchor_for_known_, in_anchor_for_known_, memcpy_addr_async_node); + InsertMemcpyAddrAsyncNode(peer_out_anchor_for_known_, in_anchor_for_known_, memcpy_addr_async_node); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "InsertMemcpyAddrAsyncNode for known failed."); return ret); } } @@ -136,7 +136,7 @@ void MemcpyAddrAsyncPass::FindUserDataForNonDynamic(const ge::NodePtr &parent_no OutDataAnchorPtr out_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(out_anchor == nullptr, GELOGE(INTERNAL_ERROR, "Cannot find out_anchor of %s.", parent_node->GetName().c_str()); - return ); + return); NodePtr in_node = out_anchor->GetOwnerNode(); GELOGI("in_node of parent_node is %s.", in_node->GetName().c_str()); if (in_node->GetType() == DATA) { @@ -261,7 +261,9 @@ Status MemcpyAddrAsyncPass::InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeG auto in_node = NodeUtils::GetInDataNodeByIndex(*node, in_data_anchor->GetIdx()); GE_CHECK_NOTNULL(in_node); auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP) && (in_node->GetType() != DATA)) { + if ((in_node->GetType() != CONSTANT) && + (in_node->GetType() != CONSTANTOP) && + (in_node->GetType() != DATA)) { continue; } auto desc = in_node->GetOpDesc(); diff --git a/ge/graph/passes/memcpy_addr_async_pass.h b/ge/graph/passes/memcpy_addr_async_pass.h old mode 100644 new mode 100755 index a70fcbdd..0f22d10b --- a/ge/graph/passes/memcpy_addr_async_pass.h +++ b/ge/graph/passes/memcpy_addr_async_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/merge_pass.cc b/ge/graph/passes/merge_pass.cc index 8e691518..61aab4aa 100644 --- a/ge/graph/passes/merge_pass.cc +++ b/ge/graph/passes/merge_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -97,9 +97,9 @@ bool MergePass::IsNeedChangeIndexToConstant(NodePtr &node) const { for (const auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { if (peer_in_anchor != nullptr && peer_in_anchor->GetOwnerNode() != nullptr) { GELOGI( - "[%s] MergePass, value_index link to other node, " - "change it to be Constant.", - node->GetName().c_str()); + "[%s] MergePass, value_index link to other node, " + "change it to be Constant.", + node->GetName().c_str()); return true; } } @@ -159,15 +159,14 @@ Status MergePass::CreateConstByValue(NodePtr &node, int value_index, OpDescPtr & // 3. create attr value of Constant, is a tensor GeTensorPtr const_tensor_ptr = - MakeShared(original_out_tensor_desc, reinterpret_cast(&value_index), sizeof(int)); + MakeShared(original_out_tensor_desc, reinterpret_cast(&value_index), sizeof(int)); if (const_tensor_ptr == nullptr) { GELOGE(FAILED, "[%s] Make shared of Constant tensor failed.", constant_name.c_str()); return FAILED; } GE_IF_BOOL_EXEC(!AttrUtils::SetTensor(op_desc, ATTR_NAME_WEIGHTS, const_tensor_ptr), - GELOGE(FAILED, "get ATTR_NAME_WEIGHTS failed"); - return FAILED); + GELOGE(FAILED, "get ATTR_NAME_WEIGHTS failed"); return FAILED); // 4. set Constant output desc GE_CHK_STATUS_RET(op_desc->AddOutputDesc(original_out_tensor_desc), "add out put desc failed"); diff --git a/ge/graph/passes/merge_pass.h b/ge/graph/passes/merge_pass.h old mode 100644 new mode 100755 index ef586713..53582ff6 --- a/ge/graph/passes/merge_pass.h +++ b/ge/graph/passes/merge_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index 34daa681..0ff05c23 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
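[Annotation] In the merge_pass.cc hunk above, CreateConstByValue wraps the chosen value_index into a Const weight so the Merge node's value_index output can be replaced by an ordinary Constant. The template argument lists do not survive in this copy of the patch (the call reads MakeShared(...)); a hedged reconstruction of that step, assuming the GeTensor template argument and uint8_t buffer type used elsewhere in GE, is:

#include "common/ge/ge_util.h"
#include "graph/ge_tensor.h"

namespace ge {
// Hedged reconstruction: build a scalar tensor holding `value_index`.
// GeTensor copies the buffer, so taking the address of the by-value parameter is safe.
GeTensorPtr MakeValueIndexTensor(const GeTensorDesc &out_desc, int value_index) {
  return MakeShared<GeTensor>(out_desc, reinterpret_cast<uint8_t *>(&value_index), sizeof(int));
}
}  // namespace ge

The resulting tensor is then attached via AttrUtils::SetTensor(op_desc, ATTR_NAME_WEIGHTS, ...), as the hunk shows, so downstream passes treat it like any other Const.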
@@ -134,7 +134,7 @@ Status MergeToStreamMergePass::AddMemcpyAsyncNodes(const ComputeGraphPtr &graph, GE_CHK_BOOL_EXEC(active_node != nullptr, return FAILED, "Create StreamActive node failed."); GE_CHK_STATUS(GraphUtils::AddEdge(active_node->GetOutControlAnchor(), node->GetInControlAnchor()), "StreamActive add ctrl edge failed."); - if (SetActiveLabelList(active_node, {node->GetName()}) != SUCCESS) { + if (SetActiveLabelList(active_node, { node->GetName() }) != SUCCESS) { GELOGE(FAILED, "SetActiveLabelList for node %s failed.", active_node->GetName().c_str()); return FAILED; } @@ -193,7 +193,7 @@ NodePtr MergeToStreamMergePass::CreateActiveNode(const ComputeGraphPtr &graph, c GE_CHK_BOOL_EXEC(active_node != nullptr, return nullptr, "Create StreamActive node failed."); GE_IF_BOOL_EXEC(GraphUtils::AddEdge(node->GetOutControlAnchor(), active_node->GetInControlAnchor()) != SUCCESS, GELOGE(INTERNAL_ERROR, "add edge failed"); - return nullptr); + return nullptr); GE_IF_BOOL_EXEC(SetSwitchBranchNodeLabel(active_node, node_name) != SUCCESS, GELOGE(INTERNAL_ERROR, "set switch branch node label failed"); return nullptr); diff --git a/ge/graph/passes/merge_to_stream_merge_pass.h b/ge/graph/passes/merge_to_stream_merge_pass.h index 9f713989..6eb2b22c 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.h +++ b/ge/graph/passes/merge_to_stream_merge_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc old mode 100644 new mode 100755 index 80355ca7..732844e5 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include "common/formats/utils/formats_trans_utils.h" #include "common/ge/ge_util.h" +#include "graph/common/local_context.h" #include "graph/preprocess/multi_batch_options.h" #include "graph/utils/node_utils.h" #include "graph/utils/op_desc_utils.h" @@ -33,6 +34,7 @@ const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case"; const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data"; const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; +const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; } // namespace Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { @@ -53,6 +55,13 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } + // parser data dynamic info from atc parameter --input_shape + if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims, + data_to_dynamic_info_) != SUCCESS) { + GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); + return PARAM_INVALID; + } + (void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); ComputeGraphPtr branch = MakeShared(graph->GetName()); if (branch == nullptr) { @@ -118,8 +127,8 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { if (data_node->GetType() == DATA) { direct_output_[i] = data_node->GetName(); GE_CHK_GRAPH_STATUS_RET( - GraphUtils::RemoveEdge(data_node->GetOutDataAnchor(kDataOutIndex), output->GetInDataAnchor(i)), - "Remove edge failed"); + GraphUtils::RemoveEdge(data_node->GetOutDataAnchor(kDataOutIndex), output->GetInDataAnchor(i)), + "Remove edge failed"); } } @@ -165,6 +174,15 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { } } + std::vector data_name_order; + for (auto &item : GetLocalOmgContext().user_input_dims) { + data_name_order.push_back(item.first); + } + if (!AttrUtils::SetListStr(op_desc, ATTR_USER_DESIGNEATE_SHAPE_ORDER, data_name_order)) { + GELOGE(FAILED, "Failed to add user designate shape order attr on case node %s", + op_desc->GetName().c_str()); + return FAILED; + } GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed"); GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed"); @@ -275,8 +293,8 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex"); op_builder.AddInput("x", data_node->GetOpDesc()->GetOutputDesc(0)) - .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); + .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) + .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); const OpDescPtr op_desc = op_builder.Build(); if (op_desc == nullptr) { @@ -334,8 +352,8 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { const NodePtr &data = graph->AddNode(op_desc); GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", data->GetName().c_str(), - case_node_->GetName().c_str()); + GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", + data->GetName().c_str(), case_node_->GetName().c_str()); return FAILED; } @@ -375,8 +393,8 @@ 
Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { const NodePtr &data = graph->AddNode(op_desc); GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s", data->GetName().c_str(), - case_node_->GetName().c_str()); + GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s", + data->GetName().c_str(), case_node_->GetName().c_str()); return FAILED; } all_const_nodes.emplace_back(data); @@ -391,6 +409,7 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { // Const no InputDesc, Data need InputDesc. (void)op_desc->AddInputDesc(op_desc->GetOutputDesc(kDataOutIndex)); (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index); + (void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1); } all_const_nodes_.swap(all_const_nodes); @@ -423,8 +442,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { const auto it = direct_output_.find(i); if (it == direct_output_.end()) { if (GraphUtils::AddEdge(case_node_->GetOutDataAnchor(i), node->GetInDataAnchor(i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Case:%s to NetOutput:%s", case_node_->GetName().c_str(), - node->GetName().c_str()); + GELOGE(FAILED, "Failed to add edge between Case:%s to NetOutput:%s", + case_node_->GetName().c_str(), node->GetName().c_str()); return FAILED; } } else { @@ -434,8 +453,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { return GE_GRAPH_GRAPH_NODE_NULL; } if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(kDataOutIndex), node->GetInDataAnchor(i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Data:%s to NetOutput:%s", data_node->GetName().c_str(), - node->GetName().c_str()); + GELOGE(FAILED, "Failed to add edge between Data:%s to NetOutput:%s", + data_node->GetName().c_str(), node->GetName().c_str()); return FAILED; } } @@ -454,6 +473,7 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { /// Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + auto data_name = data->GetName(); const auto &dims = data_shape.GetDims(); if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { return SUCCESS; @@ -464,9 +484,10 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { int64_t max_size = 0; for (size_t i = 0; i < batch_shapes_.size(); ++i) { int64_t size = 1; - for (auto dim : batch_shapes_[i]) { + for (auto dim : data_to_dynamic_info_.at(data_name).at(i)) { if (INT64_MAX / dim < size) { - GELOGE(PARAM_INVALID, "The shape %s size overflow", formats::ShapeToString(batch_shapes_[i]).c_str()); + GELOGE(PARAM_INVALID, "The shape %s size overflow", + formats::ShapeToString(data_to_dynamic_info_.at(data_name).at(i)).c_str()); return PARAM_INVALID; } size *= dim; @@ -477,17 +498,17 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { } } - return SetShapeToData(batch_shapes_[max_shape_index], data, data_shape); + return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), data, data_shape); } /// /// @ingroup ge /// @brief Set shape to Data node in branch. /// @param [in] const NodePtr &data: data in branch. 
-/// @param [in] const std::vector &shapes: dims of shape. +/// @param [in] size_t index: The batch index. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::UpdataShapeToData(const NodePtr &data, const vector &shapes) { +Status MultiBatchClonePass::UpdateShapeToData(const NodePtr &data, size_t index) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); const auto &dims = data_shape.GetDims(); if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { @@ -495,7 +516,16 @@ Status MultiBatchClonePass::UpdataShapeToData(const NodePtr &data, const vector< } (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); - return SetShapeToData(shapes, data, data_shape); + auto data_name = data->GetName(); + size_t pos = data_name.find(kMultiBatchNodePostfix); + if (pos == string::npos) { + GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.", + kMultiBatchNodePostfix.c_str(), data_name.c_str()); + return FAILED; + } + + auto parent_name = data_name.substr(0, pos); + return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(index), data, data_shape); } /// @@ -534,42 +564,38 @@ Status MultiBatchClonePass::SetShapeToData(const vector &shapes, const /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) { - const std::string name = graph->GetName() + "_branche_"; const auto &op_desc = case_node_->GetOpDesc(); for (size_t i = 0; i < batch_shapes_.size(); ++i) { std::vector input_nodes; std::vector output_nodes; - const std::string prefix = "branche_" + std::to_string(i) + "_"; - ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, prefix, input_nodes, output_nodes); + const std::string postfix = kMultiBatchNodePostfix + std::to_string(i); + ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes); if (subgraph == nullptr) { GELOGE(FAILED, "Create multi-batch case node failed"); return FAILED; } - subgraph->SetName(name + std::to_string(i)); + subgraph->SetName("Batch_" + std::to_string(i)); subgraph->SetParentNode(case_node_); subgraph->SetParentGraph(graph); - (void)AttrUtils::SetStr(subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); - all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT); - graph->AddSubgraph(subgraph->GetName(), subgraph); + all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT); - const std::string key_name = "branches" + std::to_string(i); + const string key_name = "branches" + std::to_string(i); op_desc->AddSubgraphName(key_name); op_desc->SetSubgraphInstanceName(i, subgraph->GetName()); for (const auto &data : input_nodes) { - GE_CHK_STATUS_RET(UpdataShapeToData(data, batch_shapes_[i]), "Update %s failed", subgraph->GetName().c_str()); + GE_CHK_STATUS_RET(UpdateShapeToData(data, i), "Update %s failed", subgraph->GetName().c_str()); } } // Origninal graph take as first subgraph, update node name. 
for (const auto &n : branch->GetDirectNode()) { const auto &op_desc = n->GetOpDesc(); - op_desc->SetName("branche_0_" + n->GetName()); - + op_desc->SetName(n->GetName() + kMultiBatchNodePostfix + "0"); if (n->GetType() == DATA) { - GE_CHK_STATUS_RET(UpdataShapeToData(n, batch_shapes_[0]), "Update %s failed", branch->GetName().c_str()); + GE_CHK_STATUS_RET(UpdateShapeToData(n, 0), "Update %s failed", branch->GetName().c_str()); } } diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h old mode 100644 new mode 100755 index 0d52b738..1155dfc8 --- a/ge/graph/passes/multi_batch_clone_pass.h +++ b/ge/graph/passes/multi_batch_clone_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,10 +107,10 @@ class MultiBatchClonePass : public GraphPass { /// @ingroup ge /// @brief Set shape to Data node in branch. /// @param [in] const NodePtr &data: data in branch. - /// @param [in] const std::vector &shapes: dims of shape. + /// @param [in] size_t index: The batch index. /// @return 0: SUCCESS / others: FAILED /// - Status UpdataShapeToData(const NodePtr &data, const std::vector &shapes); + Status UpdateShapeToData(const NodePtr &data, size_t index); /// /// @ingroup ge @@ -165,6 +165,7 @@ class MultiBatchClonePass : public GraphPass { std::map direct_output_; std::map all_branch_output_; + std::map>> data_to_dynamic_info_; NodePtr case_node_; }; diff --git a/ge/graph/passes/multi_batch_pass.cc b/ge/graph/passes/multi_batch_pass.cc index 32152a6f..70a09065 100644 --- a/ge/graph/passes/multi_batch_pass.cc +++ b/ge/graph/passes/multi_batch_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -95,6 +95,34 @@ Status MultiBatchPass::ClearStatus() { return SUCCESS; } +/// +/// @ingroup ge +/// @brief Set batch label for Case mode. +/// @param [in] const ComputeGraphPtr &graph: Root/Case graph. +/// @param [in] const NodePtr &case_node: Case Node. 
+/// @return 0: SUCCESS / others: FAILED +/// +Status MultiBatchPass::SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr &case_node) { + const auto &func_desc = case_node->GetOpDesc(); + if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { + GELOGD("Graph: %s Not multi-batch, Node: %s", graph->GetName().c_str(), case_node->GetName().c_str()); + return SUCCESS; + } + + const auto &dynamic_branch_names = func_desc->GetSubgraphInstanceNames(); + for (size_t i = 0; i < dynamic_branch_names.size(); ++i) { + const auto &subgraph = graph->GetSubgraph(dynamic_branch_names[i]); + GE_CHECK_NOTNULL(subgraph); + + const string batch_label = "Batch_" + std::to_string(i); + for (const auto &node : subgraph->GetDirectNode()) { + (void)AttrUtils::SetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label); + } + } + + return SUCCESS; +} + /// /// @brief Replace & Combine SwitchN nodes /// @param [in] graph @@ -103,6 +131,10 @@ Status MultiBatchPass::ClearStatus() { /// Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchorPtr &pred_value) { for (const NodePtr &node : graph->GetDirectNode()) { + if (node->GetType() == CASE) { + GE_CHK_STATUS_RET(SetCaseLabel(graph, node), "Set batch label failed"); + continue; + } if (node->GetType() != SWITCHN) { continue; } diff --git a/ge/graph/passes/multi_batch_pass.h b/ge/graph/passes/multi_batch_pass.h index 1806229f..a714992a 100644 --- a/ge/graph/passes/multi_batch_pass.h +++ b/ge/graph/passes/multi_batch_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,8 @@ class MultiBatchPass : public GraphPass { bool CheckDims(const std::vector> &output_shape) const; NodePtr CreateSwitchCaseNode(const ComputeGraphPtr &graph, const std::string &name, - const OutDataAnchorPtr &pred_value, const std::vector> &batch_shape, + const OutDataAnchorPtr &pred_value, + const std::vector> &batch_shape, const std::vector> &combined_batch); Status BypassSwitchN(const NodePtr &switch_n_node, const NodePtr &switch_case_node); Status AttachLabel(const NodePtr &switch_case_node); @@ -53,6 +54,15 @@ class MultiBatchPass : public GraphPass { Status AttachLabelOnly(uint32_t batch_num); Status GetUserDesignateShape(); + /// + /// @ingroup ge + /// @brief Set batch label for Case mode. + /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. + /// @param [in] const NodePtr &case_node: Case Node. + /// @return 0: SUCCESS / others: FAILED + /// + Status SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr &case_node); + std::vector switch_n_nodes_; std::vector bypass_nodes_; std::vector> batch_head_nodes_; diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc index 8ded625c..e3f2b71a 100644 --- a/ge/graph/passes/net_output_pass.cc +++ b/ge/graph/passes/net_output_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
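[Annotation] The multi_batch_clone_pass.cc and multi_batch_pass.cc hunks above are two halves of one feature: the clone pass now names each cloned branch "Batch_<i>", appends kMultiBatchNodePostfix plus the batch index to every node it clones, and keeps a per-input table (data_to_dynamic_info_) from each Data node's name to its own dims for every batch index; the new MultiBatchPass::SetCaseLabel then walks the Case node's subgraph instances and stamps ATTR_NAME_BATCH_LABEL = "Batch_<i>" on every node of branch i. The sketch below illustrates the name-keyed lookup that UpdateShapeToData now performs; the node name, the table's value type, and the helper are assumptions made for illustration (the template arguments are not visible in this copy of the patch).

#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Hedged sketch: recover the parent Data name from a cloned node (e.g. a branch-2
// clone named "conv_input_ascend_mbatch_batch_2") and fetch its dims for that branch.
bool LookupBranchDims(const std::string &cloned_data_name, size_t batch_index,
                      const std::map<std::string, std::vector<std::vector<int64_t>>> &data_to_dynamic_info,
                      std::vector<int64_t> &dims) {
  static const std::string kPostfix = "_ascend_mbatch_batch_";  // kMultiBatchNodePostfix in the pass
  const size_t pos = cloned_data_name.find(kPostfix);
  const std::string parent_name =
      (pos == std::string::npos) ? cloned_data_name : cloned_data_name.substr(0, pos);
  const auto it = data_to_dynamic_info.find(parent_name);
  if (it == data_to_dynamic_info.end() || batch_index >= it->second.size()) {
    return false;  // unknown input name or batch index out of range
  }
  dims = it->second.at(batch_index);
  return true;
}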
@@ -34,9 +34,9 @@ namespace ge { static std::map output_type_str_to_datatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, - {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, - {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, + {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, + {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; // the size of user defined output datatype or format string after split by ":". const size_t kUserDefinedElementCount = 2; @@ -436,7 +436,7 @@ Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraph Status NetOutputPass::CreateNetOutputNode(OpDescPtr &net_output_desc, const ge::ComputeGraphPtr &graph) { // Only flush subgraph name string node_name = - (graph->GetParentGraph() != nullptr) ? (graph->GetName() + "_" + NODE_NAME_NET_OUTPUT) : NODE_NAME_NET_OUTPUT; + (graph->GetParentGraph() != nullptr) ? (graph->GetName() + "_" + NODE_NAME_NET_OUTPUT) : NODE_NAME_NET_OUTPUT; net_output_desc = MakeShared(node_name, NETOUTPUT); if (net_output_desc == nullptr) { GELOGE(MEMALLOC_FAILED, "Make shared net output op failed."); @@ -629,7 +629,7 @@ Status NetOutputPass::SetUserDefDTypeAndFormatFromAtcParams(const NodePtr &outpu GELOGD("Add user-define datatype:%s to netoutput node.", TypeUtils::DataTypeToSerialString(output_data_type).c_str()); userdef_dtypes.push_back( - std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(output_data_type))); + std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(output_data_type))); continue; } // Output_node is not set,check if is_output_adjust_hw_layout is set @@ -638,7 +638,7 @@ Status NetOutputPass::SetUserDefDTypeAndFormatFromAtcParams(const NodePtr &outpu // Set DT_FLOAT16 & FORMAT_NC1HWC0 userdef_dtypes.push_back(std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(DT_FLOAT16))); userdef_formats.push_back( - std::to_string(index).append(":").append(TypeUtils::FormatToSerialString(FORMAT_NC1HWC0))); + std::to_string(index).append(":").append(TypeUtils::FormatToSerialString(FORMAT_NC1HWC0))); } } if (!userdef_dtypes.empty() && !ge::AttrUtils::SetListStr(op_desc, ATTR_ATC_USER_DEFINE_DATATYPE, userdef_dtypes)) { diff --git a/ge/graph/passes/net_output_pass.h b/ge/graph/passes/net_output_pass.h index 567d1246..b959bd96 100644 --- a/ge/graph/passes/net_output_pass.h +++ b/ge/graph/passes/net_output_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index 73b3b77e..5cd0f29f 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
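[Annotation] The net_output_pass.cc hunks above only re-wrap long statements, but they make the serialization format visible: each user-requested output datatype (and format) is stored on the NetOutput op as a plain "index:VALUE" string in a list attribute such as ATTR_ATC_USER_DEFINE_DATATYPE. A small illustration of that encoding, with the index and type chosen purely as an example:

#include <string>
#include <vector>
#include "graph/utils/type_utils.h"

// Hypothetical example of the "index:VALUE" encoding used for user-defined outputs.
std::vector<std::string> userdef_dtypes;
int32_t index = 0;                                // example output index
ge::DataType output_data_type = ge::DT_FLOAT16;   // example requested type
// Produces an entry like "0:DT_FLOAT16", ready for
// AttrUtils::SetListStr(op_desc, ATTR_ATC_USER_DEFINE_DATATYPE, userdef_dtypes).
userdef_dtypes.push_back(
    std::to_string(index).append(":").append(ge::TypeUtils::DataTypeToSerialString(output_data_type)));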
diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h old mode 100644 new mode 100755 index 6f28a618..f8223c20 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/no_use_reshape_remove_pass.cc b/ge/graph/passes/no_use_reshape_remove_pass.cc index 07f58417..66a798a5 100644 --- a/ge/graph/passes/no_use_reshape_remove_pass.cc +++ b/ge/graph/passes/no_use_reshape_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/no_use_reshape_remove_pass.h b/ge/graph/passes/no_use_reshape_remove_pass.h old mode 100644 new mode 100755 index 7ca36807..c142d8d2 --- a/ge/graph/passes/no_use_reshape_remove_pass.h +++ b/ge/graph/passes/no_use_reshape_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/parallel_concat_start_op_pass.cc b/ge/graph/passes/parallel_concat_start_op_pass.cc old mode 100644 new mode 100755 index 0ac26b91..508d9b19 --- a/ge/graph/passes/parallel_concat_start_op_pass.cc +++ b/ge/graph/passes/parallel_concat_start_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/parallel_concat_start_op_pass.h b/ge/graph/passes/parallel_concat_start_op_pass.h old mode 100644 new mode 100755 index 0f6e754a..db9d235a --- a/ge/graph/passes/parallel_concat_start_op_pass.h +++ b/ge/graph/passes/parallel_concat_start_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/pass_manager.cc b/ge/graph/passes/pass_manager.cc index 5be54f0a..59ede66b 100644 --- a/ge/graph/passes/pass_manager.cc +++ b/ge/graph/passes/pass_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ #include "omg/omg_inner_types.h" namespace ge { -const vector> &PassManager::GraphPasses() const { return names_to_graph_passes_; } +const vector>& PassManager::GraphPasses() const { return names_to_graph_passes_; } Status PassManager::AddPass(const string &pass_name, GraphPass *pass) { GE_CHECK_NOTNULL(pass); @@ -53,7 +53,7 @@ Status PassManager::Run(const ComputeGraphPtr &graph, vectorGetName().c_str()); return status; } - for (const auto &subgraph : graph->GetAllSubgraphs()) { + for (const auto &subgraph :graph->GetAllSubgraphs()) { GE_CHECK_NOTNULL(subgraph); GE_CHK_STATUS_RET(pass->ClearStatus(), "pass clear status failed for subgraph %s", subgraph->GetName().c_str()); string subgraph_pass_name = pass_name + "::" + graph->GetName(); diff --git a/ge/graph/passes/pass_utils.cc b/ge/graph/passes/pass_utils.cc index a51b4e29..5359ff63 100644 --- a/ge/graph/passes/pass_utils.cc +++ b/ge/graph/passes/pass_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,8 +93,8 @@ Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, T *b GeTensorDesc output_tensor_desc(out_desc); output_tensor_desc.SetShape(out_shape); - GeTensorPtr output_tensor_ptr = - MakeShared(output_tensor_desc, reinterpret_cast(buf), sizeof(T) * len); + GeTensorPtr output_tensor_ptr = MakeShared( + output_tensor_desc, reinterpret_cast(buf), sizeof(T) * len); if (output_tensor_ptr == nullptr) { GELOGE(MEMALLOC_FAILED, "Make shared failed"); return MEMALLOC_FAILED; diff --git a/ge/graph/passes/pass_utils.h b/ge/graph/passes/pass_utils.h old mode 100644 new mode 100755 index b889a056..fbfb3b47 --- a/ge/graph/passes/pass_utils.h +++ b/ge/graph/passes/pass_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ class PassUtils { static Status RemoveBranch(const NodePtr &node, std::vector &delete_nodes, std::vector &end_nodes); static Status RemoveInactiveBranchToMerge(const OutDataAnchorPtr &inactive_output_anchor, - std::vector &delete_nodes, std::vector &end_nodes); + std::vector &delete_nodes, std::vector &end_nodes); /// /// check is need iter flow ctrl. diff --git a/ge/graph/passes/permute_pass.cc b/ge/graph/passes/permute_pass.cc index e55edbb2..73d9a7f1 100644 --- a/ge/graph/passes/permute_pass.cc +++ b/ge/graph/passes/permute_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ Status PermutePass::Run(ComputeGraphPtr graph) { /// Input format 5D means NHWC in 4D way. So if input origin foramt is NCHW and /// permute paramter list is [0,3,1,2], this permute can be optimised. 
GE_IF_BOOL_EXEC( - GetLocalOmgContext().format != DOMI_TENSOR_ND, + GetLocalOmgContext().format != DOMI_TENSOR_ND, // Get input origin foramt for (NodePtr &n : graph->GetDirectNode()) { diff --git a/ge/graph/passes/permute_pass.h b/ge/graph/passes/permute_pass.h old mode 100644 new mode 100755 index e4415b6e..9c4b911e --- a/ge/graph/passes/permute_pass.h +++ b/ge/graph/passes/permute_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/placeholder_with_default_pass.cc b/ge/graph/passes/placeholder_with_default_pass.cc index 7a72fc36..4c902322 100644 --- a/ge/graph/passes/placeholder_with_default_pass.cc +++ b/ge/graph/passes/placeholder_with_default_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/placeholder_with_default_pass.h b/ge/graph/passes/placeholder_with_default_pass.h index d48a0a5a..f2b26933 100644 --- a/ge/graph/passes/placeholder_with_default_pass.h +++ b/ge/graph/passes/placeholder_with_default_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prevent_gradient_pass.cc b/ge/graph/passes/prevent_gradient_pass.cc index 87c1b3a1..402529c3 100644 --- a/ge/graph/passes/prevent_gradient_pass.cc +++ b/ge/graph/passes/prevent_gradient_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prevent_gradient_pass.h b/ge/graph/passes/prevent_gradient_pass.h old mode 100644 new mode 100755 index 8fe02b96..f1542c22 --- a/ge/graph/passes/prevent_gradient_pass.h +++ b/ge/graph/passes/prevent_gradient_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/print_op_pass.cc b/ge/graph/passes/print_op_pass.cc old mode 100644 new mode 100755 index fba7b712..28b2332b --- a/ge/graph/passes/print_op_pass.cc +++ b/ge/graph/passes/print_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/print_op_pass.h b/ge/graph/passes/print_op_pass.h old mode 100644 new mode 100755 index 15b0badc..deaf559b --- a/ge/graph/passes/print_op_pass.h +++ b/ge/graph/passes/print_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prune_pass.cc b/ge/graph/passes/prune_pass.cc index af10c54f..f5f4cbcb 100644 --- a/ge/graph/passes/prune_pass.cc +++ b/ge/graph/passes/prune_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prune_pass.h b/ge/graph/passes/prune_pass.h old mode 100644 new mode 100755 index 4bc6f184..c8cf8247 --- a/ge/graph/passes/prune_pass.h +++ b/ge/graph/passes/prune_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/ref_identity_delete_op_pass.cc b/ge/graph/passes/ref_identity_delete_op_pass.cc index 5bc0fad6..95f710f2 100644 --- a/ge/graph/passes/ref_identity_delete_op_pass.cc +++ b/ge/graph/passes/ref_identity_delete_op_pass.cc @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #include "ref_identity_delete_op_pass.h" #include diff --git a/ge/graph/passes/ref_identity_delete_op_pass.h b/ge/graph/passes/ref_identity_delete_op_pass.h index 3e42def4..8363528e 100644 --- a/ge/graph/passes/ref_identity_delete_op_pass.h +++ b/ge/graph/passes/ref_identity_delete_op_pass.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- */ +*/ #ifndef GE_GRAPH_PASSES_REF_IDENTITY_DELETE_OP_PASS_H_ #define GE_GRAPH_PASSES_REF_IDENTITY_DELETE_OP_PASS_H_ diff --git a/ge/graph/passes/remove_nodes_pass.cc b/ge/graph/passes/remove_nodes_pass.cc index b29d6af3..c238f003 100644 --- a/ge/graph/passes/remove_nodes_pass.cc +++ b/ge/graph/passes/remove_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "remove_nodes_pass.h" #include "debug/ge_log.h" #include "inc/framework/common/util.h" diff --git a/ge/graph/passes/remove_nodes_pass.h b/ge/graph/passes/remove_nodes_pass.h index 32acda1b..1d4fced9 100644 --- a/ge/graph/passes/remove_nodes_pass.h +++ b/ge/graph/passes/remove_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_REMOVE_NODES_PASS_H_ #define GE_REMOVE_NODES_PASS_H_ #include "graph/passes/base_pass.h" @@ -30,4 +29,4 @@ class RemoveNodesPass : public BaseNodePass { std::map> remove_node_attr_names_to_arg_; }; } // namespace ge -#endif // GE_REMOVE_NODES_PASS_H_ +#endif //GE_REMOVE_NODES_PASS_H_ diff --git a/ge/graph/passes/replace_transshape_pass.cc b/ge/graph/passes/replace_transshape_pass.cc index 28a8244d..9004df4e 100644 --- a/ge/graph/passes/replace_transshape_pass.cc +++ b/ge/graph/passes/replace_transshape_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -96,7 +96,7 @@ Status ReplaceTransShapePass::ReplaceTransShapeNode(ComputeGraphPtr &graph, Node void ReplaceTransShapePass::CopyControlEdges(NodePtr &old_node, NodePtr &new_node, bool input_check_flag) { GE_CHECK_NOTNULL_JUST_RETURN(old_node); GE_CHECK_NOTNULL_JUST_RETURN(new_node); - GE_IF_BOOL_EXEC(old_node == new_node, return ); + GE_IF_BOOL_EXEC(old_node == new_node, return); for (NodePtr &node : old_node->GetInControlNodes()) { auto out_control_anchor = node->GetOutControlAnchor(); GE_IF_BOOL_EXEC(!out_control_anchor->IsLinkedWith(new_node->GetInControlAnchor()), { @@ -133,8 +133,8 @@ void ReplaceTransShapePass::RemoveControlEdges(NodePtr &node) { } void ReplaceTransShapePass::ReplaceControlEdges(NodePtr &old_node, NodePtr &new_node) { - GE_IF_BOOL_EXEC(old_node == new_node, return ); + GE_IF_BOOL_EXEC(old_node == new_node, return); CopyControlEdges(old_node, new_node); RemoveControlEdges(old_node); } -} // namespace ge +} diff --git a/ge/graph/passes/replace_transshape_pass.h b/ge/graph/passes/replace_transshape_pass.h index 6673b11d..0620ed2d 100644 --- a/ge/graph/passes/replace_transshape_pass.h +++ b/ge/graph/passes/replace_transshape_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/replace_with_empty_const_pass.cc b/ge/graph/passes/replace_with_empty_const_pass.cc index 212b1979..171c76d0 100644 --- a/ge/graph/passes/replace_with_empty_const_pass.cc +++ b/ge/graph/passes/replace_with_empty_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,7 +89,7 @@ Status ReplaceWithEmptyConstPass::ReplaceWithEmptyConst(NodePtr &node_to_replace } // Repalce data anchors - for (const auto &anchor_idx : shape_2_out_idx.second) { + for (const auto &anchor_idx: shape_2_out_idx.second) { if (GraphUtils::ReplaceNodeDataAnchors(const_node, node_to_replace, {}, {anchor_idx}) != GRAPH_SUCCESS) { GELOGE(FAILED, "[%s] ReplaceNodeAnchors failed.", node_to_replace->GetName().c_str()); return FAILED; diff --git a/ge/graph/passes/replace_with_empty_const_pass.h b/ge/graph/passes/replace_with_empty_const_pass.h index 495b75b3..5083c699 100644 --- a/ge/graph/passes/replace_with_empty_const_pass.h +++ b/ge/graph/passes/replace_with_empty_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc index 013c8af4..f0987ff5 100644 --- a/ge/graph/passes/reshape_recovery_pass.cc +++ b/ge/graph/passes/reshape_recovery_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "graph/passes/reshape_recovery_pass.h" #include "common/ge/ge_util.h" @@ -50,8 +49,8 @@ Status InsertReshapeIfNeed(const NodePtr &node) { GE_CHECK_NOTNULL(src_tensor); for (auto dst_anchor : src_anchor->GetPeerInDataAnchors()) { auto dst_node = dst_anchor->GetOwnerNode(); - GELOGD("Try insert reshape between %s[%d] and %s[%d] to keep the shape continues", node->GetName().c_str(), - src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); + GELOGD("Try insert reshape between %s[%d] and %s[%d] to keep the shape continues", + node->GetName().c_str(), src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_node); GE_CHECK_NOTNULL(dst_node->GetOpDesc()); auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); @@ -64,12 +63,12 @@ Status InsertReshapeIfNeed(const NodePtr &node) { GE_CHECK_NOTNULL(reshape); auto ret = GraphUtils::InsertNodeBetweenDataAnchors(src_anchor, dst_anchor, reshape); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to insert reshape between node %s and %s", node->GetName().c_str(), - dst_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "Failed to insert reshape between node %s and %s", + node->GetName().c_str(), dst_node->GetName().c_str()); return INTERNAL_ERROR; } - GELOGI("Insert reshape between %s and %s to keep the shape continues", node->GetName().c_str(), - dst_node->GetName().c_str()); + GELOGI("Insert reshape between %s and %s to keep the shape continues", + node->GetName().c_str(), dst_node->GetName().c_str()); } } } diff --git a/ge/graph/passes/reshape_recovery_pass.h b/ge/graph/passes/reshape_recovery_pass.h index b3ab1baa..f16d5efb 100644 --- a/ge/graph/passes/reshape_recovery_pass.h +++ b/ge/graph/passes/reshape_recovery_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_RESHAPE_RECOVERY_PASS_H_ #define GE_RESHAPE_RECOVERY_PASS_H_ #include "inc/graph_pass.h" @@ -24,4 +23,4 @@ class ReshapeRecoveryPass : public GraphPass { }; } // namespace ge -#endif // GE_RESHAPE_RECOVERY_PASS_H_ +#endif //GE_RESHAPE_RECOVERY_PASS_H_ diff --git a/ge/graph/passes/reshape_remove_pass.cc b/ge/graph/passes/reshape_remove_pass.cc old mode 100644 new mode 100755 index 0f6d52d1..ffa6a485 --- a/ge/graph/passes/reshape_remove_pass.cc +++ b/ge/graph/passes/reshape_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,8 @@ Status ReshapeRemovePass::Run(NodePtr &node) { bool is_shape_unknown = false; if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, can not be deleted.", node->GetName().c_str()); + GELOGI("op:%s is unknown shape, can not be deleted.", + node->GetName().c_str()); return SUCCESS; } } diff --git a/ge/graph/passes/reshape_remove_pass.h b/ge/graph/passes/reshape_remove_pass.h index 044bbdb7..c89caf86 100644 --- a/ge/graph/passes/reshape_remove_pass.h +++ b/ge/graph/passes/reshape_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_add_control_pass.cc b/ge/graph/passes/resource_pair_add_control_pass.cc old mode 100644 new mode 100755 index bba8ee71..432bff9e --- a/ge/graph/passes/resource_pair_add_control_pass.cc +++ b/ge/graph/passes/resource_pair_add_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_add_control_pass.h b/ge/graph/passes/resource_pair_add_control_pass.h index 02ebd78f..5e1a4465 100644 --- a/ge/graph/passes/resource_pair_add_control_pass.h +++ b/ge/graph/passes/resource_pair_add_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_remove_control_pass.cc b/ge/graph/passes/resource_pair_remove_control_pass.cc old mode 100644 new mode 100755 index 00d97798..83fc7081 --- a/ge/graph/passes/resource_pair_remove_control_pass.cc +++ b/ge/graph/passes/resource_pair_remove_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_remove_control_pass.h b/ge/graph/passes/resource_pair_remove_control_pass.h index ab40b130..80f6b3ef 100644 --- a/ge/graph/passes/resource_pair_remove_control_pass.h +++ b/ge/graph/passes/resource_pair_remove_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/same_transdata_breadth_fusion_pass.cc b/ge/graph/passes/same_transdata_breadth_fusion_pass.cc index 2146a35d..5709dcb7 100644 --- a/ge/graph/passes/same_transdata_breadth_fusion_pass.cc +++ b/ge/graph/passes/same_transdata_breadth_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -223,7 +223,7 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkDataOutput2PreNode(const NodeP } graphStatus SameTransdataBreadthFusionPass::ReLinkOutDataPeerInControlNodes2PreNode( - const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor) { + const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor) { GE_CHECK_NOTNULL(pre_out_anchor); GE_CHECK_NOTNULL(transdata_node); auto transdata_peer_out_control_anchor = pre_out_anchor->GetOwnerNode()->GetOutControlAnchor(); @@ -278,8 +278,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkTransdataOutput2PreNode(const } graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInControlAnchors( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { GE_CHECK_NOTNULL(transdata_node_keep); GE_CHECK_NOTNULL(pre_out_anchor); auto out_control_anchor = transdata_node_keep->GetOutControlAnchor(); @@ -315,8 +315,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInControlAnchors } graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInDataAnchors( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { GE_CHECK_NOTNULL(transdata_node_keep); GE_CHECK_NOTNULL(pre_out_anchor); auto out_control_anchor = transdata_node_keep->GetOutControlAnchor(); @@ -354,8 +354,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInDataAnchors( } graphStatus SameTransdataBreadthFusionPass::ReLinkTransdataControlOutput2PreNode( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { if (ReLinkOutControlPeerInControlAnchors(transdata_node_keep, pre_out_anchor, transdata_peer_out_control_anchor) != GRAPH_SUCCESS) { return GRAPH_FAILED; @@ -595,8 +595,8 @@ void SameTransdataBreadthFusionPass::CopyTensorDesc(const ConstGeTensorDescPtr & } graphStatus SameTransdataBreadthFusionPass::LinkNewCastNode2RemainTransdata( - const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, - const NodePtr &transdata_node_keep) { + const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, + const NodePtr &transdata_node_keep) { for (size_t i = 1; i < same_transdata_nodes.size(); ++i) { int anchors_index = same_transdata_nodes[i]; bool reuse_nodes = AllNodeBeforeTransdataHasOneDataOut(anchors_index); @@ -734,8 +734,9 @@ graphStatus SameTransdataBreadthFusionPass::AddCastNode(const ComputeGraphPtr &g } graphStatus SameTransdataBreadthFusionPass::GetSubGraphsBetweenNormalAndTransdataNode( - OutDataAnchorPtr &out_anchor, std::vector>> &sub_graphs_out, - std::vector> &nodes_list) { + OutDataAnchorPtr &out_anchor, + std::vector>> &sub_graphs_out, + std::vector> &nodes_list) { graphStatus ret = GRAPH_SUCCESS; if (out_anchor == nullptr) { GELOGE(GRAPH_FAILED, "out data anchor is null!This should not happen!"); diff --git 
a/ge/graph/passes/same_transdata_breadth_fusion_pass.h b/ge/graph/passes/same_transdata_breadth_fusion_pass.h old mode 100644 new mode 100755 index a6a3bb26..92e559a0 --- a/ge/graph/passes/same_transdata_breadth_fusion_pass.h +++ b/ge/graph/passes/same_transdata_breadth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,10 +34,9 @@ class SameTransdataBreadthFusionPass : public GraphPass { private: graphStatus ExtractTransNode(const ComputeGraphPtr &graph); - graphStatus GetSubGraphsBetweenNormalAndTransdataNode( - OutDataAnchorPtr &out_anchor, - std::vector>> &sub_graphs_out, - std::vector> &nodes_list); + graphStatus GetSubGraphsBetweenNormalAndTransdataNode(OutDataAnchorPtr &out_anchor, + std::vector>> &sub_graphs_out, + std::vector> &nodes_list); void GetSubGraphNodesInfo(); @@ -45,7 +44,9 @@ class SameTransdataBreadthFusionPass : public GraphPass { std::set GetInControlIdentityNodes(const NodePtr &node, int subgraph_index); OpDescPtr GetCastOp(const GeTensorDesc &in_desc, const GeTensorDesc &out_desc); - graphStatus AddCastNode(const ComputeGraphPtr &graph, int anchors_index, OutDataAnchorPtr &pre_out_anchor, + graphStatus AddCastNode(const ComputeGraphPtr &graph, + int anchors_index, + OutDataAnchorPtr &pre_out_anchor, NodePtr &first_link_node); void GetSameTransdataNode(vector &same_transdata_nodes); @@ -53,10 +54,12 @@ class SameTransdataBreadthFusionPass : public GraphPass { graphStatus ReLinkTransdataOutput2PreNode(const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor, const NodePtr &relink_node); - graphStatus RelinkTransdataControlEdge(ComputeGraphPtr graph, NodePtr transdata_node_remove, + graphStatus RelinkTransdataControlEdge(ComputeGraphPtr graph, + NodePtr transdata_node_remove, NodePtr transdata_node_keep); - graphStatus LinkNewCastNode2RemainTransdata(const ComputeGraphPtr &graph, const vector &same_transdata_nodes, + graphStatus LinkNewCastNode2RemainTransdata(const ComputeGraphPtr &graph, + const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, const NodePtr &transdata_node_keep); @@ -76,7 +79,8 @@ class SameTransdataBreadthFusionPass : public GraphPass { graphStatus RelinkInControlEdge(const NodePtr &node_src, const NodePtr &node_dst); - graphStatus ReLinkDataOutput2PreNode(const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor, + graphStatus ReLinkDataOutput2PreNode(const NodePtr &transdata_node, + const OutDataAnchorPtr &pre_out_anchor, const NodePtr &relink_node); graphStatus ReLinkOutDataPeerInControlNodes2PreNode(const NodePtr &transdata_node, diff --git a/ge/graph/passes/save_pass.cc b/ge/graph/passes/save_pass.cc old mode 100644 new mode 100755 index 49196206..a2e34b1d --- a/ge/graph/passes/save_pass.cc +++ b/ge/graph/passes/save_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,8 +47,7 @@ Status SavePass::Run(ge::ComputeGraphPtr graph) { out_index.emplace_back(out_anchor->GetIdx()); ge::OpDescPtr op_desc = peer_node->GetOpDesc(); GE_IF_BOOL_EXEC(!ge::AttrUtils::SetStr(op_desc, kVarAttrVarIsSave, kVarIsSave), - GELOGE(INTERNAL_ERROR, "get kVarAttrVarIsSave failed"); - return INTERNAL_ERROR); + GELOGE(INTERNAL_ERROR, "get kVarAttrVarIsSave failed"); return INTERNAL_ERROR); } } } diff --git a/ge/graph/passes/save_pass.h b/ge/graph/passes/save_pass.h old mode 100644 new mode 100755 index ce8c8a7a..512dfa62 --- a/ge/graph/passes/save_pass.h +++ b/ge/graph/passes/save_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/set_input_output_offset_pass.cc b/ge/graph/passes/set_input_output_offset_pass.cc index 58c3be85..beac831c 100644 --- a/ge/graph/passes/set_input_output_offset_pass.cc +++ b/ge/graph/passes/set_input_output_offset_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,9 +80,9 @@ Status SetInputOutputOffsetPass::SetInputOffsetForFusion(const std::vectorGetName().c_str(), parent_index, data->GetName().c_str()); } } for (const auto &data_nodes : graph_nodes) { if (data_nodes.second.size() != graph_nodes.begin()->second.size()) { - GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", data_nodes.first->GetName().c_str(), - data_nodes.second.size(), graph_nodes.begin()->second.size()); + GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", + data_nodes.first->GetName().c_str(), data_nodes.second.size(), graph_nodes.begin()->second.size()); return FAILED; } } @@ -195,8 +195,8 @@ bool SubexpressionMigrationPass::GetAssociatedNodes(const NodePtr &node, mapGetInDataAnchor(i); const auto &out_anchor = in_anchor->GetPeerOutAnchor(); if (out_anchor == nullptr) { - inputs[i] = kInvalidParent; - continue; + inputs[i] = kInvalidParent; + continue; } // Has none Data input node, Can not move to parent. @@ -302,7 +302,7 @@ Status SubexpressionMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra continue; } - GELOGI("Move to parent: %s", base_node->GetName().c_str()); + GELOGI("Move to parent: %s, parent index: %u", base_node->GetName().c_str(), base_idx); if (AppendParallelNode(graph_nodes, func_node, outputs) != SUCCESS) { return FAILED; } @@ -335,12 +335,12 @@ Status SubexpressionMigrationPass::AppendParallelNode(map append_num; for (auto &groups : graph_nodes) { const auto &subgraph = groups.first; auto &data_nodes = groups.second; - uint32_t data_index = data_nodes.size(); - item.second = data_index + kCaseInputBase; // Update to valid parent index. + item.second = func_node->GetAllInDataAnchorsSize() + append_num[subgraph]; // Update to valid parent index. 
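The revised AppendParallelNode hunk above computes the parent index of each newly added Data node from the Case node's current input-anchor count plus a per-subgraph running count of appended nodes, rather than from the subgraph's existing Data-node count. A minimal standalone sketch of that bookkeeping follows; the counts and names are illustrative only and none of the GE graph types are modeled.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
  const uint32_t func_in_anchors = 4;          // the Case node currently has 4 data inputs.
  std::map<std::string, uint32_t> append_num;  // Data nodes appended so far, per branch.

  for (int item = 0; item < 2; ++item) {       // two outputs need new parent inputs.
    for (const std::string branch : {"subgraph0", "subgraph1"}) {
      const uint32_t parent_index = func_in_anchors + append_num[branch];
      ++append_num[branch];
      std::cout << branch << ": item " << item
                << " gets parent index " << parent_index << "\n";
    }
  }
  return 0;
}

Each appended item receives the same index in every branch (4, then 5 here), which is the alignment the per-subgraph append_num counter is meant to preserve even when the branches started with different Data-node counts.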
std::string data_name = subgraph->GetName() + "_data_" + std::to_string(item.second); OpDescBuilder op_builder(data_name, DATA); @@ -350,6 +350,7 @@ Status SubexpressionMigrationPass::AppendParallelNode(mapGetName().c_str()); return FAILED; @@ -360,11 +361,13 @@ Status SubexpressionMigrationPass::AppendParallelNode(mapAddNode(op_desc); + GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), item.second); } // Add InputTensor to functional Node. - NodeUtils::AppendInputAnchor(func_node, item.second + 1); + GE_CHK_GRAPH_STATUS_RET(NodeUtils::AppendInputAnchor(func_node, item.second + 1), "Append input failed"); migration_append_ = true; } @@ -385,7 +388,7 @@ Status SubexpressionMigrationPass::DetachParallelNode(const mapGetAllInDataAnchors()) { const auto &out_anchor = in_anchor->GetPeerOutAnchor(); if (out_anchor == nullptr) { - continue; + continue; } GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); @@ -412,12 +415,12 @@ Status SubexpressionMigrationPass::DetachParallelNode(const mapGetOpDesc()->GetOutputDesc(i); const auto &data_desc = data_node->GetOpDesc(); - (void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node. - (void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node. + (void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node. + (void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node. for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { if (in_anchor == nullptr) { - continue; + continue; } GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); const auto &owner_node = in_anchor->GetOwnerNode(); @@ -452,7 +455,7 @@ Status SubexpressionMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra GELOGE(FAILED, "Node: %s parent index %u not found", attach->GetName().c_str(), i); return FAILED; } - if (it_idx->second == kInvalidParent) { // Not connect, Skip. + if (it_idx->second == kInvalidParent) { // Not connect, Skip. continue; } @@ -468,13 +471,13 @@ Status SubexpressionMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra if (it_idx == outputs.end()) { return FAILED; } - if (it_idx->second == kInvalidParent) { // Not connect, Skip. + if (it_idx->second == kInvalidParent) { // Not connect, Skip. continue; } const auto &out_desc = attach->GetOpDesc()->GetOutputDesc(i); const auto &func_desc = func_node->GetOpDesc(); - (void)func_desc->UpdateInputDesc(it_idx->second, out_desc); // Set Data Input to new connect Node. + (void)func_desc->UpdateInputDesc(it_idx->second, out_desc); // Set Data Input to new connect Node. const auto &in_anchor = func_node->GetInDataAnchor(it_idx->second); const auto &out_anchor = in_anchor->GetPeerOutAnchor(); diff --git a/ge/graph/passes/subexpression_migration_pass.h b/ge/graph/passes/subexpression_migration_pass.h old mode 100644 new mode 100755 index fbe28cae..d2733fcf --- a/ge/graph/passes/subexpression_migration_pass.h +++ b/ge/graph/passes/subexpression_migration_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,8 +25,8 @@ #include #include -using std::map; using std::set; +using std::map; namespace ge { class SubexpressionMigrationPass : public GraphPass { @@ -64,8 +64,8 @@ class SubexpressionMigrationPass : public GraphPass { /// @param [in] anchor_idx: Anchor index of node. /// @return true: Same / false: not same /// - bool IsParallelNodeSame(const map> &graph_nodes, const NodePtr &base_node, - uint32_t node_idx, uint32_t anchor_idx); + bool IsParallelNodeSame(const map> &graph_nodes, + const NodePtr &base_node, uint32_t node_idx, uint32_t anchor_idx); /// /// @ingroup ge @@ -78,8 +78,8 @@ class SubexpressionMigrationPass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node, - map> &graph_nodes, const NodePtr &data_base, - uint32_t data_idx); + map> &graph_nodes, + const NodePtr &data_base, uint32_t data_idx); /// /// @ingroup ge @@ -104,8 +104,8 @@ class SubexpressionMigrationPass : public GraphPass { /// @param [in] outputs: Parent index of Node output. /// @return 0: SUCCESS / others: FAILED /// - Status AppendParallelNode(map> &graph_nodes, const NodePtr &func_node, - map &outputs); + Status AppendParallelNode(map> &graph_nodes, + const NodePtr &func_node, map &outputs); /// /// @ingroup ge diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc new file mode 100644 index 00000000..d88fb878 --- /dev/null +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -0,0 +1,565 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "subgraph_const_migration_pass.h" + +#include "graph/utils/node_utils.h" +#include "ge_local_engine/engine/host_cpu_engine.h" +#include "graph/passes/folding_pass.h" + +namespace ge { +constexpr uint32_t kDataOutIndex = 0; +constexpr uint32_t kCaseInputBase = 1; +constexpr uint32_t kInvalidParent = 0x7fffffffU; + +bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) { + if ((src_node == nullptr) && (dst_node == nullptr)) { + return true; + } + + if ((src_node == nullptr) || (dst_node == nullptr)) { + return false; + } + + if (src_node->GetType() != dst_node->GetType()) { + return false; + } + + if ((src_node->GetInControlNodes().size() != dst_node->GetInControlNodes().size()) || + (src_node->GetOutDataNodesSize() != dst_node->GetOutDataNodesSize())) { + return false; + } + + set related_parent; + const auto in_nodes = src_node->GetInControlNodes(); + for (uint32_t i = 0; i < in_nodes.size(); ++i) { + const auto owner_node = in_nodes.at(i); + uint32_t parent_index = 0; + if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + return false; + } + + related_parent.insert(parent_index); + } + + for (const auto &in_node : dst_node->GetInControlNodes()) { + uint32_t parent_index = 0; + if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + return false; + } + + if (related_parent.count(parent_index) == 0) { + return false; + } + } + + return true; +} + +/*********************************************************************************************************************** + +-----------+ + | Data | + +-----------+ + | + | + +-----------+ + | Cast | + +-----------+ + | + | + +-----------+ +-----------+ +-----------+ + | TransData | | Data | | Data | + +-----------+ +-----------+ +-----------+ + \ | / + \ | / + \ | / + \ | / + +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ + | Data | | Data | | Data | | Data | | Data | | Data | | Conv2D | + +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ + \ \ | / / | | + \ \ | / / | | + \ \ | / / | | + \ \ | / / | | + \ +-----------+ / | +-----------+ + +---------------| Const |----------------+ | | Pooling | + +-----------+ | +-----------+ + \ | / + \ | / + \ +-----------+ / + +-----------------------------------| Conv2D |------+ + +-----------+ + | + | + +-----------+ + | Node | + +-----------+ +***********************************************************************************************************************/ +Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) { + GE_CHECK_NOTNULL(graph); + if (graph->GetParentGraph() != nullptr) { + GELOGD("Subgraph %s skip the SubgraphConstMigrationPass", graph->GetName().c_str()); + return SUCCESS; + } + + GELOGD("Begin to run Subgraph Const Migration on graph: %s", graph->GetName().c_str()); + for (const auto &node : graph->GetDirectNode()) { + if (node->GetType() != CASE) { + continue; + } + + const auto &func_desc = node->GetOpDesc(); + if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { + GELOGD("Not multi-batch, Skip Case: %s", node->GetName().c_str()); + continue; + } + + do { + migration_append_ = false; + map> graph_datas; + if (ClassifyDataNodes(graph, func_desc, graph_datas) != SUCCESS) { + return FAILED; + } + + if (graph_datas.empty()) { + GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str()); + break; + } + + // {subgraph0, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} 
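IsSameOpNode above decides whether the candidate nodes found in different Case branches are interchangeable: they must share the op type, control-input and data-output counts, and their control inputs must come from Data nodes carrying the same parent indices. A toy sketch of that equivalence test, using simplified structs rather than GE node types:

#include <cstdint>
#include <iostream>
#include <set>
#include <string>

struct ToyNode {
  std::string type;
  std::set<uint32_t> ctrl_in_parent_indices;  // parent indices of the feeding Data nodes.
  size_t out_data_nodes;
};

bool IsSameOpNode(const ToyNode &a, const ToyNode &b) {
  return a.type == b.type &&
         a.out_data_nodes == b.out_data_nodes &&
         a.ctrl_in_parent_indices == b.ctrl_in_parent_indices;
}

int main() {
  const ToyNode branch0{"Const", {1, 2}, 3};
  const ToyNode branch1{"Const", {1, 2}, 3};
  std::cout << (IsSameOpNode(branch0, branch1) ? "same" : "different") << "\n";
  return 0;
}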
+ // {subgraph1, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} + // {subgraph2, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} + const auto base_nodes = graph_datas.begin()->second; // Need copy. + for (const auto &node_item : base_nodes) { + if (GraphNodeMigration(graph, node, graph_datas, node_item.second, node_item.first) != SUCCESS) { + return FAILED; + } + } + } while (migration_append_); + } + + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Get all Data nodes for all subgraph. +/// @param [in] graph: Root compute graph. +/// @param [in] func_desc: functional OpDesc of Case. +/// @param [out] graph_datas: Data groups of subgraph. +/// @return 0: SUCCESS / others: FAILED +/// +Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc, + map> &graph_datas) { + for (const auto &name : func_desc->GetSubgraphInstanceNames()) { + const auto &subgraph = graph->GetSubgraph(name); + if (subgraph == nullptr) { + GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s", name.c_str()); + return GE_GRAPH_EMPTY_SUBGRAPH; + } + + auto &data_nodes = graph_datas[subgraph]; + for (auto &data : subgraph->GetDirectNode()) { + if (data->GetType() != DATA) { + continue; + } + + uint32_t parent_index = 0; + if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + GELOGE(FAILED, "Parent index not found, name: %s", data->GetName().c_str()); + return FAILED; + } + + data_nodes[parent_index] = data; + GELOGD("%s, Parent index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, data->GetName().c_str()); + } + } + + for (const auto &data_nodes : graph_datas) { + if (data_nodes.second.size() != graph_datas.begin()->second.size()) { + GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", + data_nodes.first->GetName().c_str(), data_nodes.second.size(), graph_datas.begin()->second.size()); + return FAILED; + } + } + + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Get all Data nodes for all subgraph. +/// @param [in] node: Const node of subgraph. +/// @param [out] inputs: parent index to Const. +/// @param [out] outputs: Data groups of subgraph. +/// @return true: SUCCESS / false: FAILED +/// +bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map &inputs, + map &outputs) { + for (uint32_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) { + outputs[i] = kInvalidParent; + } + + uint32_t out_index = 0; + const auto in_nodes = node->GetInAllNodes(); + for (size_t i = 0; i < in_nodes.size(); ++i) { + const auto owner_node = in_nodes.at(i); + if (owner_node->GetType() != DATA) { + return false; + } + + uint32_t parent_index = 0; + if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + return false; + } + + // Input Data feed other Node, need add new Data. + inputs[i] = parent_index; + if ((out_index == outputs.size()) && owner_node->GetOutDataNodes().empty()) { + outputs[out_index] = parent_index; + ++out_index; + } + } + + return true; +} + +/// +/// @ingroup ge +/// @brief Get all Data nodes for all subgraph. +/// @param [in] graph_nodes: Data groups of subgraph. +/// @param [in] data_base: Data Node for migration. +/// @param [in] data_idx: Data groups of subgraph. +/// @param [in] data_idx: Data groups of subgraph. 
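ClassifyDataNodes above builds the map sketched in the preceding comment: for every subgraph of the Case node it indexes the Data nodes by ATTR_NAME_PARENT_NODE_INDEX, then rejects the node if any branch exposes a different number of Data nodes. A self-contained sketch of that grouping and consistency check, with placeholder types standing in for ComputeGraphPtr and NodePtr:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct FakeData {
  std::string name;
  uint32_t parent_index;  // value of ATTR_NAME_PARENT_NODE_INDEX.
};

int main() {
  // Three branches of one Case op, each exposing the same parent indices.
  const std::map<std::string, std::vector<FakeData>> subgraphs = {
      {"subgraph0", {{"data_1", 1}, {"data_2", 2}}},
      {"subgraph1", {{"data_1", 1}, {"data_2", 2}}},
      {"subgraph2", {{"data_1", 1}, {"data_2", 2}}},
  };

  // graph_datas[subgraph][parent_index] -> Data node name.
  std::map<std::string, std::map<uint32_t, std::string>> graph_datas;
  for (const auto &graph : subgraphs) {
    for (const auto &data : graph.second) {
      graph_datas[graph.first][data.parent_index] = data.name;
    }
  }

  // Reject the Case node when any branch exposes a different Data count.
  const size_t expected = graph_datas.begin()->second.size();
  for (const auto &graph : graph_datas) {
    if (graph.second.size() != expected) {
      std::cout << graph.first << " has an inconsistent Data set\n";
      return 1;
    }
  }
  std::cout << "every branch exposes " << expected << " Data nodes\n";
  return 0;
}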
+/// @return true: Same / false: not same +/// +bool SubgraphConstMigrationPass::IsParallelNodeSame(const map> &graph_datas, + const NodePtr &const_node, uint32_t parent_index, size_t index) { + auto it = graph_datas.begin(); + for (++it; it != graph_datas.end(); ++it) { + const auto &data_nodes = it->second; + auto data_it = data_nodes.find(parent_index); + if (data_it == data_nodes.end()) { + GELOGE(FAILED, "Data: %s not fount, index: %u", const_node->GetName().c_str(), parent_index); + return false; + } + + const auto &work_data = data_it->second; + const auto &out_anchor = work_data->GetOutControlAnchor(); + const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); + if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) { + GELOGW("Node anchors not same, Data: %s -> %s anchor size: %zu, index: %zu", + work_data->GetName().c_str(), const_node->GetName().c_str(), in_anchors.size(), index); + return false; + } + + const auto &in_anchor = in_anchors.at(index); + const auto &work_node = in_anchor->GetOwnerNode(); + if (work_node == nullptr) { + GELOGE(FAILED, "Data: %s not found, parent: %u, index: %zu", const_node->GetName().c_str(), parent_index, index); + return false; + } + + if (!IsSameOpNode(const_node, work_node)) { + GELOGI("OpDesc not same: %s %s, parent: %u, index: %zu", + const_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, index); + return false; + } + } + + return true; +} + +/// +/// @ingroup ge +/// @brief Migration subgraph Node to Root +/// @param [in] graph: Root compute graph. +/// @param [in] func_node: functional Node of Case. +/// @param [in] graph_nodes: Data groups of subgraph. +/// @param [in] data_base: Data Node for migration. +/// @param [in] data_idx: Data groups of subgraph. +/// @return 0: SUCCESS / others: FAILED +/// +Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node, + map> &graph_datas, + const NodePtr &data_node, uint32_t parent_index) { + bool can_extrapolation = false; + do { + can_extrapolation = false; + const auto &out_anchor = data_node->GetOutControlAnchor(); + const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); + for (size_t i = in_anchors.size(); i > 0; --i) { + const auto &in_anchor = in_anchors.at(i - 1); + const auto &work_node = in_anchor->GetOwnerNode(); + GELOGD("Data: %s, node: %s, parent: %u, index: %zu", + data_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, i); + if (work_node->GetType() != CONSTANT) { + continue; + } + + // Get associated Data, if Data feed other nodes, need append new Data. + map inputs; + map outputs; + if (!GetAssociatedNodes(work_node, inputs, outputs)) { + continue; + } + + if (!IsParallelNodeSame(graph_datas, work_node, parent_index, i - 1)) { + continue; + } + + GELOGI("Move node: %s, parent: %u, index: %zu", work_node->GetName().c_str(), parent_index, i); + if (AppendParallelNode(graph_datas, func_node, outputs) != SUCCESS) { + return FAILED; + } + + if (MoveNodeToParent(graph, func_node, graph_datas, parent_index, i - 1, inputs, outputs) != SUCCESS) { + return FAILED; + } + can_extrapolation = true; + break; + } + } while (can_extrapolation); + + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Append Input Tensor for functional node. +/// @param [in] graph_nodes: Data groups of subgraph. +/// @param [in] func_node: functional Node of Case. +/// @param [in] outputs: Parent index of Node output. 
+/// @return 0: SUCCESS / others: FAILED +/// +Status SubgraphConstMigrationPass::AppendParallelNode(map> &graph_datas, + const NodePtr &func_node, map &outputs) { + // If outputs index invalid, add Data and Input Tensor. + for (auto &item : outputs) { + if (item.second != kInvalidParent) { + continue; + } + + // Add Data to subgraph. + map append_num; + for (auto &groups : graph_datas) { + const auto &subgraph = groups.first; + auto &data_nodes = groups.second; + + item.second = func_node->GetAllInDataAnchorsSize() + append_num[subgraph]; // Update to valid parent index. + const auto data_name = subgraph->GetName() + "_data_" + std::to_string(item.second); + + OpDescBuilder op_builder(data_name, DATA); + const OpDescPtr op_desc = op_builder.AddInput("x").AddOutput("y").Build(); + if (op_desc == nullptr) { + GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed"); + return OUT_OF_MEMORY; + } + + uint32_t data_index = item.second - kCaseInputBase; + if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) { + GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); + return FAILED; + } + + if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, item.second)) { + GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); + return FAILED; + } + + append_num[subgraph]++; + data_nodes[item.second] = subgraph->AddNode(op_desc); + GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), item.second); + } + + // Add InputTensor to functional Node. + NodeUtils::AppendInputAnchor(func_node, item.second + 1); + } + + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Delete Node from all subgraph. +/// @param [in] graph_nodes: Data groups of subgraph. +/// @param [in] detach: Node will move to parent. +/// @param [in] outputs: Parent index of Node output. +/// @return 0: SUCCESS / others: FAILED +/// +Status SubgraphConstMigrationPass::DetachParallelNode(const map &graph_datas, const NodePtr &detach, + const map &outputs) { + // Break Data and Move node. + const auto &in_anchor = detach->GetInControlAnchor(); + const auto &out_anchors = in_anchor->GetPeerOutControlAnchors(); + for (size_t i = out_anchors.size(); i > 0; --i) { + const auto &out_anchor = out_anchors.at(i - 1); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); + const auto &owner_node = out_anchor->GetOwnerNode(); + GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), detach->GetName().c_str()); + } + + // Break Move and follow, Link Data and follow. + for (uint32_t i = 0; i < detach->GetAllOutDataAnchorsSize(); ++i) { + auto it_idx = outputs.find(i); + if (it_idx == outputs.end()) { + GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i); + return FAILED; + } + + auto it_data = graph_datas.find(it_idx->second); + if (it_data == graph_datas.end()) { + GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i); + return FAILED; + } + + const auto &data_node = it_data->second; + const auto &out_anchor = detach->GetOutDataAnchor(i); + + const auto &out_desc = detach->GetOpDesc()->GetOutputDesc(i); + const auto &data_desc = data_node->GetOpDesc(); + (void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node. + (void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node. 
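The AppendParallelNode body above derives ATTR_NAME_INDEX from the parent index by subtracting kCaseInputBase, because data input 0 of a Case node carries the branch selector rather than user data. A tiny sketch of that offset, with the constant copied from the pass and everything else illustrative:

#include <cstdint>
#include <iostream>

constexpr uint32_t kCaseInputBase = 1;  // input 0 of a Case node is the branch selector.

int main() {
  for (uint32_t parent_index = 1; parent_index <= 3; ++parent_index) {
    const uint32_t data_index = parent_index - kCaseInputBase;
    std::cout << "parent input " << parent_index
              << " -> ATTR_NAME_INDEX " << data_index << "\n";
  }
  return 0;
}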
+ + for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { + if (in_anchor == nullptr) { + continue; + } + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); + const auto &owner_node = in_anchor->GetOwnerNode(); + GELOGI("Remove Edge: %s %s", detach->GetName().c_str(), owner_node->GetName().c_str()); + + const auto &data_out_anchor = data_node->GetOutDataAnchor(kDataOutIndex); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_out_anchor, in_anchor), "Add edge failed"); + GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), owner_node->GetName().c_str()); + } + } + + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Move Node to Parent Graph. +/// @param [in] graph: Parent compute graph. +/// @param [in] func_node: functional Node of Case. +/// @param [in] attach: Node will move to parent. +/// @param [in] inputs: Parent index of Node input. +/// @param [in] outputs: Parent index of Node output. +/// @return 0: SUCCESS / others: FAILED +/// +Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node, + const NodePtr &attach, const map &inputs, + const map &outputs) { + GE_CHECK_NOTNULL(attach); + for (const auto item : inputs) { + if (item.second == kInvalidParent) { // Not connect, Skip. + continue; + } + + const auto &in_anchor = func_node->GetInDataAnchor(item.second); + const auto &out_anchor = in_anchor->GetPeerOutAnchor(); + const auto &owner_node = out_anchor->GetOwnerNode(); + const auto &in_control = attach->GetInControlAnchor(); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(owner_node->GetOutControlAnchor(), in_control), "Add edge failed"); + GELOGI("Add Edge: %s %s", owner_node->GetName().c_str(), attach->GetName().c_str()); + } + + for (const auto &item : outputs) { + const auto &func_desc = func_node->GetOpDesc(); + const auto &out_desc = attach->GetOpDesc()->GetOutputDesc(item.second); + (void)func_desc->UpdateInputDesc(item.second, out_desc); // Set Data Input to new connect Node. + + const auto &in_anchor = func_node->GetInDataAnchor(item.second); + const auto &out_anchor = in_anchor->GetPeerOutAnchor(); + if (out_anchor != nullptr) { + GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); + const auto &owner_node = out_anchor->GetOwnerNode(); + GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str()); + } + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(attach->GetOutDataAnchor(item.first), in_anchor), "Add edge failed"); + GELOGI("Add Edge: %s %s", attach->GetName().c_str(), func_node->GetName().c_str()); + } + + (void)graph->AddNode(attach); + (void)attach->SetOwnerComputeGraph(graph); + GELOGI("Add Node: %s %s", graph->GetName().c_str(), attach->GetName().c_str()); + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Move node to Parent graph. +/// @param [in] graph: Root compute graph. +/// @param [in] func_node: functional Node of Case. +/// @param [in] graph_nodes: Data groups of subgraph. +/// @param [in] index: anchor index of move Node. +/// @param [in] inputs: Parent index of Node input. +/// @param [in] outputs: Parent index of Node output. 
+/// @return 0: SUCCESS / others: FAILED +/// +Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node, + const map> &graph_datas, + uint32_t parent_index, uint32_t index, + const map &inputs, + const map &outputs) { + if (inputs.empty()) { + GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str()); + return FAILED; + } + + NodePtr move_node; + for (auto &groups : graph_datas) { + const auto &subgraph = groups.first; + const auto &data_nodes = groups.second; + auto it = data_nodes.find(parent_index); + if (it == data_nodes.end()) { + GELOGE(FAILED, "Graph: %s, Data: %u node not found", subgraph->GetName().c_str(), parent_index); + return FAILED; + } + + const auto &base_data = it->second; + const auto &out_anchor = base_data->GetOutControlAnchor(); + const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); + if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) { + GELOGE(FAILED, "Data: %s, anchor size: %zu, index: %u not found", + base_data->GetName().c_str(), in_anchors.size(), index); + return FAILED; + } + + const auto &in_anchor = in_anchors.at(index); + move_node = in_anchor->GetOwnerNode(); + if (move_node == nullptr) { + GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index); + return FAILED; + } + + if (DetachParallelNode(data_nodes, move_node, outputs) != SUCCESS) { + GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index); + return FAILED; + } + + GE_CHK_GRAPH_STATUS_RET(subgraph->RemoveNode(move_node), "Remove node failed"); + GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str()); + } + + if (AttachParallelNode(graph, func_node, move_node, inputs, outputs) != SUCCESS) { + return FAILED; + } + + migration_append_ = true; + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/passes/subgraph_const_migration_pass.h b/ge/graph/passes/subgraph_const_migration_pass.h new file mode 100755 index 00000000..3c087852 --- /dev/null +++ b/ge/graph/passes/subgraph_const_migration_pass.h @@ -0,0 +1,138 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ +#define GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ + +#include "graph/types.h" +#include "inc/graph_pass.h" + +#include +#include +#include +#include + +using std::set; +using std::map; + +namespace ge { +class SubgraphConstMigrationPass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph) override; + + private: + /// + /// @ingroup ge + /// @brief Get all Data nodes for all subgraph. + /// @param [in] graph: Root compute graph. + /// @param [in] func_desc: functional OpDesc of Case. + /// @param [out] graph_datas: Data groups of subgraph. 
+ /// @return 0: SUCCESS / others: FAILED + /// + Status ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc, + map> &graph_datas); + + /// + /// @ingroup ge + /// @brief Get all Data nodes for all subgraph. + /// @param [in] node: Const node of subgraph. + /// @param [in] func_desc: functional OpDesc of Case. + /// @param [out] graph_nodes: Data groups of subgraph. + /// @return true: SUCCESS / false: FAILED + /// + bool GetAssociatedNodes(const NodePtr &node, map &inputs, map &outputs); + + /// + /// @ingroup ge + /// @brief Get all Data nodes for all subgraph. + /// @param [in] graph_nodes: Data groups of subgraph. + /// @param [in] data_base: Data Node for migration. + /// @param [in] data_idx: Data groups of subgraph. + /// @param [in] data_idx: Data groups of subgraph. + /// @return true: Same / false: not same + /// + bool IsParallelNodeSame(const map> &graph_nodes, + const NodePtr &const_node, uint32_t parent_index, size_t index); + + /// + /// @ingroup ge + /// @brief Migration subgraph Node to Root + /// @param [in] graph: Root compute graph. + /// @param [in] func_node: functional Node of Case. + /// @param [in] graph_nodes: Data groups of subgraph. + /// @param [in] data_base: Data Node for migration. + /// @param [in] data_idx: Data groups of subgraph. + /// @return 0: SUCCESS / others: FAILED + /// + Status GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node, + map> &graph_nodes, + const NodePtr &data_base, uint32_t data_idx); + + /// + /// @ingroup ge + /// @brief Move node to Parent graph. + /// @param [in] graph: Root compute graph. + /// @param [in] func_node: functional Node of Case. + /// @param [in] graph_nodes: Data groups of subgraph. + /// @param [in] anchor_idx: anchor index of move Node. + /// @param [in] inputs: Parent index of Node input. + /// @param [in] outputs: Parent index of Node output. + /// @return 0: SUCCESS / others: FAILED + /// + Status MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node, + const map> &graph_nodes, + uint32_t parent_index, uint32_t anchor_idx, + const map &inputs, const map &outputs); + + /// + /// @ingroup ge + /// @brief Append Input Tensor for functional node. + /// @param [in] graph_nodes: Data groups of subgraph. + /// @param [in] func_node: functional Node of Case. + /// @param [in] outputs: Parent index of Node output. + /// @return 0: SUCCESS / others: FAILED + /// + Status AppendParallelNode(map> &graph_nodes, + const NodePtr &func_node, map &outputs); + + /// + /// @ingroup ge + /// @brief Delete Node from all subgraph. + /// @param [in] graph_nodes: Data groups of subgraph. + /// @param [in] detach: Node will move to parent. + /// @param [in] outputs: Parent index of Node output. + /// @return 0: SUCCESS / others: FAILED + /// + Status DetachParallelNode(const map &graph_datas, const NodePtr &detach, + const map &outputs); + + /// + /// @ingroup ge + /// @brief Move Node to Parent Graph. + /// @param [in] graph: Parent compute graph. + /// @param [in] func_node: functional Node of Case. + /// @param [in] attach: Node will move to parent. + /// @param [in] inputs: Parent index of Node input. + /// @param [in] outputs: Parent index of Node output. 
+ /// @return 0: SUCCESS / others: FAILED + /// + Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node, const NodePtr &attach, + const map &inputs, const map &outputs); + + bool migration_append_{false}; +}; +} // namespace ge +#endif // GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ \ No newline at end of file diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc old mode 100644 new mode 100755 index fbf444fb..04e28aaf --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,7 +79,7 @@ Status SubgraphPass::SubgraphInputNode(const ComputeGraphPtr &graph, const NodeP std::vector in_anchors; for (const InDataAnchorPtr &peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { input_continues_required_flag = - input_continues_required_flag || IsInputContinuesRequired(peer_in_anchor->GetOwnerNode()); + input_continues_required_flag || IsInputContinuesRequired(peer_in_anchor->GetOwnerNode()); in_anchors.emplace_back(peer_in_anchor); } // Data->InputContinuesRequiredOp in subgraph need memcpy. @@ -176,6 +176,9 @@ Status SubgraphPass::WhileInputNodes(const ComputeGraphPtr &graph, const NodePtr GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); NodePtr in_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(in_node); + if (in_node->GetType() == VARIABLE || in_node->GetType() == VARHANDLEOP || in_node->GetType() == VARIABLEV2) { + continue; + } // Input->While and Input link to other nodes need insert memcpy if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) { GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str()); @@ -259,7 +262,7 @@ Status SubgraphPass::InsertInputMemcpy(const ComputeGraphPtr &graph, const std:: for (size_t i = 0; i < data_nodes.size(); i++) { // Data node has and only has one output in_builder.AddInput("x" + std::to_string(i), data_nodes[i]->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y" + std::to_string(i), data_nodes[i]->GetOpDesc()->GetOutputDesc(0)); + .AddOutput("y" + std::to_string(i), data_nodes[i]->GetOpDesc()->GetOutputDesc(0)); } GELOGD("Insert memcpy after data_nodes of while_body %s.", graph->GetName().c_str()); NodePtr in_memcpy = graph->AddNode(in_builder.Build()); @@ -301,7 +304,7 @@ Status SubgraphPass::InsertOutputMemcpy(const ComputeGraphPtr &graph, const Node for (size_t i = 0; i < output_node->GetAllInDataAnchorsSize(); i++) { if (bypass_index.count(i) == 0) { out_builder.AddInput("x" + std::to_string(i), output_node->GetOpDesc()->GetInputDesc(i)) - .AddOutput("y" + std::to_string(i), output_node->GetOpDesc()->GetInputDesc(i)); + .AddOutput("y" + std::to_string(i), output_node->GetOpDesc()->GetInputDesc(i)); } } GELOGD("Insert memcpy before NetOutput of while_body %s.", graph->GetName().c_str()); @@ -437,8 +440,8 @@ Status SubgraphPass::InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDat NodePtr in_node = out_anchor->GetOwnerNode(); OpDescBuilder op_desc_builder(name, IDENTITY); OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) - .Build(); + .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) + .Build(); (void)AttrUtils::SetBool(op_desc, 
ATTR_NO_NEED_CONSTANT_FOLDING, false); if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); @@ -460,8 +463,8 @@ Status SubgraphPass::InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDat Status SubgraphPass::InsertNodeBetween(const OutDataAnchorPtr &src, const std::vector &dsts, const NodePtr &insert_node, uint32_t input_index, uint32_t output_index) { if (GraphUtils::AddEdge(src, insert_node->GetInDataAnchor(input_index)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add data_edge %s:%d->%s:%u failed.", src->GetOwnerNode()->GetName().c_str(), src->GetIdx(), - insert_node->GetName().c_str(), input_index); + GELOGE(FAILED, "Add data_edge %s:%d->%s:%u failed.", + src->GetOwnerNode()->GetName().c_str(), src->GetIdx(), insert_node->GetName().c_str(), input_index); return FAILED; } for (const auto &dst : dsts) { @@ -469,9 +472,11 @@ Status SubgraphPass::InsertNodeBetween(const OutDataAnchorPtr &src, const std::v dst->GetOwnerNode()->GetName().c_str()); if ((GraphUtils::RemoveEdge(src, dst) != GRAPH_SUCCESS) || (GraphUtils::AddEdge(insert_node->GetOutDataAnchor(output_index), dst) != GRAPH_SUCCESS)) { - GELOGE(FAILED, "Replace data_edge %s:%d->%s:%d by %s:%u->%s:%d failed.", src->GetOwnerNode()->GetName().c_str(), - src->GetIdx(), dst->GetOwnerNode()->GetName().c_str(), dst->GetIdx(), insert_node->GetName().c_str(), - output_index, dst->GetOwnerNode()->GetName().c_str(), dst->GetIdx()); + GELOGE(FAILED, "Replace data_edge %s:%d->%s:%d by %s:%u->%s:%d failed.", + src->GetOwnerNode()->GetName().c_str(), src->GetIdx(), + dst->GetOwnerNode()->GetName().c_str(), dst->GetIdx(), + insert_node->GetName().c_str(), output_index, + dst->GetOwnerNode()->GetName().c_str(), dst->GetIdx()); return FAILED; } } diff --git a/ge/graph/passes/subgraph_pass.h b/ge/graph/passes/subgraph_pass.h index 7ff2019f..6e518ace 100644 --- a/ge/graph/passes/subgraph_pass.h +++ b/ge/graph/passes/subgraph_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_data_edges_bypass.cc b/ge/graph/passes/switch_data_edges_bypass.cc index d7f5d90f..ce2b715b 100644 --- a/ge/graph/passes/switch_data_edges_bypass.cc +++ b/ge/graph/passes/switch_data_edges_bypass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
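InsertNodeBetween in the subgraph_pass.cc hunk above rewires edges in two steps: link the source anchor to the inserted node's input, then replace each selected source-to-destination edge with an edge from the inserted node's output. A standalone sketch of the same rewiring over a toy edge list (GE anchors, anchor indices, and status codes are not modeled):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

using Edge = std::pair<std::string, std::string>;  // (source node, destination node).

void InsertNodeBetween(std::vector<Edge> &edges, const std::string &src,
                       const std::vector<std::string> &dsts, const std::string &inserted) {
  edges.emplace_back(src, inserted);  // step 1: data edge src -> inserted node.
  for (const auto &dst : dsts) {      // step 2: redirect each chosen destination.
    for (auto &edge : edges) {
      if (edge.first == src && edge.second == dst) {
        edge.first = inserted;        // replace src -> dst with inserted -> dst.
      }
    }
  }
}

int main() {
  std::vector<Edge> edges = {{"while_input", "While"}, {"while_input", "OtherUser"}};
  InsertNodeBetween(edges, "while_input", {"While"}, "MemcpyAsync");
  for (const auto &edge : edges) {
    std::cout << edge.first << " -> " << edge.second << "\n";
  }
  return 0;
}

After the call, the While node is fed through the inserted node while the other consumer keeps its direct edge, which mirrors why only the anchors leading into the While need the memcpy.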
@@ -102,7 +102,7 @@ NodePtr AddIdentityAfterNode(const NodePtr &node, int index) { } auto identity_opdesc = - MakeShared("SwitchDataEdgesByPass_Identity_" + std::to_string(identity_counter), IDENTITY); + MakeShared("SwitchDataEdgesByPass_Identity_" + std::to_string(identity_counter), IDENTITY); if (identity_opdesc == nullptr) { GELOGE(OUT_OF_MEMORY, "Failed to add identity after node %s index %d", node->GetName().c_str(), index); return nullptr; diff --git a/ge/graph/passes/switch_data_edges_bypass.h b/ge/graph/passes/switch_data_edges_bypass.h index 8c2f492a..25f71d20 100644 --- a/ge/graph/passes/switch_data_edges_bypass.h +++ b/ge/graph/passes/switch_data_edges_bypass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_SWITCH_DATA_EDGES_BYPASS_H_ #define GE_SWITCH_DATA_EDGES_BYPASS_H_ @@ -23,10 +22,9 @@ namespace ge { class SwitchDataEdgesBypass : public GraphPass { public: Status Run(ComputeGraphPtr graph) override; - private: Status BypassSwitch(const NodePtr &node); }; } // namespace ge -#endif // GE_SWITCH_DATA_EDGES_BYPASS_H_ \ No newline at end of file +#endif //GE_SWITCH_DATA_EDGES_BYPASS_H_ \ No newline at end of file diff --git a/ge/graph/passes/switch_dead_branch_elimination.cc b/ge/graph/passes/switch_dead_branch_elimination.cc index dd7ace60..9358c9c3 100644 --- a/ge/graph/passes/switch_dead_branch_elimination.cc +++ b/ge/graph/passes/switch_dead_branch_elimination.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_dead_branch_elimination.h b/ge/graph/passes/switch_dead_branch_elimination.h index 4f2b9f02..fdefb5c0 100644 --- a/ge/graph/passes/switch_dead_branch_elimination.h +++ b/ge/graph/passes/switch_dead_branch_elimination.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef GE_GRAPH_PASSES_SWITCH_DEAD_BRANCH_ELIMINATION_H_ #define GE_GRAPH_PASSES_SWITCH_DEAD_BRANCH_ELIMINATION_H_ @@ -25,7 +24,8 @@ class SwitchDeadBranchElimination : public BaseNodePass { Status Run(NodePtr &node) override; private: - Status DeleteSwitchNode(NodePtr &node, NodePtr &pred_node, const OutDataAnchorPtr &active_out_data_anchor); + Status DeleteSwitchNode(NodePtr &node, NodePtr &pred_node, + const OutDataAnchorPtr &active_out_data_anchor); }; } // namespace ge diff --git a/ge/graph/passes/switch_logic_remove_pass.cc b/ge/graph/passes/switch_logic_remove_pass.cc index dafa3ae1..a6758e86 100644 --- a/ge/graph/passes/switch_logic_remove_pass.cc +++ b/ge/graph/passes/switch_logic_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,9 @@ char const *GetOutputNameFromIndex(int index) { return "UNKNOWN"; } -inline bool IsSwitch(const std::string &type) { return type == SWITCH || type == REFSWITCH; } +inline bool IsSwitch(const std::string &type) { + return type == SWITCH || type == REFSWITCH; +} Status GetPredNode(const NodePtr &switch_node, PredNodeAndOut &pred_node_index) { GE_CHECK_NOTNULL(switch_node); @@ -48,13 +50,16 @@ Status GetPredNode(const NodePtr &switch_node, PredNodeAndOut &pred_node_index) } auto pred_node_anchor = pred_in_anchor->GetPeerOutAnchor(); if (pred_node_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to get pred node for switch %s, node peer out anchor", + GELOGE(INTERNAL_ERROR, + "Failed to get pred node for switch %s, node peer out anchor", switch_node->GetName().c_str()); return INTERNAL_ERROR; } auto pred_node = pred_node_anchor->GetOwnerNode(); if (pred_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to get pred node for switch %s, null node", switch_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, + "Failed to get pred node for switch %s, null node", + switch_node->GetName().c_str()); return INTERNAL_ERROR; } pred_node_index.first = pred_node; @@ -106,8 +111,8 @@ Status SwitchLogicRemovePass::Run(NodePtr &node) { continue; } GELOGI("The switch nodes cascaded %s and %s have the save pred node %s, the %s can be remove", - node->GetName().c_str(), dst_node->GetName().c_str(), pred_node_and_out.first->GetName().c_str(), - dst_node->GetName().c_str()); + node->GetName().c_str(), dst_node->GetName().c_str(), + pred_node_and_out.first->GetName().c_str(), dst_node->GetName().c_str()); ret = RemoveSwitchNodeLogically(i, dst_node); if (ret != SUCCESS) { return ret; @@ -132,8 +137,8 @@ Status SwitchLogicRemovePass::RemoveSwitchNodeLogically(int parent_index, NodePt continue; } - GELOGI("Remove inactivate branch %s(%d) from switch %s", GetOutputNameFromIndex(i), i, - switch_node->GetName().c_str()); + GELOGI("Remove inactivate branch %s(%d) from switch %s", + GetOutputNameFromIndex(i), i, switch_node->GetName().c_str()); std::vector deleted_nodes; std::vector end_nodes; auto ret = PassUtils::RemoveInactiveBranchToMerge(out_anchor, deleted_nodes, end_nodes); @@ -143,18 +148,20 @@ Status SwitchLogicRemovePass::RemoveSwitchNodeLogically(int parent_index, NodePt for (auto &node : deleted_nodes) { GE_CHECK_NOTNULL(node); - GELOGD("Remove node %s from inactivate branch from switch %s", node->GetName().c_str(), - switch_node->GetName().c_str()); + GELOGD("Remove node %s from inactivate branch from switch %s", + 
node->GetName().c_str(), switch_node->GetName().c_str()); AddNodeDeleted(node); } for (auto &node : end_nodes) { GE_CHECK_NOTNULL(node); - GELOGD("Add end node %s to re-pass list, for inactivate branch from switch %s", node->GetName().c_str(), - switch_node->GetName().c_str()); + GELOGD("Add end node %s to re-pass list, for inactivate branch from switch %s", + node->GetName().c_str(), switch_node->GetName().c_str()); AddRePassNode(node); } } - GELOGI("Remove switch node cascaded %s, replace out index %d", switch_node->GetName().c_str(), parent_index); + GELOGI("Remove switch node cascaded %s, replace out index %d", + switch_node->GetName().c_str(), parent_index); return IsolateAndDeleteNode(switch_node, isolate_map); } } // namespace ge + diff --git a/ge/graph/passes/switch_logic_remove_pass.h b/ge/graph/passes/switch_logic_remove_pass.h index b711cc73..dc679978 100644 --- a/ge/graph/passes/switch_logic_remove_pass.h +++ b/ge/graph/passes/switch_logic_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_GRAPH_PASSES_SWITCH_LOGIC_REMOVE_PASS_H_ #define GE_GRAPH_PASSES_SWITCH_LOGIC_REMOVE_PASS_H_ #include "graph/passes/base_pass.h" @@ -22,7 +21,6 @@ namespace ge { class SwitchLogicRemovePass : public BaseNodePass { public: Status Run(NodePtr &node) override; - private: Status RemoveSwitchNodeLogically(int parent_index, NodePtr &switch_node); }; diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index 6c0d545d..529480a6 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +85,7 @@ Status SwitchToStreamSwitchPass::CheckCycleDependence(const ComputeGraphPtr &gra NodePtr cond_node = peer_out_anchor->GetOwnerNode(); auto iter = cond_switch_map.find(cond_node); if (iter == cond_switch_map.end()) { - cond_switch_map[cond_node] = {node}; + cond_switch_map[cond_node] = { node }; } else { iter->second.emplace_back(node); } @@ -104,7 +104,7 @@ Status SwitchToStreamSwitchPass::CheckCycleDependence(const ComputeGraphPtr &gra /// @return void /// void SwitchToStreamSwitchPass::MarkCycleDependence( - const std::unordered_map> &cond_switch_map) { + const std::unordered_map> &cond_switch_map) { std::stack out_nodes; NodePtr tmp_node = nullptr; std::unordered_set visited; @@ -130,8 +130,8 @@ void SwitchToStreamSwitchPass::MarkCycleDependence( out_nodes.push(out_node); continue; } - GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, GELOGW("set cyclic dependence attr failed."); - return ); + GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, + GELOGW("set cyclic dependence attr failed."); return ); auto map_iter = switch_cyclic_map_.find(out_node); if (map_iter == switch_cyclic_map_.end()) { switch_cyclic_map_[out_node] = {tmp_node->GetName()}; @@ -442,7 +442,7 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) GE_CHK_BOOL_EXEC(active_node != nullptr, return FAILED, "Create StreamActive node failed."); GE_CHK_STATUS(GraphUtils::AddEdge(cast_node->GetOutControlAnchor(), active_node->GetInControlAnchor()), "StreamActive add ctl edge failed."); - if (SetActiveLabelList(active_node, {cast_node->GetName()}) != SUCCESS) { + if (SetActiveLabelList(active_node, { cast_node->GetName() }) != SUCCESS) { GELOGE(FAILED, "Set active_label_list attr for node %s failed.", active_node->GetName().c_str()); return FAILED; } @@ -541,7 +541,8 @@ NodePtr SwitchToStreamSwitchPass::CreateCastOp(const ComputeGraphPtr &graph, con GeTensorDesc tensor_desc = cond_desc->GetOutputDesc(peer_cond_anchor->GetIdx()); tensor_desc.SetDataType(DT_BOOL); - GE_CHK_BOOL_EXEC(cast_desc->AddInputDesc(tensor_desc) == SUCCESS, return nullptr, "Cast_node add input desc failed."); + GE_CHK_BOOL_EXEC(cast_desc->AddInputDesc(tensor_desc) == SUCCESS, return nullptr, + "Cast_node add input desc failed."); tensor_desc.SetDataType(DT_INT32); GE_CHK_BOOL_EXEC(cast_desc->AddOutputDesc(tensor_desc) == SUCCESS, return nullptr, "Cast_node add output desc failed."); @@ -577,7 +578,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons auto resize_value = (int32_t)value; GeTensorDesc data_desc = op_desc->GetInputDesc(1); GeTensorPtr const_value = - MakeShared(data_desc, reinterpret_cast(&resize_value), sizeof(int32_t)); + MakeShared(data_desc, reinterpret_cast(&resize_value), sizeof(int32_t)); if (const_value == nullptr) { GELOGE(FAILED, "Create tensor failed."); return FAILED; @@ -736,7 +737,8 @@ void SwitchToStreamSwitchPass::MoveCtrlEdges(const NodePtr &old_node, const Node } } else { GE_IF_BOOL_EXEC(!out_ctrl_anchor->IsLinkedWith(new_node->GetInControlAnchor()), { - GE_CHK_STATUS(GraphUtils::AddEdge(out_ctrl_anchor, new_node->GetInControlAnchor()), "Add in ctrl edge failed."); + GE_CHK_STATUS(GraphUtils::AddEdge(out_ctrl_anchor, new_node->GetInControlAnchor()), + "Add in ctrl edge failed."); }); } GE_CHK_STATUS(GraphUtils::RemoveEdge(out_ctrl_anchor, old_node->GetInControlAnchor()), diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h index 15fe9dce..48725230 100644 --- 
+++ b/ge/graph/passes/switch_to_stream_switch_pass.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc
index 5c754f4f..21fb1eaf 100644
--- a/ge/graph/passes/transop_breadth_fusion_pass.cc
+++ b/ge/graph/passes/transop_breadth_fusion_pass.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,9 +41,9 @@ Status TransOpBreadthFusionPass::Run(ge::ComputeGraphPtr graph) {
     for (auto const &id_to_trans_nodes : ids_to_trans_nodes) {
       if (id_to_trans_nodes.second.size() > 1) {
         GELOGI(
-          "Begin to breath fusion output trans-op-nodes for %s, "
-          "trans id %s, trans-op count %zu",
-          node->GetName().c_str(), id_to_trans_nodes.first.c_str(), id_to_trans_nodes.second.size());
+            "Begin to breath fusion output trans-op-nodes for %s, "
+            "trans id %s, trans-op count %zu",
+            node->GetName().c_str(), id_to_trans_nodes.first.c_str(), id_to_trans_nodes.second.size());
         graphStatus status = Fusion(id_to_trans_nodes.second, graph);
         if (status != GRAPH_SUCCESS) {
           return FAILED;
diff --git a/ge/graph/passes/transop_breadth_fusion_pass.h b/ge/graph/passes/transop_breadth_fusion_pass.h
old mode 100644
new mode 100755
index 8e7799e1..9a82259c
--- a/ge/graph/passes/transop_breadth_fusion_pass.h
+++ b/ge/graph/passes/transop_breadth_fusion_pass.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/ge/graph/passes/transop_depth_fusion_pass.cc b/ge/graph/passes/transop_depth_fusion_pass.cc
old mode 100644
new mode 100755
index afeca3c4..85106e08
--- a/ge/graph/passes/transop_depth_fusion_pass.cc
+++ b/ge/graph/passes/transop_depth_fusion_pass.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -71,11 +71,11 @@ graphStatus TransOpDepthFusionPass::RecursiveInDepth(const InDataAnchorPtr &dst_
     temp_depth++;
     if (temp_depth >= max_recursive_depth) {
       GELOGI(
-        "Caution: recursive depth is become %u."
-        "It's abnormally to have so many trans ops between two normal ops"
-        "Please check your graph in detail!"
-        "The search terminate here and continue to another branch.",
-        temp_depth);
+          "Caution: recursive depth is become %u."
+          "It's abnormally to have so many trans ops between two normal ops"
+          "Please check your graph in detail!"
+ "The search terminate here and continue to another branch.", + temp_depth); temp_depth--; return GRAPH_SUCCESS; } @@ -166,7 +166,8 @@ bool TransOpDepthFusionPass::CheckNodeCanBeDeleted(const NodePtr &node) { bool is_shape_unknown = false; if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, can not be deleted.", node->GetName().c_str()); + GELOGI("op:%s is unknown shape, can not be deleted.", + node->GetName().c_str()); return false; } } @@ -267,15 +268,15 @@ graphStatus TransOpDepthFusionPass::RelinkEdges(const OutDataAnchorPtr &new_out_ GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(old_out_anchor, in_data_anchor), "remove edge failed"); GE_CHK_STATUS_RET(GraphUtils::AddEdge(new_out_anchor, in_data_anchor), "add edge failed"); GELOGD( - "relink edges before remove node, remove data edge between node: %s, " - "type: %s and node: %s, type: %s.", - old_out_anchor->GetOwnerNode()->GetName().c_str(), old_out_anchor->GetOwnerNode()->GetType().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); + "relink edges before remove node, remove data edge between node: %s, " + "type: %s and node: %s, type: %s.", + old_out_anchor->GetOwnerNode()->GetName().c_str(), old_out_anchor->GetOwnerNode()->GetType().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); GELOGD( - "relink edges before remove node, add data edge between node: %s, " - "type: %s and node: %s, type: %s.", - new_out_anchor->GetOwnerNode()->GetName().c_str(), new_out_anchor->GetOwnerNode()->GetType().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); + "relink edges before remove node, add data edge between node: %s, " + "type: %s and node: %s, type: %s.", + new_out_anchor->GetOwnerNode()->GetName().c_str(), new_out_anchor->GetOwnerNode()->GetType().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); bool is_linked = false; auto dst_node = in_data_anchor->GetOwnerNode(); @@ -290,10 +291,10 @@ graphStatus TransOpDepthFusionPass::RelinkEdges(const OutDataAnchorPtr &new_out_ auto in_ctrl_anchor = dst_node->GetInControlAnchor(); GE_CHK_STATUS_RET(GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor), "add edge failed"); GELOGD( - "relink edges before remove node, add control edge between node: %s," - " type: %s and node: %s, type: %s.", - src_node->GetName().c_str(), src_node->GetType().c_str(), dst_node->GetName().c_str(), - dst_node->GetType().c_str()); + "relink edges before remove node, add control edge between node: %s," + " type: %s and node: %s, type: %s.", + src_node->GetName().c_str(), src_node->GetType().c_str(), dst_node->GetName().c_str(), + dst_node->GetType().c_str()); } return GRAPH_SUCCESS; } diff --git a/ge/graph/passes/transop_depth_fusion_pass.h b/ge/graph/passes/transop_depth_fusion_pass.h old mode 100644 new mode 100755 index cc449893..831e7138 --- a/ge/graph/passes/transop_depth_fusion_pass.h +++ b/ge/graph/passes/transop_depth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc index 53c9deca..b207abe9 100644 --- a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc +++ b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,16 +66,18 @@ bool TransOpNearbyAllreduceFusionPass::IsSymmetricTransOps(const NodePtr &node1, GE_CHECK_NOTNULL_EXEC(node2_output_desc, return false); // two symmetric trans ops should have symmetric input/output datatype - GELOGD("format: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", node1_input_desc->GetFormat(), - node1_output_desc->GetFormat(), node2_input_desc->GetFormat(), node2_output_desc->GetFormat()); + GELOGD("format: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", + node1_input_desc->GetFormat(), node1_output_desc->GetFormat(), node2_input_desc->GetFormat(), + node2_output_desc->GetFormat()); if (node1_input_desc->GetFormat() != node2_output_desc->GetFormat() || node1_output_desc->GetFormat() != node2_input_desc->GetFormat()) { return false; } // two symmetric trans ops should have symmetric input/output format - GELOGD("datatype: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", node1_input_desc->GetDataType(), - node1_output_desc->GetDataType(), node2_input_desc->GetDataType(), node2_output_desc->GetDataType()); + GELOGD("datatype: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", + node1_input_desc->GetDataType(), node1_output_desc->GetDataType(), node2_input_desc->GetDataType(), + node2_output_desc->GetDataType()); if (node1_input_desc->GetDataType() != node2_output_desc->GetDataType() || node1_output_desc->GetDataType() != node2_input_desc->GetDataType()) { return false; @@ -134,8 +136,8 @@ Status TransOpNearbyAllreduceFusionPass::RemoveNearbyPairedTransOps(const NodePt GELOGI("in_node=%s, out_node=%s", in_node->GetName().c_str(), out_node->GetName().c_str()); if (!IsSymmetricTransOps(in_node, out_node)) { - GELOGD("ignore asymmetric transop %s and %s for node %s", in_node->GetName().c_str(), out_node->GetName().c_str(), - node->GetName().c_str()); + GELOGD("ignore asymmetric transop %s and %s for node %s", + in_node->GetName().c_str(), out_node->GetName().c_str(), node->GetName().c_str()); continue; } @@ -165,8 +167,8 @@ Status TransOpNearbyAllreduceFusionPass::RemoveNearbyPairedTransOps(const NodePt if (node->GetOpDesc()->UpdateOutputDesc(static_cast(i), output_desc) != GRAPH_SUCCESS) { GELOGE(FAILED, "UpdateOutputDesc"); } - GELOGI("successfully remove paired transop (%s and %s) for node %s", in_node->GetName().c_str(), - out_node->GetName().c_str(), node->GetName().c_str()); + GELOGI("successfully remove paired transop (%s and %s) for node %s", + in_node->GetName().c_str(), out_node->GetName().c_str(), node->GetName().c_str()); } GELOGI("successfully remove %zu pair of transops in total for node %s", removed_node_count, node->GetName().c_str()); return SUCCESS; diff --git a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h old mode 100644 new mode 100755 index 1cd1eeec..0cacf062 --- a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h +++ b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h @@ -1,5 +1,5 @@ 
/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transop_symmetry_elimination_pass.cc b/ge/graph/passes/transop_symmetry_elimination_pass.cc index e217656c..9db3aea1 100644 --- a/ge/graph/passes/transop_symmetry_elimination_pass.cc +++ b/ge/graph/passes/transop_symmetry_elimination_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,9 +32,7 @@ namespace ge { Status TransOpSymmetryEliminationPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); - if (white_list_op.find(node->GetType()) == white_list_op.end()) { - return SUCCESS; - } + if (white_list_op.find(node->GetType()) == white_list_op.end()) { return SUCCESS; } GELOGD("Symmetry Elimination Pass in."); for (const auto &out_anchor : node->GetAllOutDataAnchors()) { GE_CHECK_NOTNULL(out_anchor); @@ -42,9 +40,7 @@ Status TransOpSymmetryEliminationPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(peer_in_anchor); GE_CHECK_NOTNULL(peer_in_anchor->GetOwnerNode()); GE_CHECK_NOTNULL(peer_in_anchor->GetOwnerNode()->GetOpDesc()); - if (!CheckCanBeEliminated(node, peer_in_anchor)) { - continue; - } + if (!CheckCanBeEliminated(node, peer_in_anchor)) { continue; } auto dst_node = peer_in_anchor->GetOwnerNode(); Status ret = EliminateTransOp(node, out_anchor, dst_node, peer_in_anchor); if (ret != SUCCESS) { @@ -76,10 +72,9 @@ bool TransOpSymmetryEliminationPass::CheckCanBeEliminated(const ge::NodePtr &src GE_CHECK_NOTNULL(src_node->GetOpDesc()); auto unknown_dims_num = GetUnknownDimsNum(src_node->GetOpDesc()->GetInputDesc(0)); if (unknown_dims_num != 0 && (unknown_dims_num == UNKNOWN_DIM_NUM || unknown_dims_num > 1)) { - GELOGD( - "Pre node %s is reshape op which input is dynamic shape and has more than one unknown dimension. " - "Ignore pass.", - src_node->GetName().c_str()); + GELOGD("Pre node %s is reshape op which input is dynamic shape and has more than one unknown dimension. " + "Ignore pass.", + src_node->GetName().c_str()); return false; } } else if (src_node->GetType() == ge::TRANSPOSED) { @@ -114,26 +109,26 @@ bool TransOpSymmetryEliminationPass::DescAreSymmetry(const NodePtr &src_node, co bool is_symmetry = true; if (src_node->GetType() == CAST && dst_node->GetType() == CAST) { bool is_format_symmetry = - (src_input_format == dst_output_format) || (dst_output_format == FORMAT_ND) || (src_input_format == FORMAT_ND); + (src_input_format == dst_output_format) || (dst_output_format == FORMAT_ND) || (src_input_format == FORMAT_ND); is_symmetry = (src_input_dtype == dst_output_dtype) && is_format_symmetry; } else { - is_symmetry = (src_input_dtype == dst_output_dtype) && (src_input_shape == dst_output_shape) && - (src_input_format == dst_output_format); + is_symmetry = (src_input_dtype == dst_output_dtype) && (src_input_shape == dst_output_shape) + && (src_input_format == dst_output_format); } if (!is_symmetry) { - GELOGD( - "Not satisfied symmetry. ignore pass.\n" - "Src node %s input type: %s format: %s shape: %s, " - "dst node %s output type: %s format: %s shape: %s. 
", - src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_input_dtype).c_str(), - TypeUtils::FormatToSerialString(src_input_format).c_str(), formats::ShapeToString(src_input_shape).c_str(), - dst_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(dst_output_dtype).c_str(), - TypeUtils::FormatToSerialString(dst_output_format).c_str(), formats::ShapeToString(dst_output_shape).c_str()); + GELOGD("Not satisfied symmetry. ignore pass.\n" + "Src node %s input type: %s format: %s shape: %s, " + "dst node %s output type: %s format: %s shape: %s. ", + src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_input_dtype).c_str(), + TypeUtils::FormatToSerialString(src_input_format).c_str(), formats::ShapeToString(src_input_shape).c_str(), + dst_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(dst_output_dtype).c_str(), + TypeUtils::FormatToSerialString(dst_output_format).c_str(), + formats::ShapeToString(dst_output_shape).c_str()); } return is_symmetry; } -int TransOpSymmetryEliminationPass::GetUnknownDimsNum(const GeTensorDesc &node_desc) { +int TransOpSymmetryEliminationPass::GetUnknownDimsNum(const GeTensorDesc& node_desc){ // // unknown_dims_num != 0 , is dynamic shape // unknown_dims_num = UNKNOWN_DIM_NUM , all dims are unknown @@ -142,12 +137,8 @@ int TransOpSymmetryEliminationPass::GetUnknownDimsNum(const GeTensorDesc &node_d int unknown_dims_num = 0; auto ge_shape = node_desc.GetShape(); for (const auto dim : ge_shape.GetDims()) { - if (dim == UNKNOWN_DIM_NUM) { - return UNKNOWN_DIM_NUM; - } - if (dim == UNKNOWN_DIM) { - ++unknown_dims_num; - } + if (dim == UNKNOWN_DIM_NUM) { return UNKNOWN_DIM_NUM; } + if (dim == UNKNOWN_DIM) { ++unknown_dims_num; } } return unknown_dims_num; } @@ -167,16 +158,10 @@ bool TransOpSymmetryEliminationPass::JudgeTransposeDBack2Raw(const NodePtr &src_ vector dst_node_perm; (void)AttrUtils::GetListInt(dst_node->GetOpDesc(), ge::PERMUTE_ATTR_PERM, dst_node_perm); - if (src_node_perm.size() != dst_node_perm.size()) { - return false; - } + if (src_node_perm.size() != dst_node_perm.size()) { return false; } for (size_t src_index = 0; src_index < src_node_perm.size(); ++src_index) { - if (dst_node_perm[src_index] >= static_cast(src_node_perm.size())) { - return false; - } - if (static_cast(src_index) != src_node_perm[dst_node_perm[src_index]]) { - return false; - } + if (dst_node_perm[src_index] >= static_cast(src_node_perm.size())) { return false; } + if (static_cast(src_index) != src_node_perm[dst_node_perm[src_index]]) { return false; } } return true; } @@ -210,9 +195,7 @@ Status TransOpSymmetryEliminationPass::EliminateTransOp(NodePtr &src_node, const } // 4.Add control edge from T1 other input to T2, like reshape second input for (const auto &in_node : src_node->GetInDataNodes()) { - if (in_node->GetName() == pre_normal_node->GetName()) { - continue; - } + if (in_node->GetName() == pre_normal_node->GetName()) { continue; } ret = GraphUtils::AddEdge(in_node->GetOutControlAnchor(), dst_node->GetInControlAnchor()); if (ret != GRAPH_SUCCESS) { GELOGE(FAILED, "Add control edge from %s to %s failed.", in_node->GetName().c_str(), dst_node->GetName().c_str()); diff --git a/ge/graph/passes/transop_symmetry_elimination_pass.h b/ge/graph/passes/transop_symmetry_elimination_pass.h index 2c89ed48..3a80ada5 100644 --- a/ge/graph/passes/transop_symmetry_elimination_pass.h +++ b/ge/graph/passes/transop_symmetry_elimination_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies 
Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_SYMMETRY_ELIMINATION_PASS_H #define GE_SYMMETRY_ELIMINATION_PASS_H @@ -48,7 +47,8 @@ class TransOpSymmetryEliminationPass : public BaseNodePass { /// @param node_desc: node to be checked /// @return 0 , is not dynamic shape; UNKNOWN_DIM_NUM , all dims are unknown; n , n > 0 , has n dims unknown /// - static int GetUnknownDimsNum(const GeTensorDesc &node_desc); + static int GetUnknownDimsNum(const GeTensorDesc& node_desc); + /// /// judge after two transposed op transform the raw data will be the same diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/ge/graph/passes/transop_without_reshape_fusion_pass.cc index 61bca6b8..c1eaf0f9 100644 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.cc +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "graph/passes/transop_without_reshape_fusion_pass.h" #include #include diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.h b/ge/graph/passes/transop_without_reshape_fusion_pass.h old mode 100644 new mode 100755 index 4d037957..2aa2d0f7 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.h +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ #define GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ @@ -38,13 +37,19 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { void RemoveNousedNodes(const ComputeGraphPtr &graph); void GetBeginOutDescAndEndInDesc(const int index, GeTensorDesc &out_desc, GeTensorDesc &in_desc); - void GetFormatTransferDesc(const GeTensorDesc &out_desc, const GeTensorDesc &in_desc, - GeTensorDesc &format_transfer_input, GeTensorDesc &format_transfer_output); + void GetFormatTransferDesc(const GeTensorDesc &out_desc, + const GeTensorDesc &in_desc, + GeTensorDesc &format_transfer_input, + GeTensorDesc &format_transfer_output); - void GetCastOpDesc(const GeTensorDesc &out_desc, const GeTensorDesc &in_desc, GeTensorDesc &cast_input, + void GetCastOpDesc(const GeTensorDesc &out_desc, + const GeTensorDesc &in_desc, + GeTensorDesc &cast_input, GeTensorDesc &cast_output); - graphStatus FormatFusion(const int index, OpDescPtr &format_transfer_op, int32_t &fusion_op_count, + graphStatus FormatFusion(const int index, + OpDescPtr &format_transfer_op, + int32_t &fusion_op_count, bool &fusion_continue); graphStatus DataTypeFusion(const int index, OpDescPtr &cast_op, int32_t &fusion_op_count); @@ -52,27 +57,33 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { void GetOutDataPeerInControlAnchors(const size_t index, vector> &out_data_peer_in_control_anchors); - void GetInControlPeerOutControlAnchors(const size_t index, - vector> &in_control_peer_out_control_anchors); + void GetInControlPeerOutControlAnchors( + const size_t index, + vector> &in_control_peer_out_control_anchors); - void GetOutControlPeerAnchors(const size_t index, - vector> &out_control_peer_in_control_anchors, - vector> &out_control_peer_in_data_anchors); + void GetOutControlPeerAnchors( + const size_t index, + vector> &out_control_peer_in_control_anchors, + vector> &out_control_peer_in_data_anchors); graphStatus TransOpFuse(const ComputeGraphPtr &graph); bool OpAccuracyAbilityCheck(const OpDescPtr &op_desc); graphStatus GetSubGraphsBetweenNormalNode( - const OutDataAnchorPtr &out_anchor, vector>> &sub_graphs_out, - vector> &nodes_list); + const OutDataAnchorPtr &out_anchor, + vector> + >& sub_graphs_out, + vector> &nodes_list + ); graphStatus GetSubGraphNodesInfo(); void GetControlAnchors(); graphStatus InsertNewTransOp(const ComputeGraphPtr &graph, const OpDescPtr &cast_op, - const OpDescPtr &format_transfer_op, const int index, const bool insert_cast_first); + const OpDescPtr &format_transfer_op, const int index, + const bool insert_cast_first); void EraseInvalidAnchorsPair(); @@ -97,8 +108,11 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { graphStatus RelinkControlEdge(const int index, const OutDataAnchorPtr &out_anchor, const vector &new_trans_nodes); - graphStatus GetTransNode(const ComputeGraphPtr &graph, const OpDescPtr &cast_op, const OpDescPtr &format_transfer_op, - const bool insert_cast_first, std::vector &new_trans_nodes); + graphStatus GetTransNode(const ComputeGraphPtr &graph, + const OpDescPtr &cast_op, + const OpDescPtr &format_transfer_op, + const bool insert_cast_first, + std::vector &new_trans_nodes); void UpdateOutputName(const OutDataAnchorPtr &out_anchor, const InDataAnchorPtr &old_peer_in_anchor, const NodePtr &in_owner_node); @@ -121,7 +135,8 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { static bool FusionFormatSupport(Format format); - vector>> sub_graph_anchors_; + vector>> + sub_graph_anchors_; vector> 
sub_graph_nodes_; vector transop_num_count_; vector sub_graph_has_reshape_node_; @@ -135,3 +150,4 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { } // namespace ge #endif // GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ + diff --git a/ge/graph/passes/transpose_transdata_pass.cc b/ge/graph/passes/transpose_transdata_pass.cc index b9bd59be..19bff563 100644 --- a/ge/graph/passes/transpose_transdata_pass.cc +++ b/ge/graph/passes/transpose_transdata_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transpose_transdata_pass.h b/ge/graph/passes/transpose_transdata_pass.h index bf42f5de..a72893f6 100644 --- a/ge/graph/passes/transpose_transdata_pass.h +++ b/ge/graph/passes/transpose_transdata_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_args_clean_pass.cc b/ge/graph/passes/unused_args_clean_pass.cc old mode 100644 new mode 100755 index 62094631..83fd0438 --- a/ge/graph/passes/unused_args_clean_pass.cc +++ b/ge/graph/passes/unused_args_clean_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "unused_args_clean_pass.h" #include "graph/utils/node_utils.h" @@ -70,7 +69,7 @@ bool UnusedArgsCleanPass::UnusedInputTensor(const mapsecond; @@ -161,9 +160,13 @@ Status UnusedArgsCleanPass::UpdateInputTensor(const mapGetPeerOutAnchor(); const auto &out_node = out_anchor->GetOwnerNode(); + const auto &func_desc = func_node->GetOpDesc(); + const auto &old_desc = func_desc->GetInputDesc(parent_index); + (void)func_desc->UpdateInputDesc(update_index, old_desc); + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_anchor, new_anchor), "Add edge failed"); - GELOGI("Add edge success, func node: %s, node: %s, parent index: %u, update index: %u", func_node->GetName().c_str(), - out_node->GetName().c_str(), parent_index, update_index); + GELOGI("Add edge success, func node: %s, node: %s, parent index: %u, update index: %u", + func_node->GetName().c_str(), out_node->GetName().c_str(), parent_index, update_index); GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, old_anchor), "Remove edge failed"); GELOGI("Remove edge success, func node: %s, node: %s", func_node->GetName().c_str(), out_node->GetName().c_str()); @@ -185,7 +188,7 @@ Status UnusedArgsCleanPass::RemoveInputTensor(const map #include -using std::map; using std::set; +using std::map; namespace ge { class UnusedArgsCleanPass : public GraphPass { @@ -42,8 +41,8 @@ class UnusedArgsCleanPass : public GraphPass { /// @param [in] parent_index: parent index for check. 
/// @return true: unused / false: used /// - bool UnusedInputTensor(const map> &graph_nodes, const NodePtr &func_node, - uint32_t parent_index); + bool UnusedInputTensor(const map> &graph_nodes, + const NodePtr &func_node, uint32_t parent_index); /// /// @ingroup ge @@ -64,8 +63,8 @@ class UnusedArgsCleanPass : public GraphPass { /// @param [in] parent_index: parent index for remove. /// @return 0: SUCCESS / others: FAILED /// - Status RemoveInputTensor(const map> &graph_nodes, const NodePtr &func_node, - uint32_t parent_index); + Status RemoveInputTensor(const map> &graph_nodes, + const NodePtr &func_node, uint32_t parent_index); /// /// @ingroup ge @@ -76,8 +75,8 @@ class UnusedArgsCleanPass : public GraphPass { /// @param [in] unused_num: unused args num. /// @return 0: SUCCESS / others: FAILED /// - Status UpdateInputTensor(const map> &graph_nodes, const NodePtr &func_node, - uint32_t parent_index, uint32_t unused_num); + Status UpdateInputTensor(const map> &graph_nodes, + const NodePtr &func_node, uint32_t parent_index, uint32_t unused_num); }; } // namespace ge #endif // GE_COMMON_CASE_ARGS_CLEAN_H_ diff --git a/ge/graph/passes/unused_const_pass.cc b/ge/graph/passes/unused_const_pass.cc index 386633b5..7c57c53e 100644 --- a/ge/graph/passes/unused_const_pass.cc +++ b/ge/graph/passes/unused_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_const_pass.h b/ge/graph/passes/unused_const_pass.h old mode 100644 new mode 100755 index 3c7f3460..6b99f058 --- a/ge/graph/passes/unused_const_pass.h +++ b/ge/graph/passes/unused_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_op_remove_pass.cc b/ge/graph/passes/unused_op_remove_pass.cc index 45bbc291..41f7c828 100644 --- a/ge/graph/passes/unused_op_remove_pass.cc +++ b/ge/graph/passes/unused_op_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -122,10 +122,11 @@ bool UnusedOpRemovePass::IsExceptions(const NodePtr &node) { GE_CHK_BOOL_EXEC(op_def != nullptr, return false, "opdesc is nullptr"); // permute optimised in permute_pass.cpp if (op_def->GetType() == PERMUTE) { - GE_IF_BOOL_EXEC((node->GetInDataNodes().size() != 0 && - (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && - node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), - return false); + GE_IF_BOOL_EXEC( + (node->GetInDataNodes().size() != 0 && + (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && + node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), + return false); return true; } return false; diff --git a/ge/graph/passes/unused_op_remove_pass.h b/ge/graph/passes/unused_op_remove_pass.h old mode 100644 new mode 100755 index bbc43af5..b9429cfd --- a/ge/graph/passes/unused_op_remove_pass.h +++ b/ge/graph/passes/unused_op_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/var_is_initialized_op_pass.cc b/ge/graph/passes/var_is_initialized_op_pass.cc index 73456a7b..b9c752d8 100644 --- a/ge/graph/passes/var_is_initialized_op_pass.cc +++ b/ge/graph/passes/var_is_initialized_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,10 +48,12 @@ Status VarIsInitializedOpPass::Run(NodePtr &node) { if (CheckSrcNode(node, inited) != SUCCESS) { return FAILED; } - GELOGI("The variable inited status %s on node %s", inited ? "true" : "false", node->GetName().c_str()); + GELOGI("The variable inited status %s on node %s", + inited ? "true" : "false", node->GetName().c_str()); ret = ChangeNodeToConstant(node, inited); - GELOGI("Change VarIsInitializedOp %s to be Constant %s end.", node->GetName().c_str(), inited ? "true" : "false"); + GELOGI("Change VarIsInitializedOp %s to be Constant %s end.", + node->GetName().c_str(), inited ? 
"true" : "false"); return ret; } @@ -59,7 +61,9 @@ Status VarIsInitializedOpPass::CheckSrcNode(const NodePtr &node, bool &inited) c GE_CHECK_NOTNULL(node); auto input_nodes = node->GetInDataNodes(); if (input_nodes.size() != kVarIsInitializedIOCnt) { - GELOGE(FAILED, "[%s] Node input data nodes size [%zu] is not equal 1.", node->GetName().c_str(), + GELOGE(FAILED, + "[%s] Node input data nodes size [%zu] is not equal 1.", + node->GetName().c_str(), input_nodes.size()); return FAILED; } @@ -125,7 +129,8 @@ Status VarIsInitializedOpPass::ProcessInAnchor(NodePtr &node, NodePtr &new_node) GE_CHECK_NOTNULL(new_node); auto in_anchors = node->GetAllInDataAnchors(); auto out_anchors = node->GetAllOutDataAnchors(); - if ((in_anchors.size() != kVarIsInitializedIOCnt) || (out_anchors.size() != kVarIsInitializedIOCnt)) { + if ((in_anchors.size() != kVarIsInitializedIOCnt) || + (out_anchors.size() != kVarIsInitializedIOCnt)) { GELOGE(FAILED, "[%s] Node input/output data anchors" " size [%lu][%lu] is not all equal 1.", @@ -144,8 +149,8 @@ Status VarIsInitializedOpPass::ProcessInAnchor(NodePtr &node, NodePtr &new_node) } auto src_node = peer_out_anchor->GetOwnerNode(); if (GraphUtils::AddEdge(src_node->GetOutControlAnchor(), new_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to link control edges from var %s to new const %s", src_node->GetName().c_str(), - new_node->GetName().c_str()); + GELOGE(FAILED, "Failed to link control edges from var %s to new const %s", + src_node->GetName().c_str(), new_node->GetName().c_str()); return FAILED; } @@ -248,15 +253,15 @@ Status VarIsInitializedOpPass::UpdateInitedVars(const NodePtr &node) { if (inited_vars != nullptr) { GE_CHECK_NOTNULL(node->GetOpDesc()); nodes_to_inited_vars_[node->GetOpDesc()->GetId()] = inited_vars; - GELOGD("Inited vars on this graph when node %s, inited vars count %zu", node->GetName().c_str(), - inited_vars->size()); + GELOGD("Inited vars on this graph when node %s, inited vars count %zu", + node->GetName().c_str(), inited_vars->size()); } return SUCCESS; } std::set *VarIsInitializedOpPass::CreateInitedVars() { - std::unique_ptr> inited_vars_keeper(new (std::nothrow) std::set()); + std::unique_ptr> inited_vars_keeper(new(std::nothrow) std::set()); if (inited_vars_keeper == nullptr) { GELOGE(OUT_OF_MEMORY, "Failed to alloc set memory"); return nullptr; diff --git a/ge/graph/passes/var_is_initialized_op_pass.h b/ge/graph/passes/var_is_initialized_op_pass.h old mode 100644 new mode 100755 index 37b3f49b..9cfa7b99 --- a/ge/graph/passes/var_is_initialized_op_pass.h +++ b/ge/graph/passes/var_is_initialized_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_format_pass.cc b/ge/graph/passes/variable_format_pass.cc index 28f6a4f7..bd5300a5 100644 --- a/ge/graph/passes/variable_format_pass.cc +++ b/ge/graph/passes/variable_format_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_format_pass.h b/ge/graph/passes/variable_format_pass.h old mode 100644 new mode 100755 index 1a0abe2e..e2c32903 --- a/ge/graph/passes/variable_format_pass.h +++ b/ge/graph/passes/variable_format_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_op_pass.cc b/ge/graph/passes/variable_op_pass.cc index 8c34cd36..f1843d94 100644 --- a/ge/graph/passes/variable_op_pass.cc +++ b/ge/graph/passes/variable_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,9 +73,9 @@ Status ByPassTransNode(NodePtr &trans_node, NodePtr &ref_node) { auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); if (prev_trans_node_out_anchor == nullptr) { GELOGW( - "The trans node %s does not have an input, so the ref node %s does" - " not have any inputs after bypass", - trans_node->GetName().c_str(), trans_node->GetName().c_str()); + "The trans node %s does not have an input, so the ref node %s does" + " not have any inputs after bypass", + trans_node->GetName().c_str(), trans_node->GetName().c_str()); } else { ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, ref_in_anchor); if (ret != GRAPH_SUCCESS) { @@ -162,14 +162,14 @@ Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { auto start_iter = fusion_road.begin(); auto end_iter = fusion_road.rbegin(); GELOGD( - "Trans variable data for %s from format %s to %s, shape %s to %s " - "data-type %s to %s, path len %zu success", - node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), - TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), - formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), - formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), - TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), - TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); + "Trans variable data for %s from format %s to %s, shape %s to %s " + "data-type %s to %s, path len %zu success", + node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), + TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), + formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), + formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), + TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), + TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); if (ret != SUCCESS) { @@ -230,9 +230,9 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { trans_node->GetType().c_str(), var_node->GetName().c_str()); if (trans_node->GetOutDataNodes().size() > 1) { GELOGD( - "The trans node %s type %s connecting with var-ref %s has more" - " than one output data nodes, unlink the edge between them", - trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); + "The trans 
node %s type %s connecting with var-ref %s has more" + " than one output data nodes, unlink the edge between them", + trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); if (ByPassTransNode(trans_node, ref_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to ref %s", trans_node->GetName().c_str(), ref_node->GetName().c_str()); @@ -240,9 +240,9 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { } } else { GELOGD( - "The trans node %s type %s connecting with var-ref %s has only" - " one output data nodes, isolate and remove it.", - trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); + "The trans node %s type %s connecting with var-ref %s has only" + " one output data nodes, isolate and remove it.", + trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { return GE_GRAPH_VARIABLE_OP_PASS_FAILED; } @@ -280,9 +280,9 @@ Status VariableOpPass::CheckSameAndTransOp(const ge::NodePtr &var_node, bool &is } if (data_index != in_anchor->GetIdx()) { GELOGD( - "Variables only can be fusion with trans nodes, the next node %s" - " type %s index %d does not trans anything(correct index %d)", - out_node->GetName().c_str(), out_node->GetType().c_str(), in_anchor->GetIdx(), data_index); + "Variables only can be fusion with trans nodes, the next node %s" + " type %s index %d does not trans anything(correct index %d)", + out_node->GetName().c_str(), out_node->GetType().c_str(), in_anchor->GetIdx(), data_index); return SUCCESS; } @@ -312,9 +312,9 @@ Status VariableOpPass::CheckSameAndTransOp(const ge::NodePtr &var_node, bool &is } GELOGW( - "trans_op type size for var Node(%s) is over 1, Currently not" - " supported, dataTypeAndFormats is %s.", - var_node->GetName().c_str(), type_and_formats_stream.str().c_str()); + "trans_op type size for var Node(%s) is over 1, Currently not" + " supported, dataTypeAndFormats is %s.", + var_node->GetName().c_str(), type_and_formats_stream.str().c_str()); return SUCCESS; } @@ -591,7 +591,7 @@ Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { Status ret = SUCCESS; for (auto &node : graph->GetDirectNode()) { bool is_var_node = - (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); + (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); if (is_var_node) { if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); diff --git a/ge/graph/passes/variable_op_pass.h b/ge/graph/passes/variable_op_pass.h old mode 100644 new mode 100755 index e17980e9..3b18882c --- a/ge/graph/passes/variable_op_pass.h +++ b/ge/graph/passes/variable_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_prepare_op_pass.cc b/ge/graph/passes/variable_prepare_op_pass.cc index f0e11735..9231e4eb 100644 --- a/ge/graph/passes/variable_prepare_op_pass.cc +++ b/ge/graph/passes/variable_prepare_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ namespace ge { std::map>> VariablePrepareOpPass::ref_node_without_prototype_map_ = { - {REFSWITCH, {{0, {0, 1}}}}}; + {REFSWITCH, {{0, {0, 1}}}}}; Status VariablePrepareOpPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); @@ -261,13 +261,13 @@ Status VariablePrepareOpPass::InsertVariableRef(ge::NodePtr &node, int in_index, // Add edge from ref identity node to variable ref node. CHECK_FALSE_EXEC( - ge::GraphUtils::AddEdge(ref_identity_node->GetOutDataAnchor(0), variable_ref_node->GetInDataAnchor(0)) == SUCCESS, - GELOGE(FAILED, "Add data edge between ref_identity and variable_ref failed"); - return FAILED); + ge::GraphUtils::AddEdge(ref_identity_node->GetOutDataAnchor(0), variable_ref_node->GetInDataAnchor(0)) == SUCCESS, + GELOGE(FAILED, "Add data edge between ref_identity and variable_ref failed"); + return FAILED); CHECK_FALSE_EXEC( - ge::GraphUtils::AddEdge(node->GetOutControlAnchor(), variable_ref_node->GetInControlAnchor()) == SUCCESS, - GELOGE(FAILED, "Add control edge between ref_identity and variable_ref failed"); - return FAILED); + ge::GraphUtils::AddEdge(node->GetOutControlAnchor(), variable_ref_node->GetInControlAnchor()) == SUCCESS, + GELOGE(FAILED, "Add control edge between ref_identity and variable_ref failed"); + return FAILED); return SUCCESS; } @@ -280,9 +280,9 @@ Status VariablePrepareOpPass::AddControlEdge(const ge::NodePtr &node, const ge:: NodePtr peer_node = peer_in_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(peer_node); CHECK_FALSE_EXEC( - ge::GraphUtils::AddEdge(variable_ref_node->GetOutControlAnchor(), peer_node->GetInControlAnchor()) == SUCCESS, - GELOGE(FAILED, "Add control edge between variable_ref and ref node's peer node failed"); - return FAILED); + ge::GraphUtils::AddEdge(variable_ref_node->GetOutControlAnchor(), peer_node->GetInControlAnchor()) == SUCCESS, + GELOGE(FAILED, "Add control edge between variable_ref and ref node's peer node failed"); + return FAILED); } } return SUCCESS; diff --git a/ge/graph/passes/variable_prepare_op_pass.h b/ge/graph/passes/variable_prepare_op_pass.h index 563a9be5..4cef5b59 100644 --- a/ge/graph/passes/variable_prepare_op_pass.h +++ b/ge/graph/passes/variable_prepare_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_ref_delete_op_pass.cc b/ge/graph/passes/variable_ref_delete_op_pass.cc index 90cfd747..8e625857 100644 --- a/ge/graph/passes/variable_ref_delete_op_pass.cc +++ b/ge/graph/passes/variable_ref_delete_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_ref_delete_op_pass.h b/ge/graph/passes/variable_ref_delete_op_pass.h old mode 100644 new mode 100755 index 43db2703..7f6d1274 --- a/ge/graph/passes/variable_ref_delete_op_pass.h +++ b/ge/graph/passes/variable_ref_delete_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,8 @@ class VariableRefDeleteOpPass : public GraphPass { Status Run(ge::ComputeGraphPtr graph); private: - Status DealVariableRef(ge::ComputeGraphPtr &graph, ge::NodePtr &variable_ref, + Status DealVariableRef(ge::ComputeGraphPtr &graph, + ge::NodePtr &variable_ref, const std::string &ref_var_src_var_name); }; } // namespace ge diff --git a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc index 1321cf20..1c8eb0ec 100644 --- a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc +++ b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "variable_ref_useless_control_out_delete_pass.h" namespace ge { @@ -29,8 +28,8 @@ Status VariableRefUselessControlOutDeletePass::Run(ge::ComputeGraphPtr graph) { } auto src_nodes = node->GetInDataNodes(); if (src_nodes.empty()) { - GELOGW("The variable ref name %s(ref %s) does not has a input node", node->GetName().c_str(), - src_var_name.c_str()); + GELOGW("The variable ref name %s(ref %s) does not has a input node", + node->GetName().c_str(), src_var_name.c_str()); continue; } auto &src_node = src_nodes.at(0); @@ -40,12 +39,14 @@ Status VariableRefUselessControlOutDeletePass::Run(ge::ComputeGraphPtr graph) { auto out_control_anchor = node->GetOutControlAnchor(); for (const auto &dst_node_anchor : out_control_anchor->GetPeerInControlAnchors()) { if (controlled_nodes.count(dst_node_anchor->GetOwnerNode()) > 0) { - GELOGI("Unlink the duplicated control edge from variable ref %s to %s, prev node %s", node->GetName().c_str(), - dst_node_anchor->GetOwnerNode()->GetName().c_str(), src_node->GetName().c_str()); + GELOGI("Unlink the duplicated control edge from variable ref %s to %s, prev node %s", + node->GetName().c_str(), + dst_node_anchor->GetOwnerNode()->GetName().c_str(), + src_node->GetName().c_str()); out_control_anchor->Unlink(dst_node_anchor); } } } return SUCCESS; } -} // namespace ge \ No newline at end of file +} \ No newline at end of file diff --git a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h index 307754da..fd9dbb00 100644 --- a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h +++ b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ #define GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ @@ -29,4 +28,4 @@ class VariableRefUselessControlOutDeletePass : public GraphPass { Status Run(ge::ComputeGraphPtr graph); }; } // namespace ge -#endif // GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ +#endif //GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 20216941..f90c0d80 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -118,12 +118,13 @@ #include "graph/passes/variable_prepare_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h" + namespace ge { namespace { static std::map output_type_str_to_datatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, - {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, - {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, + {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, + {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; const char *const kMbatchSwitchnName = "mbatch-switch-name"; @@ -159,9 +160,9 @@ OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { } GE_IF_BOOL_EXEC( - tensor->SetData(reinterpret_cast(dst_shape.get()), dim_cnt * sizeof(int32_t)) != GRAPH_SUCCESS, - GELOGE(INTERNAL_ERROR, "tensor set data failed"); - return nullptr;) + tensor->SetData(reinterpret_cast(dst_shape.get()), dim_cnt * sizeof(int32_t)) != GRAPH_SUCCESS, + GELOGE(INTERNAL_ERROR, "tensor set data failed"); + return nullptr;) } GELOGD("Create shape input dim [%s]", dst_ge_shape.ToString().c_str()); @@ -173,11 +174,11 @@ void AddTransNodeAttr(const std::string &node_type, const GeTensorDesc &input, c // For format transfer node, the IR definition has src/dst format attrs if (node_type == TRANSDATA) { GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_SRC_FORMAT, TypeUtils::FormatToSerialString(input.GetFormat())), - GELOGW("SetStr FORMAT_TRANSFER_SRC_FORMAT failed");) + !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_SRC_FORMAT, TypeUtils::FormatToSerialString(input.GetFormat())), + GELOGW("SetStr FORMAT_TRANSFER_SRC_FORMAT failed");) GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_DST_FORMAT, TypeUtils::FormatToSerialString(output.GetFormat())), - GELOGW("SetStr FORMAT_TRANSFER_DST_FORMAT failed");) + !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_DST_FORMAT, TypeUtils::FormatToSerialString(output.GetFormat())), + GELOGW("SetStr FORMAT_TRANSFER_DST_FORMAT failed");) } // For TransposeD node, the IR definition has perm attrs @@ -228,8 +229,8 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c // for data dump GE_IF_BOOL_EXEC( - !AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), - 
GELOGW("CreateTransNode: SetListStr failed");) + !AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), + GELOGW("CreateTransNode: SetListStr failed");) // Default single input and single output auto ret = op_desc->AddInputDesc(input); @@ -554,12 +555,11 @@ Status ModifyDataNetOutputFormatAndShape(OpDescPtr &op_desc, uint32_t index, For ge::TensorUtils::SetSize(*input, size); ge::TensorUtils::SetSize(*output, size); - GELOGI( - "Modify Data NetOutput format and shape success, node:%s, index:%d, old_shape:%s, old_Format:%s, " - "new_shape:%s, new_format:%s, new_size:%lu", - op_desc->GetName().c_str(), index, formats::JoinToString(old_shape).c_str(), - ge::TypeUtils::FormatToSerialString(old_format).c_str(), formats::JoinToString(dst_shape_dims).c_str(), - ge::TypeUtils::FormatToSerialString(storage_format).c_str(), size); + GELOGI("Modify Data NetOutput format and shape success, node:%s, index:%d, old_shape:%s, old_Format:%s, " + "new_shape:%s, new_format:%s, new_size:%lu", + op_desc->GetName().c_str(), index, formats::JoinToString(old_shape).c_str(), + ge::TypeUtils::FormatToSerialString(old_format).c_str(), formats::JoinToString(dst_shape_dims).c_str(), + ge::TypeUtils::FormatToSerialString(storage_format).c_str(), size); } return SUCCESS; @@ -742,8 +742,8 @@ Status ProcessDataNodeDynShape(NodePtr &node_ptr) { return SUCCESS; } -Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tensor_desc_ptr, Format &storage_format, - vector &dst_shape_dims) { +Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tensor_desc_ptr, + Format &storage_format, vector &dst_shape_dims) { GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(tensor_desc_ptr); @@ -761,8 +761,7 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tenso op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(), formats::JoinToString(storage_shape).c_str()); } else { - GELOGE(PARAM_INVALID, - "Update node by storage format failed, storage_shape not set. " + GELOGE(PARAM_INVALID, "Update node by storage format failed, storage_shape not set. 
" "node: [%s], storage_format [%s]", op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str()); return FAILED; @@ -1066,7 +1065,7 @@ Status GraphPrepare::CheckRefOp() { GELOGE(PARAM_INVALID, "CheckRefInputNode failed."); return PARAM_INVALID; } - (void)ref_nodes.insert(node); // no need to check value + (void)ref_nodes.insert(node); // no need to check value } } } @@ -1099,8 +1098,8 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { int64_t tensor_size = 0; graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(output, tensor_size); if (graph_status != GRAPH_SUCCESS) { - ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, - {"GetTensorMemorySizeInBytes", "opname is " + node->GetName()}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19012", {"function", "reason"}, {"GetTensorMemorySizeInBytes", "opname is " + node->GetName()}); GELOGE(graph_status, "GetTensorMemorySizeInBytes failed!"); return FAILED; } diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h old mode 100644 new mode 100755 index 7c6e4013..ef0f3ed3 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,8 +45,10 @@ class GraphPrepare { virtual ~GraphPrepare(); GraphPrepare(const GraphPrepare &in) = delete; GraphPrepare &operator=(const GraphPrepare &in) = delete; - Status PrepareDynShape(ConstGraphPtr graph, const std::vector &user_input, - ge::ComputeGraphPtr &compute_graph, uint64_t session_id = 0); + Status PrepareDynShape(ConstGraphPtr graph, + const std::vector &user_input, + ge::ComputeGraphPtr &compute_graph, + uint64_t session_id = 0); Status RecordAIPPInfo(ge::ComputeGraphPtr &compute_graph); Status PrepareRunningFormatRefiner(); void SetOptions(const GraphManagerOptions &options); @@ -56,7 +58,8 @@ class GraphPrepare { private: Status Init(const ge::Graph &graph, uint64_t session_id = 0); Status CheckGraph(); - Status CheckRefInputNode(const NodePtr &node, const std::string &input_name, const std::set &ref_nodes); + Status CheckRefInputNode(const NodePtr &node, const std::string &input_name, + const std::set &ref_nodes); Status CheckRefOp(); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); Status AdjustDataOpOutput(const NodePtr &node); diff --git a/ge/graph/preprocess/insert_op/base_insert_op.h b/ge/graph/preprocess/insert_op/base_insert_op.h index 355aaae6..b0d7a7a6 100644 --- a/ge/graph/preprocess/insert_op/base_insert_op.h +++ b/ge/graph/preprocess/insert_op/base_insert_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,7 +51,8 @@ class InsertOpBase { /// @param [in] graph /// @param [in] aippConfigPath aipp /// - virtual Status InsertAippToGraph(ge::ComputeGraphPtr &graph, std::string &aippConfigPath, const uint32_t index) = 0; + virtual Status InsertAippToGraph(ge::ComputeGraphPtr &graph, std::string &aippConfigPath, + const uint32_t index) = 0; /// /// @ingroup ge_omg diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc old mode 100644 new mode 100755 index eb936282..960a19b8 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -124,7 +124,13 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat return PARAM_INVALID; } } - GELOGE(PARAM_INVALID, "when dynamic aipp, shape must be in range [3, 4], but is %zu", shape.size()); + string errormsg = "its shape size must be in range[3,4] which dynamic aipp is linked, " + "maybe this input is not suitable for dynamic aipp"; + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {data_node->GetName() + " shape size", + to_string(shape.size()), errormsg}); + GELOGE(PARAM_INVALID, "The shape size of this node [%s] which linked dynamic aipp must be in range[3, 4], but is %zu", + data_node->GetName().c_str(), shape.size()); return PARAM_INVALID; } @@ -217,8 +223,8 @@ Status AippOp::InsertAippToGraph(ComputeGraphPtr &graph, std::string &aippConfig return SUCCESS; } -NodePtr AippOp::CreateAipp(const OutDataAnchorPtr &out_anchor, const std::string &aippConfigPath, - const uint32_t &index) { +NodePtr AippOp::CreateAipp(const OutDataAnchorPtr &out_anchor, + const std::string &aippConfigPath, const uint32_t &index) { const auto &node = out_anchor->GetOwnerNode(); std::string current_name = node->GetName() + "_" + std::to_string(out_anchor->GetIdx()) + "_huawei_aipp"; auto aipp_opdesc_ptr = MakeShared(current_name, AIPP); @@ -260,19 +266,18 @@ NodePtr AippOp::CreateAipp(const OutDataAnchorPtr &out_anchor, const std::string Status AippOp::AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index) { GeAttrValue::NAMED_ATTRS aipp_attr; ConvertParamToAttr(aipp_attr); - GE_CHK_BOOL_RET_STATUS(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), INTERNAL_ERROR, - "Set name attrs for aipp node failed"); + GE_CHK_BOOL_RET_STATUS(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), + INTERNAL_ERROR, "Set name attrs for aipp node failed"); - GE_CHK_BOOL_RET_STATUS(AttrUtils::SetStr(op_desc, kAippConfigPath, aipp_cfg_path), INTERNAL_ERROR, - "Set config file path attr for aipp node failed"); + GE_CHK_BOOL_RET_STATUS(AttrUtils::SetStr(op_desc, kAippConfigPath, aipp_cfg_path), + INTERNAL_ERROR, "Set config file path attr for aipp node failed"); std::vector empty_names; GE_CHK_BOOL_RET_STATUS(AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, empty_names), INTERNAL_ERROR, "Set ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES attr for aipp node failed"); - GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, - "Set kCurrentAippIndex attr for aipp node failed"); - + GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), + INTERNAL_ERROR, 
"Set kCurrentAippIndex attr for aipp node failed"); // add input/output desc GeTensorDesc tensor; GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); @@ -318,6 +323,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank); return PARAM_INVALID; } + data_node_linked_aipp = data_node; auto data_opdesc = data_node->GetOpDesc(); GE_CHECK_NOTNULL(data_opdesc); string set_dt_str; @@ -330,10 +336,17 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr return PARAM_INVALID; } + // add dynamic or static attr memsage to data + if (GetAippMode() == domi::AippOpParams::static_) { + (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); + } else if (GetAippMode() == domi::AippOpParams::dynamic) { + (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); + } + // In scenario AIPP+CONV2D+POOLING, keep the aipp info to Data, since AIPP disappear after subgraph optimize GeAttrValue::NAMED_ATTRS aipp_attr; ConvertParamToAttr(aipp_attr); - if (!AttrUtils::SetNamedAttrs(data_node->GetOpDesc(), ATTR_NAME_AIPP, aipp_attr)) { + if (!AttrUtils::SetNamedAttrs(data_opdesc, ATTR_NAME_AIPP, aipp_attr)) { GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank); return INTERNAL_ERROR; } @@ -375,10 +388,9 @@ Status AippOp::GetStaticTargetNode(const ComputeGraphPtr &graph, NodePtr &data_n return INTERNAL_ERROR; } target = switchn; - GELOGI( - "Multi-batch/image size and static aipp for data %s, " - "the aipp node will be insert after %s instead of origin data node", - data_node->GetName().c_str(), switchn->GetName().c_str()); + GELOGI("Multi-batch/image size and static aipp for data %s, " + "the aipp node will be insert after %s instead of origin data node", + data_node->GetName().c_str(), switchn->GetName().c_str()); return SUCCESS; } @@ -541,8 +553,8 @@ Status AippOp::ValidateParams() { const domi::AippOpParams::AippMode aipp_mode = aipp_params_->aipp_mode(); if (aipp_mode == domi::AippOpParams::dynamic) { AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( - aipp_params_->max_src_image_size() > 0, PARAM_INVALID, - "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); + aipp_params_->max_src_image_size() > 0, PARAM_INVALID, + "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); } else { AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, "Input format of AIPP conf is undefined"); @@ -737,7 +749,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0); } vector dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim}; - (void)AttrUtils::SetListInt(data_op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_linked_data_shape); + (void)AttrUtils::SetListInt(data_op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_linked_data_shape); int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { @@ -759,7 +771,24 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { return AddNodeToGraph(aipp_node, max_dynamic_aipp_size); } +Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { + // Add dynamic aipp config to aipp_data + GeAttrValue::NAMED_ATTRS aipp_attr; + 
ConvertParamToAttr(aipp_attr); + (void)AttrUtils::SetNamedAttrs(aipp_data_op_desc, ATTR_NAME_AIPP, aipp_attr); + (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); + + // add node name attr to data linked aipp_data, it can be queried by acl. + GE_CHECK_NOTNULL(data_node_linked_aipp); + auto data_op_desc = data_node_linked_aipp->GetOpDesc(); + GE_CHECK_NOTNULL(data_op_desc); + (void)AttrUtils::SetStr(data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, aipp_data_op_desc->GetName()); + (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, data_op_desc->GetName()); + return SUCCESS; +} + Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { + static int index = 0; std::vector input_shape_dim(1, max_dynamic_aipp_size); GeShape input_shape(input_shape_dim); // construct input tensor @@ -767,18 +796,21 @@ Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp TensorUtils::SetReuseInput(input_tensor, false); TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); - // Only flush subgraph name const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); - string node_name = (graph->GetParentGraph() == nullptr) ? kDynamicAippData : (graph->GetName() + "_" + node_name); - + string node_name; + if (index == 0) { + node_name = kDynamicAippData; + } else { + node_name = string(kDynamicAippData) + "_" + to_string(index); + } + ++index; // new add aipp_data ops for dynamic aipp param input OpDescPtr op_desc_ptr_data = MakeShared(node_name, AIPPDATA); GE_CHECK_NOTNULL(op_desc_ptr_data); - // Add dynamic aipp config to aipp_data - GeAttrValue::NAMED_ATTRS aipp_attr; - ConvertParamToAttr(aipp_attr); - (void)AttrUtils::SetNamedAttrs(op_desc_ptr_data, ATTR_NAME_AIPP, aipp_attr); + if (AddAttrToAippData(op_desc_ptr_data) != SUCCESS) { + return INTERNAL_ERROR; + } auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.h b/ge/graph/preprocess/insert_op/ge_aipp_op.h old mode 100644 new mode 100755 index c98935ee..22ae0cea --- a/ge/graph/preprocess/insert_op/ge_aipp_op.h +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -44,6 +44,7 @@ class AippOp : public InsertOpBase { Status ValidateParams() override; protected: + /// /// @ingroup domi_omg /// @brief Generate Op Desc @@ -60,13 +61,15 @@ class AippOp : public InsertOpBase { Status GetTargetPosition(ge::ComputeGraphPtr graph, ge::NodePtr &target_input, std::vector> &target_edges) override; - Status InsertAippToGraph(ge::ComputeGraphPtr &graph, std::string &aippConfigPath, const uint32_t index) override; + Status InsertAippToGraph(ge::ComputeGraphPtr &graph, + std::string &aippConfigPath, + const uint32_t index) override ; domi::AippOpParams::AippMode GetAippMode() override; private: - AippOp &operator=(const AippOp &aipp_op); - AippOp(const AippOp &aipp_op); + AippOp& operator=(const AippOp& aipp_op); + AippOp(const AippOp& aipp_op); void ConvertParamToAttr(ge::GeAttrValue::NAMED_ATTRS &aipp_attrs); void SetCscDefaultValue(); @@ -78,10 +81,13 @@ class AippOp : public InsertOpBase { Status CreateAippData(const NodePtr &aipp); Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); + Status AddAttrToAippData(const OpDescPtr &aipp_data_op_desc); domi::AippOpParams *aipp_params_ = nullptr; ge::NodePtr aipp_node_ = nullptr; + ge::NodePtr data_node_linked_aipp = nullptr; }; } // namespace ge #endif // GE_GRAPH_PREPROCESS_INSERT_OP_GE_AIPP_OP_H_ + diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc old mode 100644 new mode 100755 index c55be013..8274ce8c --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "common/util.h" +#include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" #include "framework/common/ge_inner_error_codes.h" @@ -120,18 +121,16 @@ Status InsertNewOpUtil::CheckPositionNotRepeat() { for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); - - GE_IF_BOOL_EXEC(item->related_input_rank() != another_item->related_input_rank(), continue;); - - GE_IF_BOOL_EXEC( - item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 || - item->input_edge_idx(0) == another_item->input_edge_idx(0), - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! please check related_input_rank and input_edge_idx."); - return PARAM_INVALID;); + GE_IF_BOOL_EXEC(item->related_input_rank() == another_item->related_input_rank(), + string errormsg = "Can not insert aipp to the same postion! Please ensure related_input_rank" + " param is different in different aipp config."; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + GELOGE(PARAM_INVALID, + "Can not insert aipp op to the same postion! 
Please ensure related_input_rank param " + "is different in different aipp config."); + return PARAM_INVALID;); } } - return SUCCESS; } @@ -162,30 +161,25 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { std::unique_ptr aippParams(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(aippParams); - GE_IF_BOOL_EXEC(aippNodes.size() > 0, GE_CHK_STATUS(GetAippParams(aippParams, aippNodes[0])); - aippMode = (aippMode == domi::AippOpParams::undefined) ? aippParams->aipp_mode() : aippMode; - GE_CHK_BOOL_RET_STATUS(aippMode == aippParams->aipp_mode(), PARAM_INVALID, - "The aipp_mode of all aipp_op must be the same");); GE_IF_BOOL_EXEC( - aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { - std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); - GE_CHECK_NOTNULL(currAippParam); - GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); - - GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID, - "The aipp_mode of all aipp_op must be the same"); - if (aippMode == domi::AippOpParams::static_) { - GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, - "The input_format of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, - "The src_image_size_w of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, - "The src_image_size_h of all aipp_ops after one Data should be the same"); - } else { - GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), PARAM_INVALID, - "The max_src_image_size of all aipp_ops after one Data should be the same"); - } - }); + aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { + std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); + GE_CHECK_NOTNULL(currAippParam); + GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); + + if (aippMode == domi::AippOpParams::static_) { + GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, + "The input_format of all aipp_ops after one Data should be the same"); + GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, + "The src_image_size_w of all aipp_ops after one Data should be the same"); + GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, + "The src_image_size_h of all aipp_ops after one Data should be the same"); + } else { + GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), + PARAM_INVALID, + "The max_src_image_size of all aipp_ops after one Data should be the same"); + } + }); } return SUCCESS; diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/ge/graph/preprocess/insert_op/util_insert_aipp_op.h index ae431c32..e785da98 100644 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.h +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
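For context on the check changed above: after this patch CheckPositionNotRepeat rejects any pair of AIPP configs that share related_input_rank, independent of input_edge_idx. A minimal sketch of that rule with plain STL types, not part of the patch (AippConf and PositionsAreUnique are hypothetical stand-ins for domi::AippOpParams and the GE_* macro logic):

    #include <cstdint>
    #include <set>
    #include <vector>

    // Hypothetical stand-in for the protobuf message domi::AippOpParams.
    struct AippConf {
      uint32_t related_input_rank;
    };

    // Returns true only when every AIPP config targets a distinct input rank,
    // which is the condition the pass enforces before inserting AIPP nodes.
    bool PositionsAreUnique(const std::vector<AippConf> &confs) {
      std::set<uint32_t> seen;
      for (const auto &conf : confs) {
        if (!seen.insert(conf.related_input_rank).second) {
          return false;  // two configs would be inserted at the same position (E10043)
        }
      }
      return true;
    }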
diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index 298e7749..e05d1810 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "graph/preprocess/multi_batch_copy_graph.h" #include @@ -44,6 +43,7 @@ using std::set; using std::string; using std::vector; +using std::map; namespace ge { namespace multibatch { @@ -55,9 +55,7 @@ const int kDataOutIndex = 0; const int kDataInIndex = 0; const int kMergeDataOutIndex = 0; const int kStaticOutput = -1; -const int kDynmaicDims = -1; -const int kDynamicBatchDynamicDimsNum = 1; -const int kDynamicImgSizeDynamciDimsNum = 2; + inline bool IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); } @@ -113,8 +111,9 @@ NodePtr InsertCopyNode(const NodePtr &node, size_t n) { desc->CopyAttrsFrom(*src_op_desc); for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { auto input_desc = desc->MutableInputDesc(i); - GE_IF_BOOL_EXEC(input_desc == nullptr, GELOGW("Get null input desc by index %u from node %s when copy from %s", i, - desc->GetName().c_str(), node->GetName().c_str()); + GE_IF_BOOL_EXEC(input_desc == nullptr, + GELOGW("Get null input desc by index %u from node %s when copy from %s", i, + desc->GetName().c_str(), node->GetName().c_str()); continue); input_desc->CopyAttrsFrom(src_op_desc->GetInputDesc(i)); @@ -213,16 +212,16 @@ Status MultiBatchGraphCopyer::CopyGraph() { return ret; } - ret = CheckDataShape(origin_data_nodes_); - if (ret != SUCCESS) { - return ret; - } - if (LabelStatus() != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to label status for all nodes."); return INTERNAL_ERROR; } + ret = CheckAndParseDynamicData(); + if (ret != SUCCESS) { + return ret; + } + ret = CreateNewNodes(); if (ret != SUCCESS) { return ret; @@ -265,27 +264,24 @@ Status MultiBatchGraphCopyer::Init() { } Status MultiBatchGraphCopyer::LabelStatus() { - for (const auto &data : origin_data_nodes_) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - if (!IsAllDimsPositive(data_shape.GetDims())) { - origin_nodes_status_[data.get()] = kNodeInBatchBranch; - } - } + map> frame_enters; + InitStatus(frame_enters); + bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { changed = false; for (const auto &node : origin_all_nodes_) { - auto iter = origin_nodes_status_.find(node.get()); - if (iter != origin_nodes_status_.end()) { - continue; - } for (auto &in_node : node->GetInAllNodes()) { bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; if (is_in_batch) { - origin_nodes_status_[node.get()] = kNodeInBatchBranch; - changed = true; + if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || + origin_nodes_status_[node.get()] != kNodeInBatchBranch) { + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + ResetEnterStatus(frame_enters, node); + changed = true; + } break; } } @@ -316,6 +312,101 @@ Status 
MultiBatchGraphCopyer::LabelStatus() { return SUCCESS; } +void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { + for (const auto &node : origin_all_nodes_) { + if (node->GetType() != ENTER && node->GetType() != REFENTER) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + string frame_name; + if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { + frame_enters[frame_name].emplace_back(node); + } + } + + for (const auto &data : origin_data_nodes_) { + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + if (!IsAllDimsPositive(data_shape.GetDims())) { + origin_nodes_status_[data.get()] = kNodeInBatchBranch; + } + } +} + +void MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, const NodePtr &node) { + if (node->GetType() != ENTER && node->GetType() != REFENTER) { + return; + } + + for (const auto &frame_enter : frame_enters) { + auto &enters = frame_enter.second; + if (std::find(enters.begin(), enters.end(), node) != enters.end()) { + for (const auto &enter : enters) { + origin_nodes_status_[enter.get()] = kNodeInBatchBranch; + } + break; + } + } +} + +Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){ + size_t unknown_shape_count = 0; + auto data_name_and_shape = GetLocalOmgContext().user_input_dims; + GELOGD("raw data_name_and_shape size: %zu", data_name_and_shape.size()); + for (const auto &node : origin_all_nodes_) { + auto data_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex); + auto data_shape = data_desc.GetShape(); + auto data_format = data_desc.GetFormat() == Format::FORMAT_NCHW ? "NCHW" : + data_desc.GetFormat() == Format::FORMAT_NHWC ? "NHWC" : "Others"; + + auto data_name = node->GetName(); + auto branch_status = GetNodeStatus(node); + if (branch_status != kNodeStartNode) { + continue; + } + if (IsAllDimsPositive(data_shape.GetDims())) { + continue; + } + ++unknown_shape_count; + auto iter = find(data_name_order_.begin(), data_name_order_.end(), data_name); + if (iter == data_name_order_.end()) { + if (dynamic_type_ == DynamicType::kDynamicBatch) { + auto ret = CheckDynamicBatchShape(data_shape.GetDims(), data_name); + if (!ret) { + return PARAM_INVALID; + } + } else if (dynamic_type_ == DynamicType::kDynamicImageSize) { + auto ret = CheckDynamicImageSizeShape(data_shape.GetDims(), data_name, data_format); + if (!ret) { + return PARAM_INVALID; + } + } else if (dynamic_type_ == DynamicType::kDynamicDims) { + ErrorManager::GetInstance().ATCReportErrMessage("E10001", + {"parameter", "reason"}, + {"--input_shape", + "all dynamic data must be set in --input_shape"}); + GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape", + node->GetName().c_str(), data_shape.ToString().c_str()); + return INTERNAL_ERROR; + } + data_name_and_shape.emplace_back(data_name, data_shape.GetDims()); + } + } + auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); + if (ret != SUCCESS){ + return ret; + } + if (unknown_shape_count == 0) { + ErrorManager::GetInstance().ATCReportErrMessage("E10040"); + GELOGE(PARAM_INVALID, + "Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims"); + return PARAM_INVALID; + } + return SUCCESS; +} + Status MultiBatchGraphCopyer::CreateNewNodes() { shape_data_ = InsertShapeDataNode(); if (shape_data_ == nullptr) { @@ -331,10 +422,6 @@ Status MultiBatchGraphCopyer::CreateNewNodes() { switch (branch_status) { case kNodeStartNode: GELOGD("Name: %s, type: %s, status: 
kNodeStartNode.", node->GetName().c_str(), node->GetType().c_str()); - ret = UpdateDataToDynamicInfo(node); - if (ret != SUCCESS) { - break; - } ret = InsertSwitchNForData(node); if (ret == SUCCESS) { ret = UpdateMaxShapeToData(node); @@ -652,7 +739,6 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); auto data_name = data->GetName(); (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); - if (IsAllDimsPositive(data_shape.GetDims())) { GELOGI("The shape of data %s are positive(%s), skip the multi batch process", data->GetName().c_str(), data_shape.ToString().c_str()); @@ -697,7 +783,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { GELOGE(INTERNAL_ERROR, "Failed to add attr value on output %zu tensor", i); return INTERNAL_ERROR; } - (void)AttrUtils::SetListInt(tensor, ATTR_NAME_COMBINED_DYNAMIC_DIMS, shape.GetDims()); + (void) AttrUtils::SetListInt(tensor, ATTR_NAME_COMBINED_DYNAMIC_DIMS, shape.GetDims()); if (switchn_desc->AddOutputDesc("output" + std::to_string(i), tensor) != GRAPH_SUCCESS) { GELOGE(GRAPH_FAILED, "Opdesc AddOutputDesc failed"); return GRAPH_FAILED; @@ -731,57 +817,6 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { data_nodes_to_switchn_[data.get()] = switchn; return SUCCESS; } -Status MultiBatchGraphCopyer::UpdateDataToDynamicInfo(const NodePtr &data) { - auto data_desc = NodeUtils::GetOutputDesc(*data, kDataOutIndex); - auto data_shape = data_desc.GetShape(); - auto data_format = data_desc.GetFormat(); - auto data_name = data->GetName(); - if (IsAllDimsPositive(data_shape.GetDims())) { - return SUCCESS; - } - if (data_to_dynamic_info_.find(data_name) == data_to_dynamic_info_.end()) { - auto data_shape_dims = data_shape.GetDims(); - auto dynamic_dims_num = std::count_if(data_shape_dims.begin(), data_shape_dims.end(), - [&data_shape_dims](int64_t dim) { return dim < 0; }); - if (dynamic_type_ == DynamicType::kDynamicBatch) { - if (dynamic_dims_num != kDynamicBatchDynamicDimsNum || data_shape.GetDim(0) != kDynmaicDims) { - GELOGE(INTERNAL_ERROR, "data: %s shape:%s do not satisfy dynamic batch rule", data->GetName().c_str(), - data_shape.ToString().c_str()); - return INTERNAL_ERROR; - } - } else if (dynamic_type_ == DynamicType::kDynamicImageSize) { - int64_t height = 0; - int64_t width = 0; - if (data_format == FORMAT_NCHW) { - height = data_shape.GetDim(NCHW_DIM_H); - width = data_shape.GetDim(NCHW_DIM_W); - } else if (data_format == FORMAT_NHWC) { - height = data_shape.GetDim(NHWC_DIM_H); - width = data_shape.GetDim(NHWC_DIM_W); - } - if (dynamic_dims_num != kDynamicImgSizeDynamciDimsNum || height != kDynmaicDims || width != kDynmaicDims) { - GELOGE(INTERNAL_ERROR, "data: %s shape:%s do not satisfy dynamic image size rule", data->GetName().c_str(), - data_shape.ToString().c_str()); - return INTERNAL_ERROR; - } - } else if (dynamic_type_ == DynamicType::kDynamicDims) { - GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape", data->GetName().c_str(), - data_shape.ToString().c_str()); - return INTERNAL_ERROR; - } - // all data has dynamic dims are not in atc parameter --input_shape - if (data_to_dynamic_info_.empty()) { - vector>> tmp_data_name_and_shape{std::make_pair(data_name, data_shape_dims)}; - auto ret = ParserDataToDynmaicInfo(shapes_, tmp_data_name_and_shape, data_to_dynamic_info_); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "parse data 
: %s dynamic gear info failed", data_name.c_str()); - return INTERNAL_ERROR; - } - } - data_to_dynamic_info_[data_name] = data_to_dynamic_info_.begin()->second; - } - return SUCCESS; -} Status MultiBatchGraphCopyer::InsertMergeForEdgeNode(const NodePtr &node) { for (auto &in_data_anchor : node->GetAllInDataAnchors()) { auto src_out_anchor = in_data_anchor->GetPeerOutAnchor(); @@ -854,7 +889,7 @@ Status MultiBatchGraphCopyer::LinkEdges() { Status MultiBatchGraphCopyer::LinkDataToSwitchN(const NodePtr &data) { auto switchn = data_nodes_to_switchn_[data.get()]; auto ret = - GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), switchn->GetInDataAnchor(kSwitchNPredIndex)); + GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), switchn->GetInDataAnchor(kSwitchNPredIndex)); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link shape data %s to switchn %s", shape_data_->GetName().c_str(), switchn->GetName().c_str()); return INTERNAL_ERROR); @@ -1032,18 +1067,11 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { GELOGD("There is no multi-batch options, no need to process multi-batch copy"); return SUCCESS; } - map>> data_to_dynamic_info; - // parser data dynamic info from atc parameter --input_shape - if (ParserDataToDynmaicInfo(shapes, GetLocalOmgContext().user_input_dims, data_to_dynamic_info) != SUCCESS) { - GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); - return PARAM_INVALID; - } DynamicType dynamic_type = DynamicType::kDynamicUnknown; if (!GetLocalOmgContext().dynamic_batch_size.empty()) { dynamic_type = DynamicType::kDynamicBatch; } else if (!GetLocalOmgContext().dynamic_image_size.empty()) { - dynamic_type = DynamicType::kDynamicImageSize; - ; + dynamic_type = DynamicType::kDynamicImageSize;; } else if (!GetLocalOmgContext().dynamic_dims.empty()) { dynamic_type = DynamicType::kDynamicDims; } @@ -1057,7 +1085,6 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { } copyer.SetDynamicType(dynamic_type); copyer.SetUserDesignateShape(user_designate_shape); - copyer.SetDataToDynamicInfo(data_to_dynamic_info); return copyer.CopyGraph(); } @@ -1077,8 +1104,8 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { // +-----------+ / // | Data | --------------->/ // +-----------+ -void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, set &dynamic_output_index, - vector &dynamic_output_dims) { +void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, + set &dynamic_output_index, vector &dynamic_output_dims) { GELOGD("Try get dynamic shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); const auto &func_desc = node->GetOpDesc(); if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { @@ -1139,8 +1166,8 @@ void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, s // +-----------+ /. // | Data | --------------------------------------------------------------------------->/. 
j = 1 // +-----------+ -void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, set &dynamic_output_index, - vector &dynamic_output_dims) { +void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, + set &dynamic_output_index, vector &dynamic_output_dims) { GELOGD("Try get dynamic shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); const auto &netoutput_desc = node->GetOpDesc(); const auto &inputnode_to_netoutput = node->GetInAllNodes(); @@ -1164,8 +1191,8 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, s } // Connect NetOutput directly: DTS2020070612498 -void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set &dynamic_output_index, - vector &dynamic_output_dims) { +void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, + const set &dynamic_output_index, vector &dynamic_output_dims) { GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); const auto &netoutput_desc = node->GetOpDesc(); const auto &inputnode_to_netoutput = node->GetInAllNodes(); diff --git a/ge/graph/preprocess/multi_batch_copy_graph.h b/ge/graph/preprocess/multi_batch_copy_graph.h index 062b98d2..edd79ada 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/ge/graph/preprocess/multi_batch_copy_graph.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ #define GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ #include @@ -59,7 +58,9 @@ class MultiBatchGraphCopyer { void SetDataToDynamicInfo(const map>> &designate_shape) { data_to_dynamic_info_ = designate_shape; } - void SetDynamicType(const DynamicType dynamic_type) { dynamic_type_ = dynamic_type; } + void SetDynamicType(const DynamicType dynamic_type) { + dynamic_type_ = dynamic_type; + } Status CopyGraph(); private: @@ -68,6 +69,8 @@ class MultiBatchGraphCopyer { // label status for origin_all_nodes_ Status LabelStatus(); + void InitStatus(std::map> &frame_enters); + void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); // add nodes functions Status CreateNewNodes(); @@ -102,7 +105,7 @@ class MultiBatchGraphCopyer { Status LinkNodeToMerge(const NodePtr &node, int out_index, const NodePtr &merge); Status CopyInDataEdges(const NodePtr &origin_node, int batch_num, const NodePtr ©ed_node); Status CopyInControlEdges(const NodePtr &node, int batch_num, const NodePtr ©ed_node); - Status UpdateDataToDynamicInfo(const NodePtr &node); + Status CheckAndParseDynamicData(); bool IsInBatchBranch(const NodePtr &node); NodeStatus GetNodeStatus(const NodePtr &node) { return origin_nodes_status_[node.get()]; }; Status CheckCopyResult(const std::vector &start_nodes); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 005240ca..9909b0dc 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,12 +26,18 @@ #include "graph/utils/node_utils.h" #include "graph/ge_context.h" #include "graph/common/local_context.h" +#include "framework/common/types.h" namespace ge { namespace multibatch { constexpr int kDecimal = 10; constexpr uint8_t kMaxShapesCount = 100; constexpr uint8_t kMinShapesCount = 2; +const int kDynmaicDims = -1; +const int kDynamicBatchDynamicDimsNum = 1; +const int kDynamicImgSizeDynamciDimsNum = 2; +const size_t kMaxNDDimNum = 4; +const size_t kMinNDDimNum = 1; void ParseDynamicSize(string dynamic_size, vector> &shapes) { std::vector shape_strs = ge::StringUtils::Split(dynamic_size, ';'); @@ -101,15 +107,15 @@ bool InitDynamicParams(vector> &shapes) { /// Status ParserDataToDynmaicInfo(const vector> &shapes, vector>> &data_name_and_shape, - map>> &data_to_dynamic_info) { + map> > &data_to_dynamic_info) { size_t cur_data_index = 0; for (size_t index = 0; index < data_name_and_shape.size(); ++index) { auto &cur_item = data_name_and_shape[index]; auto &data_name = cur_item.first; auto &data_shape = cur_item.second; - auto dynamic_dims_num = - std::count_if(data_shape.begin(), data_shape.end(), [&data_shape](int64_t dim) { return dim < 0; }); - vector> dynamic_info; + auto dynamic_dims_num = std::count_if(data_shape.begin(), data_shape.end(), + [&data_shape](int64_t dim){ return dim < 0; }); + vector > dynamic_info; for (auto &dynamic_gear_info : shapes) { vector one_gear; if (dynamic_gear_info.size() == static_cast(dynamic_dims_num)) { @@ -137,6 +143,7 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, return SUCCESS; } + /// /// @ingroup ge /// @brief Check Dynamic Param is invalid. 
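ParserDataToDynmaicInfo, reformatted in the hunk below, distributes each dynamic gear from --dynamic_dims across the data inputs. A simplified sketch of that idea, assuming gear values are consumed in input-declaration order and omitting the error reporting (Gear and SplitGears are hypothetical names, not the GE implementation):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    using Gear = std::vector<int64_t>;

    // Each data input owns as many values of every gear as it has -1 dimensions.
    std::map<std::string, std::vector<Gear>> SplitGears(
        const std::vector<Gear> &gears,
        const std::vector<std::pair<std::string, std::vector<int64_t>>> &data_shapes) {
      std::map<std::string, std::vector<Gear>> result;
      size_t offset = 0;
      for (const auto &item : data_shapes) {
        size_t dyn_dim_count = 0;
        for (int64_t dim : item.second) {
          if (dim < 0) {
            ++dyn_dim_count;
          }
        }
        std::vector<Gear> own_gears;
        for (const auto &gear : gears) {
          if (offset + dyn_dim_count > gear.size()) {
            return {};  // gear has fewer values than the dynamic dims it must cover
          }
          own_gears.emplace_back(gear.begin() + offset, gear.begin() + offset + dyn_dim_count);
        }
        result[item.first] = std::move(own_gears);
        offset += dyn_dim_count;
      }
      return result;
    }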
@@ -146,7 +153,7 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, Status CheckDynamicParams(const vector> &shapes) { if (shapes.size() < kMinShapesCount) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10035", {"shapesize", "minshapesize"}, {std::to_string(shapes.size()), std::to_string(kMinShapesCount - 1)}); + "E10035", {"shapesize", "minshapesize"}, {std::to_string(shapes.size()), std::to_string(kMinShapesCount - 1)}); GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " "value size [%zu] must be greater than [%zu].", @@ -155,7 +162,7 @@ Status CheckDynamicParams(const vector> &shapes) { } if (shapes.size() > kMaxShapesCount) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10036", {"shapesize", "maxshapesize"}, {std::to_string(shapes.size()), std::to_string(kMaxShapesCount + 1)}); + "E10036", {"shapesize", "maxshapesize"}, {std::to_string(shapes.size()), std::to_string(kMaxShapesCount + 1)}); GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " "value size [%zu] must be less than [%zu].", @@ -205,9 +212,9 @@ Status CalcShape(const std::vector &batch_shape, GeShape &data_shape) { if (data_shape.GetDim(i) < 0) { if (batch_shape_index >= batch_shape.size()) { ErrorManager::GetInstance().ATCReportErrMessage( - "E19012", {"function", "reason"}, - {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + - " does not match the data shape " + data_shape.ToString()}); + "E19012", {"function", "reason"}, + {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + + " does not match the data shape " + data_shape.ToString()}); GELOGE(PARAM_INVALID, "Failed to calc tensor shape, the batch shape count %zu, does not match the data shape %s", batch_shape.size(), data_shape.ToString().c_str()); @@ -218,9 +225,8 @@ Status CalcShape(const std::vector &batch_shape, GeShape &data_shape) { } if (batch_shape_index != batch_shape.size()) { ErrorManager::GetInstance().ATCReportErrMessage( - "E19012", {"function", "reason"}, - {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + " does not match the data shape " + - data_shape.ToString()}); + "E19012", {"function", "reason"}, {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + + " does not match the data shape " + data_shape.ToString()}); GELOGE(PARAM_INVALID, "Failed to calc tensor shape, the batch shape count %zu, does not match the data shape %s", batch_shape.size(), data_shape.ToString().c_str()); return PARAM_INVALID; @@ -252,5 +258,62 @@ Status StampDynamicType(const OpDescPtr &op_desc) { } return SUCCESS; } + +/// +/// @ingroup ge +/// @brief Check dynamic batch Shape. +/// @param [in] const vector &shape: data_shape to be checked. +/// @param [in] const string &data_name: cur data name. +/// @return 0: true/false +/// +bool CheckDynamicBatchShape(const vector &shape, const string &data_name) { + if (shape[0] == kDynmaicDims) { + for (size_t i = 1; i < shape.size(); ++i) { + if (shape[i] < 1) { + ErrorManager::GetInstance().ATCReportErrMessage("E10018", {"index", "shape"}, + {std::to_string(i), std::to_string(shape[i])}); + GELOGE(ge::PARAM_INVALID, + "Only batch N can be -1 when set --dynamic_batch_size, current data: %s shape[%zu] is %ld", + data_name.c_str(), i, shape[i]); + return false; + } + } + return true; + } else { + return false; + } +} + +/// +/// @ingroup ge +/// @brief Check Dynamic image size shape. 
+/// @param [in] unordered_map> &shape_map: map of data_name and data_shape. +/// @param [in] const std::string &input_format: format of input. +/// @return 0: true/false +/// +bool CheckDynamicImageSizeShape(const vector &shape, const string &data_name, + const std::string &input_format) { + int64_t height = 0; + int64_t width = 0; + if (input_format == "NCHW") { + height = shape[NCHW_DIM_H]; + width = shape[NCHW_DIM_W]; + } + + if (input_format == "NHWC") { + height = shape[NHWC_DIM_H]; + width = shape[NHWC_DIM_W]; + } + + if (height == kDynmaicDims && width == kDynmaicDims && + std::count(shape.begin(), shape.end(), kDynmaicDims) == kDynamicImgSizeDynamciDimsNum) { + return true; + } else { + ErrorManager::GetInstance().ATCReportErrMessage("E10019"); + GELOGE(ge::PARAM_INVALID, + "--input_shape's shape is invalid, only height and width can be -1 when set --dynamic_image_size."); + return false; + } +} } // namespace multibatch } // namespace ge diff --git a/ge/graph/preprocess/multi_batch_options.h b/ge/graph/preprocess/multi_batch_options.h index 18f667ae..8563f2f1 100644 --- a/ge/graph/preprocess/multi_batch_options.h +++ b/ge/graph/preprocess/multi_batch_options.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,6 +70,28 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, /// @return 0: SUCCESS / others: INTERNAL_ERROR /// Status StampDynamicType(const OpDescPtr &op_desc); + +/// +/// @ingroup ge +/// @brief Check dynamic batch Shape. +/// @param [in] const vector &shape: data_shape to be checked. +/// @param [in] const string &data_name: cur data name. +/// @return 0: true/false +/// +bool CheckDynamicBatchShape(const vector &shape, const string &data_name); + +/// +/// @ingroup ge +/// @brief Check Dynamic image size shape. +/// @param [in] unordered_map> &shape_map: map of data_name and data_shape. +/// @param [in] const string &data_name: cur data name. +/// @param [in] const std::string &input_format: cur data format. +/// @param [in] const std::string &input_format: format of input. 
+/// @return 0: true/false +/// +bool CheckDynamicImageSizeShape(const vector &shape, const string &data_name, + const std::string &input_format); + } // namespace multibatch } // namespace ge -#endif // GE_GRAPH_PREPROCESS_MULTI_BATCH_OPTIONS_H_ +#endif // GE_GRAPH_PREPROCESS_MULTI_BATCH_OPTIONS_H_ diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt new file mode 100644 index 00000000..e78eee2e --- /dev/null +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -0,0 +1,204 @@ +set(PROTO_LIST + "${METADEF_DIR}/proto/task.proto" +) + +protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) + +set(SRC_LIST + "engine/host_cpu_engine.cc" + "ops_kernel_store/host_cpu_ops_kernel_info.cc" + "ops_kernel_store/op/op_factory.cc" + "ops_kernel_store/op/host_op.cc" +) + +set(CPU_OPS_KERNEL_LIST + "ops_kernel_store/host_cpu_ops_kernel_builder.cc" +) + +############ libhost_cpu_engine.so ############ +add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(host_cpu_engine PRIVATE + -Werror +) + +target_include_directories(host_cpu_engine PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(host_cpu_engine PRIVATE + $ + -Wl,--no-as-needed + protobuf + c_sec + graph + register + slog + runtime + -Wl,--as-needed +) + +############ atcstub/libhost_cpu_engine.so ############ +add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(atc_host_cpu_engine PRIVATE + -Werror +) + +target_compile_definitions(atc_host_cpu_engine PRIVATE + COMPILE_OMG_PACKAGE +) + +target_include_directories(atc_host_cpu_engine PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(atc_host_cpu_engine PRIVATE + $ + -Wl,--no-as-needed + protobuf + c_sec + graph + register + slog + runtime_compile + -Wl,--as-needed +) + +set_target_properties(atc_host_cpu_engine PROPERTIES + OUTPUT_NAME host_cpu_engine + LIBRARY_OUTPUT_DIRECTORY atclib +) + +############ libhost_cpu_opskernel_builder.so ############ +add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) + +target_compile_options(host_cpu_opskernel_builder PRIVATE + -Werror +) + +target_include_directories(host_cpu_opskernel_builder PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(host_cpu_opskernel_builder PRIVATE + $ + -Wl,--no-as-needed + protobuf + c_sec + slog + graph + register + -Wl,--as-needed +) + +############ atclib/libhost_cpu_opskernel_builder.so ############ +add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) + +target_compile_options(atc_host_cpu_opskernel_builder PRIVATE + -Werror +) + +target_include_directories(atc_host_cpu_opskernel_builder PRIVATE + 
${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE + $ + -Wl,--no-as-needed + protobuf + c_sec + slog + graph + register + -Wl,--as-needed +) + +set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES + OUTPUT_NAME host_cpu_opskernel_builder + LIBRARY_OUTPUT_DIRECTORY atclib +) + +############ libhost_cpu_opskernel_builder.a ############ +add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST}) + +target_compile_options(host_cpu_opskernel_builder_static PRIVATE + -Werror +) + +target_include_directories(host_cpu_opskernel_builder_static PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(host_cpu_opskernel_builder_static PRIVATE + $ + protobuf + c_sec +) + +############ install ############ +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(TARGETS host_cpu_engine host_cpu_opskernel_builder OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) + +install(TARGETS atc_host_cpu_engine atc_host_cpu_opskernel_builder OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/atclib +) diff --git a/ge/host_cpu_engine/common/constant/constant.h b/ge/host_cpu_engine/common/constant/constant.h index a3cabdc4..b9603b6a 100644 --- a/ge/host_cpu_engine/common/constant/constant.h +++ b/ge/host_cpu_engine/common/constant/constant.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.cc b/ge/host_cpu_engine/engine/host_cpu_engine.cc index 648e13b1..cdbad1ed 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.cc +++ b/ge/host_cpu_engine/engine/host_cpu_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.h b/ge/host_cpu_engine/engine/host_cpu_engine.h index ecafd98b..c8d5608f 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.h +++ b/ge/host_cpu_engine/engine/host_cpu_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_cpu_engine/module.mk b/ge/host_cpu_engine/module.mk index 41de4503..2212b5ee 100644 --- a/ge/host_cpu_engine/module.mk +++ b/ge/host_cpu_engine/module.mk @@ -55,3 +55,63 @@ LOCAL_SRC_FILES := $(local_lib_src_files) LOCAL_C_INCLUDES := $(local_lib_inc_path) include ${BUILD_HOST_SHARED_LIBRARY} + +#compiler for host ops kernel builder +include $(CLEAR_VARS) +LOCAL_MODULE := libhost_cpu_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libregister \ + +LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} + +#compiler for host static lib +include $(CLEAR_VARS) +LOCAL_MODULE := libhost_cpu_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := libprotobuf \ + libgraph \ + libregister \ + +LOCAL_SHARED_LIBRARIES := libc_sec \ + libslog \ + +LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_STATIC_LIBRARY} + +#compiler for atc ops kernel builder +include $(CLEAR_VARS) +LOCAL_MODULE := atclib/libhost_cpu_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libregister \ + +LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc new file mode 100644 index 00000000..adb252bc --- /dev/null +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc @@ -0,0 +1,102 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "host_cpu_ops_kernel_builder.h" +#include +#include "common/ge_inner_error_codes.h" +#include "ge/ge_api_types.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" +#include "framework/common/debug/ge_log.h" +#include "host_cpu_engine/common/constant/constant.h" +#include "register/ops_kernel_builder_registry.h" + +namespace ge { +namespace host_cpu { +REGISTER_OPS_KERNEL_BUILDER(kHostCpuOpKernelLibName, HostCpuOpsKernelBuilder); + +Status HostCpuOpsKernelBuilder::Finalize() { + return SUCCESS; +} +Status HostCpuOpsKernelBuilder::Initialize(const map &options) { + return SUCCESS; +} + +Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { + OpDescPtr op_desc = ge_node.GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); + return FAILED; + } + + bool is_shape_unknown = false; + if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { + if (is_shape_unknown) { + GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); + return SUCCESS; + } + } + + const string name = ge_node.GetName(); + const string type = ge_node.GetType(); + GELOGD("Calc op[%s:%s] running param, output size=%zu.", name.c_str(), type.c_str(), op_desc->GetOutputsSize()); + + for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { + GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); + Format format = output_tensor.GetFormat(); + DataType data_type = output_tensor.GetDataType(); + + int64_t mem_size = 0; + // If mem size has been set, no need reset. + if ((TensorUtils::GetSize(output_tensor, mem_size) == GRAPH_SUCCESS) && (mem_size > 0)) { + GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", + name.c_str(), type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), mem_size); + continue; + } + + int64_t output_mem_size = 0; + GeShape output_shape = output_tensor.GetShape(); + if ((TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size) != GRAPH_SUCCESS) || + (output_mem_size < 0)) { + GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", + name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + return FAILED; + } + GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", + name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + + TensorUtils::SetSize(output_tensor, output_mem_size); + if (op_desc->UpdateOutputDesc(static_cast(i), output_tensor) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, + TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); + return FAILED; + } + } + + GELOGD("Calc op[%s:%s] running param success.", name.c_str(), type.c_str()); + return SUCCESS; +} + +Status HostCpuOpsKernelBuilder::GenerateTask(const Node &node, RunContext &context, vector &tasks) { + // no need to generate device task + return SUCCESS; +} +} // namespace host_cpu +} // namespace ge \ No newline at end of file diff --git 
a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h new file mode 100644 index 00000000..82375b9f --- /dev/null +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h @@ -0,0 +1,37 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ +#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ + +#include "common/opskernel/ops_kernel_builder.h" + +namespace ge { +namespace host_cpu { +class HostCpuOpsKernelBuilder : public OpsKernelBuilder { + public: + Status Initialize(const map &options) override; + + Status Finalize() override; + + Status CalcOpRunningParam(Node &node) override; + + Status GenerateTask(const Node &node, RunContext &context, std::vector &tasks) override; +}; +} // namespace host_cpu +} // namespace ge + +#endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc index 4e7be2d5..2d7798a4 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -95,13 +95,13 @@ Status HostCpuOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { GeShape output_shape = output_tensor.GetShape(); if ((TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size) != GRAPH_SUCCESS) || (output_mem_size < 0)) { - GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", name.c_str(), - type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", + name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); return FAILED; } - GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, - output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", + name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); TensorUtils::SetSize(output_tensor, output_mem_size); diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h index 1202cc8a..d29e0c65 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc index 472fca45..a6e00f4a 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h index 757b96a6..0f560485 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op.h b/ge/host_cpu_engine/ops_kernel_store/op/op.h index c1e1619c..c094f080 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
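Both CalcOpRunningParam implementations above delegate the per-output size to TensorUtils::CalcTensorMemSize. As a rough illustration of what that size amounts to for a fully static shape (the real helper additionally handles format-dependent padding and alignment, which this sketch ignores; ApproxTensorMemSize is a hypothetical name):

    #include <cstdint>
    #include <vector>

    // Element count times element width, or -1 if any dimension is still unknown.
    // Not the GE implementation, only an approximation of its result.
    int64_t ApproxTensorMemSize(const std::vector<int64_t> &dims, int64_t bytes_per_element) {
      int64_t element_count = 1;
      for (int64_t dim : dims) {
        if (dim < 0) {
          return -1;  // unknown shape: size cannot be computed statically
        }
        element_count *= dim;
      }
      return element_count * bytes_per_element;
    }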
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc index efe44f80..176ae579 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h index 92f627fd..3a235ffd 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/add_kernel.cc b/ge/host_kernels/add_kernel.cc index afef1c37..1c206018 100644 --- a/ge/host_kernels/add_kernel.cc +++ b/ge/host_kernels/add_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -138,7 +138,8 @@ Status AddKernel::AddCheck(const OpDescPtr &op_desc_ptr, const std::vectorGetOutputsSize() != kAddOutputSize)) { - GELOGW("The number of input for add must be %zu, output number must be %zu.", kAddInputSize, kAddOutputSize); + GELOGW("The number of input for add must be %zu, output number must be %zu.", kAddInputSize, + kAddOutputSize); return PARAM_INVALID; } // input vector elements must not be null diff --git a/ge/host_kernels/add_kernel.h b/ge/host_kernels/add_kernel.h old mode 100644 new mode 100755 index f8fd272e..70800b66 --- a/ge/host_kernels/add_kernel.h +++ b/ge/host_kernels/add_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/broadcast_args_kernel.cc b/ge/host_kernels/broadcast_args_kernel.cc index 545d4f8e..d8880db9 100644 --- a/ge/host_kernels/broadcast_args_kernel.cc +++ b/ge/host_kernels/broadcast_args_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,13 +41,12 @@ Status BroadcastArgsKernel::Compute(const OpDescPtr op_desc_ptr, const std::vect } // check input size bool size_check = - (op_desc_ptr->GetAllInputsDesc().size() != kBCastArgsInputsSize || input.size() != kBCastArgsInputsSize || - op_desc_ptr->GetAllOutputsDesc().size() != kBCastArgsOutputsSize); + (op_desc_ptr->GetAllInputsDesc().size() != kBCastArgsInputsSize || input.size() != kBCastArgsInputsSize || + op_desc_ptr->GetAllOutputsDesc().size() != kBCastArgsOutputsSize); if (size_check) { - GELOGW( - "input/output size error. 
InDesc size:%zu," - "OutDesc size:%zu, in size:%zu ", - op_desc_ptr->GetAllInputsDesc().size(), op_desc_ptr->GetAllOutputsDesc().size(), input.size()); + GELOGW("input/output size error. InDesc size:%zu," + "OutDesc size:%zu, in size:%zu ", + op_desc_ptr->GetAllInputsDesc().size(), op_desc_ptr->GetAllOutputsDesc().size(), input.size()); return NOT_CHANGED; } diff --git a/ge/host_kernels/broadcast_args_kernel.h b/ge/host_kernels/broadcast_args_kernel.h old mode 100644 new mode 100755 index 6d57976c..eb9a46f4 --- a/ge/host_kernels/broadcast_args_kernel.h +++ b/ge/host_kernels/broadcast_args_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/broadcast_gradient_args_kernel.cc b/ge/host_kernels/broadcast_gradient_args_kernel.cc index ed790dab..51ff4a4c 100644 --- a/ge/host_kernels/broadcast_gradient_args_kernel.cc +++ b/ge/host_kernels/broadcast_gradient_args_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "host_kernels/broadcast_gradient_args_kernel.h" #include diff --git a/ge/host_kernels/broadcast_gradient_args_kernel.h b/ge/host_kernels/broadcast_gradient_args_kernel.h old mode 100644 new mode 100755 index 8f183653..84764228 --- a/ge/host_kernels/broadcast_gradient_args_kernel.h +++ b/ge/host_kernels/broadcast_gradient_args_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/cast_kernel.cc b/ge/host_kernels/cast_kernel.cc index 106aa1c2..056081a1 100644 --- a/ge/host_kernels/cast_kernel.cc +++ b/ge/host_kernels/cast_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,21 +59,22 @@ Status CastKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetOutputDesc(0); GeTensorDesc op_desc_in = op_desc_ptr->GetInputDesc(0); auto src_data_type = op_desc_in.GetDataType(); - auto src_shape = op_desc_in.GetShape(); - auto src_format = op_desc_in.GetFormat(); - auto data_type = op_desc.GetDataType(); - auto data_shape = op_desc.GetShape(); + auto src_shape = op_desc_in.GetShape(); + auto src_format = op_desc_in.GetFormat(); + auto data_type = op_desc.GetDataType(); + auto data_shape = op_desc.GetShape(); auto data_format = op_desc.GetFormat(); - GELOGD( - "Current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. 
" - "output format %s, shape %s, data type %s", - op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + GELOGD("Current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " + "output format %s, shape %s, data type %s", op_desc_ptr->GetName().c_str(), + TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), + TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), + formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); // const_weight_ptr->GetData().GetSize() == 0 is supported auto src_data_size = src_shape.GetShapeSize(); @@ -85,16 +86,17 @@ Status CastKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector(src_data_size), src_data_type, data_type}; formats::TransResult trans_result; GELOGD("Trans data type from %s to %s, shape %s, data size %ld", - TypeUtils::DataTypeToSerialString(src_data_type).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str(), + TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), formats::ShapeToString(src_shape).c_str(), src_data_size); if ((src_format != data_format) || (src_shape.GetDims() != data_shape.GetDims()) || (!formats::IsTransDataTypeSupport(cast_args))) { GELOGW("Transfer from data type %s to %s, format %s to %s, shape %s to %s is not supported", TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(src_shape).c_str(), - formats::ShapeToString(data_shape).c_str()); + TypeUtils::DataTypeToSerialString(data_type).c_str(), + TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(data_format).c_str(), + formats::ShapeToString(src_shape).c_str(), formats::ShapeToString(data_shape).c_str()); return NOT_CHANGED; } if (!KernelUtils::CheckSizeForTransOp(const_weight_ptr, op_desc_ptr)) { @@ -104,8 +106,8 @@ Status CastKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector(input[i + kConcatOffsetInputIndexOne]->GetData().data()); + reinterpret_cast(input[i + kConcatOffsetInputIndexOne]->GetData().data()); int64_t input_dim = input_shape[concat_dim]; // this index is valid, checked before if (input_dim > (INT64_MAX - offset)) { GELOGE(PARAM_INVALID, " %d and %ld addition can result in overflow!.", offset, input_dim); diff --git a/ge/host_kernels/concat_offset_kernel.h b/ge/host_kernels/concat_offset_kernel.h old mode 100644 new mode 100755 index b1e0958a..d2f9422b --- 
a/ge/host_kernels/concat_offset_kernel.h +++ b/ge/host_kernels/concat_offset_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/concat_v2_kernel.cc b/ge/host_kernels/concat_v2_kernel.cc index c46b4277..a9f0da81 100644 --- a/ge/host_kernels/concat_v2_kernel.cc +++ b/ge/host_kernels/concat_v2_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,12 +21,12 @@ #include "common/debug/log.h" #include "common/fp16_t.h" -#include "common/ge_inner_error_codes.h" #include "common/op/ge_op_utils.h" #include "framework/common/debug/ge_log.h" #include "host_kernels/kernel_utils.h" #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" +#include "framework/common/types.h" namespace ge { namespace { @@ -116,7 +116,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector &input, int &tidx, +Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &input, + int &tidx, ConstGeTensorPtr &tensor) { size_t input_size = input.size(); // N >= 2 and N + 1 >= 3 @@ -137,7 +138,7 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i continue; } if (tensor == nullptr) { - tensor = input.at(i); // get first valid tensor with data + tensor = input.at(i); // get first valid tensor with data } } @@ -160,7 +161,7 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i GE_CHECK_NOTNULL(tensor_axis); const int *axis = reinterpret_cast(tensor_axis->GetData().data()); GE_CHECK_NOTNULL(axis); - tidx = axis[0]; // [-rank(values), rank(values)) + tidx = axis[0]; // [-rank(values), rank(values)) int rank = static_cast(tensor->GetTensorDesc().GetShape().GetDimNum()); // rank if (tidx < 0) { tidx += rank; @@ -169,8 +170,8 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i // 2. empty tensor only support case: [n],[m],[] // case: [[],[]] ,[[],[]] ,[] or other case when rank >=2 is not supported if (tidx < 0 || tidx >= rank || (has_empty_tensor && rank > kSupportEmptyTensorRank)) { - GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", tidx, rank, - has_empty_tensor); + GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", + tidx, rank, has_empty_tensor); return NOT_CHANGED; } diff --git a/ge/host_kernels/concat_v2_kernel.h b/ge/host_kernels/concat_v2_kernel.h old mode 100644 new mode 100755 index 353b7ed5..90f1899b --- a/ge/host_kernels/concat_v2_kernel.h +++ b/ge/host_kernels/concat_v2_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc index c1245535..d26237f4 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.cc +++ b/ge/host_kernels/dynamic_stitch_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -200,7 +200,7 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec dst_offset = input_indices[j] * data_unit; src_offset = j * data_unit; auto protected_size = - allowance < static_cast(SECUREC_MEM_MAX_LEN) ? allowance : static_cast(SECUREC_MEM_MAX_LEN); + allowance < static_cast(SECUREC_MEM_MAX_LEN) ? allowance : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(buf.get() + dst_offset, protected_size, input_data + src_offset, data_unit); if (ret != EOK) { GELOGW("Memory copy failed."); diff --git a/ge/host_kernels/dynamic_stitch_kernel.h b/ge/host_kernels/dynamic_stitch_kernel.h index 512c731b..2cca94e3 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.h +++ b/ge/host_kernels/dynamic_stitch_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/empty_kernel.cc b/ge/host_kernels/empty_kernel.cc index a5e5fbcf..19e938ce 100644 --- a/ge/host_kernels/empty_kernel.cc +++ b/ge/host_kernels/empty_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,8 +43,8 @@ Status EmptyKernel::EmptyCheck(const OpDescPtr &op_desc_ptr, const std::vectorGetAllInputsDesc().size() != kEmptyInputsSize) || (input.size() != kEmptyInputsSize) || - (op_desc_ptr->GetAllOutputsDesc().size() != kEmptyOutputsSize)); + ((op_desc_ptr->GetAllInputsDesc().size() != kEmptyInputsSize) || (input.size() != kEmptyInputsSize) || + (op_desc_ptr->GetAllOutputsDesc().size() != kEmptyOutputsSize)); if (size_check) { GELOGW("Input/Output size error. InDesc size:%zu, OutDesc size:%zu, in size:%zu ", op_desc_ptr->GetAllInputsDesc().size(), op_desc_ptr->GetAllOutputsDesc().size(), input.size()); @@ -58,7 +58,8 @@ Status EmptyKernel::EmptyCheck(const OpDescPtr &op_desc_ptr, const std::vectorGetTensorDesc().GetShape().GetDimNum() > kShapeMaxDims) { - GELOGW("Check if the dimension is 1-D failed, dims:%zu", shape->GetTensorDesc().GetShape().GetDimNum()); + GELOGW("Check if the dimension is 1-D failed, dims:%zu", + shape->GetTensorDesc().GetShape().GetDimNum()); return PARAM_INVALID; } return SUCCESS; diff --git a/ge/host_kernels/empty_kernel.h b/ge/host_kernels/empty_kernel.h old mode 100644 new mode 100755 index bc426048..7fd2791c --- a/ge/host_kernels/empty_kernel.h +++ b/ge/host_kernels/empty_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/expanddims_kernel.cc b/ge/host_kernels/expanddims_kernel.cc index 15648573..f304fbdb 100644 --- a/ge/host_kernels/expanddims_kernel.cc +++ b/ge/host_kernels/expanddims_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,7 +50,8 @@ Status ExpanddimsKernel::Compute(const NodePtr &node_ptr) { GELOGI("Expanddims dimension kernel success."); return SUCCESS; } -Status ExpanddimsKernel::Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, +Status ExpanddimsKernel::Compute(const ge::OpDescPtr op_desc_ptr, + const std::vector &input, std::vector &v_output) { GELOGI("Expanddims folding kernel in."); if (op_desc_ptr == nullptr) { diff --git a/ge/host_kernels/expanddims_kernel.h b/ge/host_kernels/expanddims_kernel.h old mode 100644 new mode 100755 index 4970d89c..77971a29 --- a/ge/host_kernels/expanddims_kernel.h +++ b/ge/host_kernels/expanddims_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/fill_kernel.cc b/ge/host_kernels/fill_kernel.cc index 27bcb9aa..4e3d4db5 100644 --- a/ge/host_kernels/fill_kernel.cc +++ b/ge/host_kernels/fill_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ #include "host_kernels/kernel_utils.h" #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" +#include "framework/common/types.h" namespace { const int kFillInputSize = 2; diff --git a/ge/host_kernels/fill_kernel.h b/ge/host_kernels/fill_kernel.h old mode 100644 new mode 100755 index a1b6b4ef..1a4546f2 --- a/ge/host_kernels/fill_kernel.h +++ b/ge/host_kernels/fill_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floordiv_kernel.cc b/ge/host_kernels/floordiv_kernel.cc index 5114122c..0574ca3b 100644 --- a/ge/host_kernels/floordiv_kernel.cc +++ b/ge/host_kernels/floordiv_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floordiv_kernel.h b/ge/host_kernels/floordiv_kernel.h old mode 100644 new mode 100755 index c8505731..d3dc3ff7 --- a/ge/host_kernels/floordiv_kernel.h +++ b/ge/host_kernels/floordiv_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/floormod_kernel.cc b/ge/host_kernels/floormod_kernel.cc index 7ad746de..31e4e19b 100644 --- a/ge/host_kernels/floormod_kernel.cc +++ b/ge/host_kernels/floormod_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floormod_kernel.h b/ge/host_kernels/floormod_kernel.h old mode 100644 new mode 100755 index faa5c8e2..439fc0a6 --- a/ge/host_kernels/floormod_kernel.h +++ b/ge/host_kernels/floormod_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/gather_v2_kernel.cc b/ge/host_kernels/gather_v2_kernel.cc index 7413395a..e52b4534 100644 --- a/ge/host_kernels/gather_v2_kernel.cc +++ b/ge/host_kernels/gather_v2_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/gather_v2_kernel.h b/ge/host_kernels/gather_v2_kernel.h old mode 100644 new mode 100755 index 0bf4e3ee..17fcba59 --- a/ge/host_kernels/gather_v2_kernel.h +++ b/ge/host_kernels/gather_v2_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/greater_kernel.cc b/ge/host_kernels/greater_kernel.cc index f23eee2f..a245ec8d 100644 --- a/ge/host_kernels/greater_kernel.cc +++ b/ge/host_kernels/greater_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/greater_kernel.h b/ge/host_kernels/greater_kernel.h old mode 100644 new mode 100755 index 3697a8e8..6f136462 --- a/ge/host_kernels/greater_kernel.h +++ b/ge/host_kernels/greater_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,8 +37,8 @@ class GreaterKernel : public Kernel { Status GreaterCheck(const std::vector &input); const std::set greater_supported_type = { - DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, - DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, + DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, }; }; } // namespace ge diff --git a/ge/host_kernels/identity_kernel.cc b/ge/host_kernels/identity_kernel.cc index 16bd3138..702f5c93 100644 --- a/ge/host_kernels/identity_kernel.cc +++ b/ge/host_kernels/identity_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,10 +12,11 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #include "identity_kernel.h" #include "inc/kernel_factory.h" +#include "framework/common/types.h" namespace { constexpr uint32_t kInputDescIndex = 0; diff --git a/ge/host_kernels/identity_kernel.h b/ge/host_kernels/identity_kernel.h index 2164d880..84cd08bb 100644 --- a/ge/host_kernels/identity_kernel.h +++ b/ge/host_kernels/identity_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_IDENTITY_KERNEL_H_ #define GE_GRAPH_PASSES_FOLDING_KERNEL_IDENTITY_KERNEL_H_ diff --git a/ge/host_kernels/kernel_utils.cc b/ge/host_kernels/kernel_utils.cc old mode 100644 new mode 100755 index 9bcd9e3c..595f9517 --- a/ge/host_kernels/kernel_utils.cc +++ b/ge/host_kernels/kernel_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -85,7 +85,8 @@ bool KernelUtils::CheckFormatSupported(const NodePtr &node_ptr) { return true; } -bool KernelUtils::CheckSizeForTransOp(const ge::ConstGeTensorPtr &const_weight_ptr, const ge::OpDescPtr &op_desc_ptr) { +bool KernelUtils::CheckSizeForTransOp(const ge::ConstGeTensorPtr &const_weight_ptr, + const ge::OpDescPtr &op_desc_ptr) { if (const_weight_ptr == nullptr || op_desc_ptr == nullptr) { GELOGE(FAILED, "parameter invalid"); return false; diff --git a/ge/host_kernels/kernel_utils.h b/ge/host_kernels/kernel_utils.h old mode 100644 new mode 100755 index 17b645aa..c9c90634 --- a/ge/host_kernels/kernel_utils.h +++ b/ge/host_kernels/kernel_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,7 +42,7 @@ class KernelUtils { * @param [out] output the tensor for save sequence of numbers * @author */ - template + template static Status GenData(const int64_t data_num, const T value, const GeTensorPtr &output) { if (data_num > 0) { if (!CheckInt64MulOverflow(data_num, static_cast(sizeof(T)))) { @@ -70,12 +70,12 @@ class KernelUtils { } /** - * Calculate dimension - * @param [in] dims save the tensor of the dimension - * @param [in] vec_dim results of each dimension - * @param [out] data_num total size of data - * @author - */ + * Calculate dimension + * @param [in] dims save the tensor of the dimension + * @param [in] vec_dim results of each dimension + * @param [out] data_num total size of data + * @author + */ template static Status CalcDims(const ConstGeTensorPtr dims, std::vector &vec_dim, int64_t &data_num) { data_num = 1; diff --git a/ge/host_kernels/maximum_kernel.cc b/ge/host_kernels/maximum_kernel.cc index aca4ec2b..2ced113f 100644 --- a/ge/host_kernels/maximum_kernel.cc +++ b/ge/host_kernels/maximum_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/maximum_kernel.h b/ge/host_kernels/maximum_kernel.h old mode 100644 new mode 100755 index feaa91e7..d7e69f59 --- a/ge/host_kernels/maximum_kernel.h +++ b/ge/host_kernels/maximum_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/mul_kernel.cc b/ge/host_kernels/mul_kernel.cc index 8dbe83a5..b01a5c79 100644 --- a/ge/host_kernels/mul_kernel.cc +++ b/ge/host_kernels/mul_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/mul_kernel.h b/ge/host_kernels/mul_kernel.h old mode 100644 new mode 100755 index e7c74c41..2d06f676 --- a/ge/host_kernels/mul_kernel.h +++ b/ge/host_kernels/mul_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/pack_kernel.cc b/ge/host_kernels/pack_kernel.cc index 9b62a582..476005ef 100644 --- a/ge/host_kernels/pack_kernel.cc +++ b/ge/host_kernels/pack_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "host_kernels/pack_kernel.h" #include @@ -28,6 +27,7 @@ #include "host_kernels/kernel_utils.h" #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" +#include "framework/common/types.h" namespace { const int64_t kShapeItemNumMAX = 2000000000; @@ -79,7 +79,8 @@ Status PackKernel::ValidateKernelParams(const ge::OpDescPtr &op_desc_ptr, return NOT_CHANGED; } if (input.size() != static_cast(n_)) { - GELOGW("The number of input for Pack should be %d, in fact it is %ld ", static_cast(n_), input.size()); + GELOGW("The number of input for Pack should be %d, in fact it is %ld ", static_cast(n_), + input.size()); return PARAM_INVALID; } data_type_ = op_desc_ptr->GetInputDesc(0).GetDataType(); @@ -166,7 +167,8 @@ void PackKernel::ExpandDims(const int64_t axis, const std::vector &input, +Status PackKernel::CopyOutputData(const GeShape &final_shape, + const std::vector &input, ge::GeTensorPtr &output_ptr) { output_ptr->MutableTensorDesc().SetShape(final_shape); output_ptr->MutableTensorDesc().SetDataType(DataType(data_type_)); @@ -204,8 +206,8 @@ Status PackKernel::CopyOutputData(const GeShape &final_shape, const std::vector< for (int64_t j = 0; j < n_; j++) { // input range already check before. Range is [0,n_). const uint8_t *in_data = input[j]->GetData().data(); - auto ret = - memcpy_s(buf.get() + dst_offset, output_size * data_size - dst_offset, in_data + src_offset, data_size * unit); + auto ret = memcpy_s(buf.get() + dst_offset, output_size * data_size - dst_offset, in_data + src_offset, + data_size * unit); if (ret != EOK) { GELOGW("Memory copy failed."); return NOT_CHANGED; diff --git a/ge/host_kernels/pack_kernel.h b/ge/host_kernels/pack_kernel.h old mode 100644 new mode 100755 index 708e46c3..87b77a66 --- a/ge/host_kernels/pack_kernel.h +++ b/ge/host_kernels/pack_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_PACK_KERNEL_H_ #define GE_GRAPH_PASSES_FOLDING_KERNEL_PACK_KERNEL_H_ @@ -31,7 +30,6 @@ class PackKernel : public Kernel { public: Status Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, std::vector &v_output) override; - private: Status ValidateKernelParams(const ge::OpDescPtr &op_desc_ptr, const std::vector &input); Status ValidateInputs(const ge::OpDescPtr &op_desc_ptr, const std::vector &input); diff --git a/ge/host_kernels/permute_kernel.cc b/ge/host_kernels/permute_kernel.cc old mode 100644 new mode 100755 index 24bed54d..327c94f8 --- a/ge/host_kernels/permute_kernel.cc +++ b/ge/host_kernels/permute_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,6 +33,7 @@ #include "host_kernels/kernel_utils.h" #include "framework/common/ge_inner_error_codes.h" + namespace ge { namespace { const char *const kAttrOrder = "order"; @@ -74,21 +75,21 @@ Status PermuteKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetOutputDesc(0); GeTensorDesc op_desc_in = op_desc_ptr->GetInputDesc(0); auto src_format = op_desc_in.GetFormat(); - auto src_shape = op_desc_in.GetShape().GetDims(); + auto src_shape = op_desc_in.GetShape().GetDims(); auto src_data_type = op_desc_in.GetDataType(); auto data_shape = op_desc.GetShape().GetDims(); auto data_format = op_desc.GetFormat(); auto data_type = op_desc.GetDataType(); GELOGD( - "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " - "output format %s, shape %s, data type %s", - op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " + "output format %s, shape %s, data type %s", + op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); vector perm_list; if (!AttrUtils::GetListInt(op_desc_ptr, kAttrOrder, perm_list) && diff --git a/ge/host_kernels/permute_kernel.h b/ge/host_kernels/permute_kernel.h old mode 100644 new mode 100755 index b022abd7..589ea49e --- a/ge/host_kernels/permute_kernel.h +++ b/ge/host_kernels/permute_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/range_kernel.cc b/ge/host_kernels/range_kernel.cc index 4ce3725d..32a72b47 100644 --- a/ge/host_kernels/range_kernel.cc +++ b/ge/host_kernels/range_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/range_kernel.h b/ge/host_kernels/range_kernel.h old mode 100644 new mode 100755 index 50b1c232..e58530d0 --- a/ge/host_kernels/range_kernel.h +++ b/ge/host_kernels/range_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/rank_kernel.cc b/ge/host_kernels/rank_kernel.cc old mode 100644 new mode 100755 index 7fb92039..1de9478c --- a/ge/host_kernels/rank_kernel.cc +++ b/ge/host_kernels/rank_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include "framework/common/debug/ge_log.h" #include "inc/kernel_factory.h" #include "omg/omg_inner_types.h" +#include "framework/common/types.h" namespace { const size_t kRankInputSize = 1; diff --git a/ge/host_kernels/rank_kernel.h b/ge/host_kernels/rank_kernel.h old mode 100644 new mode 100755 index 0de4960c..80c0bb7d --- a/ge/host_kernels/rank_kernel.h +++ b/ge/host_kernels/rank_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reduce_prod_kernel.cc b/ge/host_kernels/reduce_prod_kernel.cc index 0a3fad72..4837a921 100644 --- a/ge/host_kernels/reduce_prod_kernel.cc +++ b/ge/host_kernels/reduce_prod_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -159,7 +159,7 @@ void ReduceProdKernel::ShapeCal(const ge::OpDescPtr &op_desc_ptr, const std::vec vector data_dims = data_tensor->GetTensorDesc().GetShape().GetDims(); int32_t data_dim_size = static_cast(data_dims.size()); const uint8_t *axis_data = axis_tensor->GetData().GetData(); - GE_CHECK_NOTNULL_EXEC(axis_data, return ); + GE_CHECK_NOTNULL_EXEC(axis_data, return); int32_t axis = *(const_cast(reinterpret_cast(axis_data))); bool keep_dims = false; if (!AttrUtils::GetBool(op_desc_ptr, "keep_dims", keep_dims)) { diff --git a/ge/host_kernels/reduce_prod_kernel.h b/ge/host_kernels/reduce_prod_kernel.h old mode 100644 new mode 100755 index 326dd2f5..ccf33668 --- a/ge/host_kernels/reduce_prod_kernel.h +++ b/ge/host_kernels/reduce_prod_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reformat_kernel.cc b/ge/host_kernels/reformat_kernel.cc index c2dd1e17..c1942983 100644 --- a/ge/host_kernels/reformat_kernel.cc +++ b/ge/host_kernels/reformat_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +85,8 @@ Status ReFormatKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorSetData(input.at(0)->GetData()) != GRAPH_SUCCESS, GELOGW("set data failed"); + GE_IF_BOOL_EXEC(output_ptr->SetData(input.at(0)->GetData()) != GRAPH_SUCCESS, + GELOGW("set data failed"); return NOT_CHANGED); v_output.emplace_back(output_ptr); GELOGD("ReFormatKernel success."); diff --git a/ge/host_kernels/reformat_kernel.h b/ge/host_kernels/reformat_kernel.h old mode 100644 new mode 100755 index e3d49acf..770b90b3 --- a/ge/host_kernels/reformat_kernel.h +++ b/ge/host_kernels/reformat_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reshape_kernel.cc b/ge/host_kernels/reshape_kernel.cc index dc7e4bb8..7c4f58f6 100644 --- a/ge/host_kernels/reshape_kernel.cc +++ b/ge/host_kernels/reshape_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reshape_kernel.h b/ge/host_kernels/reshape_kernel.h old mode 100644 new mode 100755 index c0100e51..37b12db9 --- a/ge/host_kernels/reshape_kernel.h +++ b/ge/host_kernels/reshape_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/rsqrt_kernel.cc b/ge/host_kernels/rsqrt_kernel.cc old mode 100644 new mode 100755 index 5184d885..74c78787 --- a/ge/host_kernels/rsqrt_kernel.cc +++ b/ge/host_kernels/rsqrt_kernel.cc @@ -1,5 +1,5 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd +/** + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "host_kernels/rsqrt_kernel.h" #include @@ -28,6 +27,7 @@ #include "host_kernels/kernel_utils.h" #include "inc/kernel_factory.h" #include "common/math/math_util.h" +#include "framework/common/types.h" namespace ge { namespace { @@ -51,13 +51,13 @@ Status ZeroCheck(T x, const DataType &data_type) { } return SUCCESS; } -#define SET_RSQRT_CASE(DTYPE, TYPE) \ - case (DTYPE): \ - ret = RsqrtKernel::RsqrtCompute(input_ptr, output_ptr); \ +#define SET_RSQRT_CASE(DTYPE, TYPE) \ + case (DTYPE): \ + ret = RsqrtKernel::RsqrtCompute(input_ptr, output_ptr); \ break; } // namespace -template +template Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr &output_tensor_ptr) { GE_CHECK_NOTNULL(input_tensor_ptr); GE_CHECK_NOTNULL(output_tensor_ptr); @@ -65,12 +65,12 @@ Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr size_t data_count = data_size / sizeof(T); auto data_type = input_tensor_ptr->GetTensorDesc().GetDataType(); if (data_count > 0) { - unique_ptr buf(new (std::nothrow) T[data_count]()); + unique_ptr buf(new(std::nothrow) T[data_count]()); if (buf == nullptr) { GELOGW("New buf failed"); return NOT_CHANGED; } - auto ptr = const_cast(reinterpret_cast(input_tensor_ptr->GetData().data())); + auto ptr = const_cast(reinterpret_cast(input_tensor_ptr->GetData().data())); for (size_t i = 0; i < data_count; i++) { if (ZeroCheck(*(ptr + i), data_type) != SUCCESS) { GELOGW("Rsqrt: The input data can not less than or equal to zero, rsqrt folding failed."); @@ -78,18 +78,18 @@ Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr } switch (data_type) { case DT_FLOAT16: { - double val = static_cast(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); + double val = static_cast(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); double drSqrt = 1.0 / std::sqrt(val); buf[i] = drSqrt; break; } - case DT_FLOAT: { - float denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); - buf[i] = static_cast(1 / denominator); + case DT_FLOAT:{ + float denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); + buf[i] = static_cast(1 / denominator); break; } case DT_DOUBLE: { - double denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); + double denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); buf[i] = static_cast(1 / denominator); break; } @@ -99,8 +99,7 @@ Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr } } GE_IF_BOOL_EXEC(output_tensor_ptr->SetData(reinterpret_cast(buf.get()), data_size) != GRAPH_SUCCESS, - GELOGW("Set data failed"); - return NOT_CHANGED); + GELOGW("Set data failed"); return NOT_CHANGED); output_tensor_ptr->MutableTensorDesc().SetDataType(data_type); output_tensor_ptr->MutableTensorDesc().SetShape(input_tensor_ptr->GetTensorDesc().GetShape()); } diff --git a/ge/host_kernels/rsqrt_kernel.h b/ge/host_kernels/rsqrt_kernel.h old mode 100644 new mode 100755 index 02b08252..e3733521 --- a/ge/host_kernels/rsqrt_kernel.h +++ b/ge/host_kernels/rsqrt_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,9 +27,8 @@ class RsqrtKernel : public Kernel { public: Status Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, std::vector &v_output) override; - private: - template + template Status RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr &output_tensor_ptr); }; } // namespace ge diff --git a/ge/host_kernels/shape_kernel.cc b/ge/host_kernels/shape_kernel.cc index 2f20fb24..ecb0e082 100644 --- a/ge/host_kernels/shape_kernel.cc +++ b/ge/host_kernels/shape_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include "host_kernels/kernel_utils.h" #include "graph/passes/pass_utils.h" #include "inc/kernel_factory.h" +#include "framework/common/types.h" namespace ge { namespace { diff --git a/ge/host_kernels/shape_kernel.h b/ge/host_kernels/shape_kernel.h old mode 100644 new mode 100755 index 8e8791e5..6ef416bf --- a/ge/host_kernels/shape_kernel.h +++ b/ge/host_kernels/shape_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/shape_n_kernel.cc b/ge/host_kernels/shape_n_kernel.cc index 33b878cf..67d2eeff 100644 --- a/ge/host_kernels/shape_n_kernel.cc +++ b/ge/host_kernels/shape_n_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include "host_kernels/kernel_utils.h" #include "graph/passes/pass_utils.h" #include "inc/kernel_factory.h" +#include "framework/common/types.h" namespace ge { Status ShapeNKernel::Compute(const NodePtr &node, std::vector &v_output) { @@ -48,7 +49,7 @@ Status ShapeNKernel::Compute(const NodePtr &node, std::vector &v_ou } vector dims = input_desc->GetShape().GetDims(); Status ret = - PassUtils::ConstructTensorDescWithData(op_desc->GetOutputDesc(static_cast(i)), dims, v_output); + PassUtils::ConstructTensorDescWithData(op_desc->GetOutputDesc(static_cast(i)), dims, v_output); if (ret != SUCCESS) { GELOGE(PARAM_INVALID, "ShapeN kernel construct tensor desc failed, i:%zu", i); return ret; diff --git a/ge/host_kernels/shape_n_kernel.h b/ge/host_kernels/shape_n_kernel.h old mode 100644 new mode 100755 index 55829a39..51fd9393 --- a/ge/host_kernels/shape_n_kernel.h +++ b/ge/host_kernels/shape_n_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/size_kernel.cc b/ge/host_kernels/size_kernel.cc index 65bb21fc..caa5febc 100644 --- a/ge/host_kernels/size_kernel.cc +++ b/ge/host_kernels/size_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/size_kernel.h b/ge/host_kernels/size_kernel.h old mode 100644 new mode 100755 index 3a309bc7..43a00f2f --- a/ge/host_kernels/size_kernel.h +++ b/ge/host_kernels/size_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/slice_d_kernel.cc b/ge/host_kernels/slice_d_kernel.cc index 3b8fd0a0..b8572290 100644 --- a/ge/host_kernels/slice_d_kernel.cc +++ b/ge/host_kernels/slice_d_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -146,11 +146,11 @@ Status SliceDKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector(tmp_value); - + if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; diff --git a/ge/host_kernels/ssd_prior_box_kernel.h b/ge/host_kernels/ssd_prior_box_kernel.h old mode 100644 new mode 100755 index 96de2b85..0ebf221d --- a/ge/host_kernels/ssd_prior_box_kernel.h +++ b/ge/host_kernels/ssd_prior_box_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index 13c61666..e8fb658a 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -1,31 +1,32 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- */ +*/ #include "host_kernels/strided_slice_kernel.h" +#include #include "common/fp16_t.h" #include "common/ge_inner_error_codes.h" #include "common/math/math_util.h" #include "common/op/ge_op_utils.h" #include "external/graph/types.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/types.h" #include "graph/utils/type_utils.h" #include "host_kernels/kernel_utils.h" #include "inc/kernel_factory.h" -#include namespace ge { namespace { @@ -208,7 +209,7 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector(i); bool new_axis_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); if (new_axis_mask_flag) { output_dims.push_back(1); input_dims.push_back(1); @@ -255,7 +256,7 @@ void StridedSliceKernel::ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_ten for (size_t i = 0; i < final_dim_num; i++) { auto i_temp = static_cast(i); bool new_axis_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); if (new_axis_mask_flag) { x_dims.insert(x_dims.begin() + i, 1); } @@ -266,9 +267,9 @@ Status StridedSliceKernel::MaskCal(const size_t i, int64_t &begin_i, int64_t &en bool begin_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_BEGIN_MASK)) & (1 << i_temp)); bool end_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK)) & (1 << i_temp)); bool ellipsis_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK)) & (1 << i_temp)); + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK)) & (1 << i_temp)); bool shrink_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << i_temp)); + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << i_temp)); if (shrink_mask_flag) { begin_i = (begin_i < 0 ? (dim_i + begin_i) : begin_i); FMK_INT32_ADDCHECK(begin_i, kNumOne) diff --git a/ge/host_kernels/strided_slice_kernel.h b/ge/host_kernels/strided_slice_kernel.h old mode 100644 new mode 100755 index 5d130cd7..b8d11477 --- a/ge/host_kernels/strided_slice_kernel.h +++ b/ge/host_kernels/strided_slice_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/sub_kernel.cc b/ge/host_kernels/sub_kernel.cc index 70a14c9f..deb36cb3 100644 --- a/ge/host_kernels/sub_kernel.cc +++ b/ge/host_kernels/sub_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/sub_kernel.h b/ge/host_kernels/sub_kernel.h old mode 100644 new mode 100755 index 4143980c..32ab7084 --- a/ge/host_kernels/sub_kernel.h +++ b/ge/host_kernels/sub_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,7 +27,6 @@ class SubKernel : public Kernel { public: Status Compute(const ge::OpDescPtr attr, const std::vector &input, vector &v_output) override; - private: std::vector y_data_int8_t_; std::vector y_data_int16_t_; diff --git a/ge/host_kernels/transdata_kernel.cc b/ge/host_kernels/transdata_kernel.cc index c5c9da6e..2b16b075 100644 --- a/ge/host_kernels/transdata_kernel.cc +++ b/ge/host_kernels/transdata_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" + namespace ge { namespace { const size_t kTransdataInputSize = 1; @@ -82,15 +83,15 @@ Status TransdataKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetFormat(); const auto &data_type = op_desc->GetDataType(); GELOGD( - "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " - "output format %s, shape %s, data type %s", - op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " + "output format %s, shape %s, data type %s", + op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); const uint8_t *src_data = const_weight_ptr->GetData().data(); const formats::TransArgs trans_args{src_data, src_format, data_format, src_shape, data_shape, src_data_type}; diff --git a/ge/host_kernels/transdata_kernel.h b/ge/host_kernels/transdata_kernel.h old mode 100644 new mode 100755 index e4cf9b39..1d212cf5 --- a/ge/host_kernels/transdata_kernel.h +++ b/ge/host_kernels/transdata_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/transpose_kernel.cc b/ge/host_kernels/transpose_kernel.cc old mode 100644 new mode 100755 index 3f55539e..03d112aa --- a/ge/host_kernels/transpose_kernel.cc +++ b/ge/host_kernels/transpose_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -82,15 +82,15 @@ Status TransposeKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " + "output format %s, shape %s, data type %s", + op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); ConstGeTensorPtr tensor_perm_ptr = input[kTransposeInputPerm]; DataType data_dtype = tensor_perm_ptr->GetTensorDesc().GetDataType(); diff --git a/ge/host_kernels/transpose_kernel.h b/ge/host_kernels/transpose_kernel.h old mode 100644 new mode 100755 index bb073c15..9e7c54d7 --- a/ge/host_kernels/transpose_kernel.h +++ b/ge/host_kernels/transpose_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/unpack_kernel.cc b/ge/host_kernels/unpack_kernel.cc old mode 100644 new mode 100755 index fbfd9e16..1c28151f --- a/ge/host_kernels/unpack_kernel.cc +++ b/ge/host_kernels/unpack_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
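Aside, as a loose illustration (not part of the patch itself): the transpose constant-folding kernel touched above permutes a const weight's data on the host before graph execution. The sketch below shows the basic idea for a row-major 2-D buffer only; the names are hypothetical and this is not the GE implementation, which handles arbitrary ranks, formats, and data types.

    #include <cstddef>
    #include <vector>

    // Transpose a row-major rows x cols buffer into a cols x rows buffer.
    std::vector<float> Transpose2D(const std::vector<float> &src, size_t rows, size_t cols) {
      std::vector<float> dst(src.size());
      for (size_t r = 0; r < rows; ++r) {
        for (size_t c = 0; c < cols; ++c) {
          dst[c * rows + r] = src[r * cols + c];
        }
      }
      return dst;
    }

    int main() {
      const std::vector<float> w = {1, 2, 3, 4, 5, 6};  // 2 x 3
      const auto wt = Transpose2D(w, 2, 3);             // 3 x 2: {1, 4, 2, 5, 3, 6}
      return 0;
    }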
@@ -90,3 +90,4 @@ Status UnpackKernel::Compute(const OpDescPtr attr, const std::vector &input, + virtual Status Compute(const ge::OpDescPtr attr, + const std::vector &input, std::vector &v_output) override; }; } // namespace ge #endif // GE_GRAPH_PASSES_FOLDING_KERNEL_UNPACK_KERNEL_H_ + diff --git a/ge/host_kernels/unsqueeze_kernel.cc b/ge/host_kernels/unsqueeze_kernel.cc index d66a3e2c..4ceaba3f 100644 --- a/ge/host_kernels/unsqueeze_kernel.cc +++ b/ge/host_kernels/unsqueeze_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/unsqueeze_kernel.h b/ge/host_kernels/unsqueeze_kernel.h index c676586f..510a1ffa 100644 --- a/ge/host_kernels/unsqueeze_kernel.h +++ b/ge/host_kernels/unsqueeze_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index cbb556e2..bf5af73b 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -70,8 +70,8 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { buffer = malloc(allocate_size); } else { buffer = MemManager::Instance() - .CachingInstance(RT_MEMORY_HBM) - .Malloc(allocate_size, reinterpret_cast(try_reuse_addr), device_id_); + .CachingInstance(RT_MEMORY_HBM) + .Malloc(allocate_size, reinterpret_cast(try_reuse_addr), device_id_); } if (buffer == nullptr) { GELOGE(MEMALLOC_FAILED, "Failed to malloc memory, device_id = %u, size = %zu", device_id_, allocate_size); @@ -117,4 +117,4 @@ void NpuMemoryAllocator::DestroyAllocator() { allocators_.erase(device_id); } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/ge/hybrid/common/npu_memory_allocator.h b/ge/hybrid/common/npu_memory_allocator.h index 99c01b34..55cb13ad 100644 --- a/ge/hybrid/common/npu_memory_allocator.h +++ b/ge/hybrid/common/npu_memory_allocator.h @@ -50,7 +50,7 @@ class NpuMemoryAllocator { static NpuMemoryAllocator *GetAllocator(uint32_t device_id); static NpuMemoryAllocator *GetAllocator(); static void DestroyAllocator(); - static AllocationAttr *AttrWithDefaultPadding() { + static AllocationAttr* AttrWithDefaultPadding() { static AllocationAttr attr(kDefaultPadding, nullptr); return &attr; } @@ -59,7 +59,6 @@ class NpuMemoryAllocator { void Deallocate(void *data, MemStorageType mem_type = HBM); static constexpr int kDefaultPadding = 32; - private: explicit NpuMemoryAllocator(uint32_t device_id); uint32_t device_id_; @@ -69,4 +68,4 @@ class NpuMemoryAllocator { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_COMMON_MEMORY_ALLOCATOR_H_ +#endif // GE_HYBRID_COMMON_MEMORY_ALLOCATOR_H_ diff --git a/ge/hybrid/common/tensor_value.cc b/ge/hybrid/common/tensor_value.cc index 11a96d13..4f1935b8 100644 --- a/ge/hybrid/common/tensor_value.cc +++ b/ge/hybrid/common/tensor_value.cc @@ -61,9 +61,11 @@ TensorBuffer::~TensorBuffer() { } } -TensorValue::TensorValue(std::shared_ptr buffer) : buffer_(std::move(buffer)) {} +TensorValue::TensorValue(std::shared_ptr buffer) : buffer_(std::move(buffer)) { +} -TensorValue::TensorValue(void 
*buffer, size_t size) : ref_buffer_(buffer), ref_size_(size) {} +TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_size_(size) { +} TensorValue::~TensorValue() { Destroy(); } diff --git a/ge/hybrid/common/tensor_value.h b/ge/hybrid/common/tensor_value.h index d720e0e0..9f68cf2c 100644 --- a/ge/hybrid/common/tensor_value.h +++ b/ge/hybrid/common/tensor_value.h @@ -29,18 +29,23 @@ class AllocationAttr; class TensorBuffer { public: - static std::unique_ptr Create(NpuMemoryAllocator *allocator, size_t size, + static std::unique_ptr Create(NpuMemoryAllocator *allocator, + size_t size, AllocationAttr *attr = nullptr); static std::unique_ptr Create(void *buffer, size_t size); TensorBuffer(const TensorBuffer &) = delete; - TensorBuffer &operator=(const TensorBuffer &) = delete; + TensorBuffer &operator = (const TensorBuffer &) = delete; ~TensorBuffer(); - void *GetData() { return buffer_; } + void *GetData() { + return buffer_; + } - size_t GetSize() const { return size_; } + size_t GetSize() const { + return size_; + } private: TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM); @@ -63,13 +68,17 @@ class TensorValue { void Destroy(); - bool IsEmpty() { return ref_buffer_ == nullptr && buffer_ == nullptr; } + bool IsEmpty() { + return ref_buffer_ == nullptr && buffer_ == nullptr; + } const void *GetData() const; std::string DebugString() const; - void SetName(const std::string &name) { name_ = name; } + void SetName(const std::string &name) { + name_ = name; + } void *MutableData(); diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 37822039..05ed1157 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -53,32 +53,31 @@ struct GraphExecutionContext { mutable std::mutex mu; }; -#define RECORD_PROFILING_EVENT(context, evt_type, fmt, category, node_name, ...) \ - do { \ - if ((context != nullptr) && (context)->profiler != nullptr) { \ - if (node_name != nullptr) { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GetTid(), node_name, category, \ - ##__VA_ARGS__); \ - } else { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GetTid(), category, ##__VA_ARGS__); \ - } \ - } \ - } while (0) +#define RECORD_PROFILING_EVENT(context, evt_type, fmt, category, node_name, ...) \ +do { \ + if ((context != nullptr) && (context)->profiler != nullptr) { \ + if (node_name != nullptr) { \ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GetTid(), node_name, category, ##__VA_ARGS__);\ + } else { \ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GetTid(), category, ##__VA_ARGS__); \ + }\ + } \ +} while (0) #define RECORD_MODEL_EXECUTION_EVENT(context, fmt, ...) \ RECORD_PROFILING_EVENT((context), HybridProfiler::GENERAL, fmt, "ModelExecutor", nullptr, ##__VA_ARGS__) #define RECORD_SHAPE_INFERENCE_EVENT(context, name, fmt, ...) \ - RECORD_PROFILING_EVENT((context), HybridProfiler::SHAPE_INFERENCE, fmt, "ShapeInference", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::SHAPE_INFERENCE, fmt, "ShapeInference", name, ##__VA_ARGS__) #define RECORD_COMPILE_EVENT(context, name, fmt, ...) 
\ - RECORD_PROFILING_EVENT((context), HybridProfiler::COMPILE, fmt, "Compilation", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::COMPILE, fmt, "Compilation", name, ##__VA_ARGS__) #define RECORD_EXECUTION_EVENT(context, name, fmt, ...) \ - RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) \ - RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ +#endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 7f650017..6e93b7e4 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -25,7 +25,9 @@ namespace hybrid { namespace { int kDataOutputIndex = 0; } -HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) : model_(model), run_flag_(false) {} +HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) + : model_(model), run_flag_(false) { +} HybridModelAsyncExecutor::~HybridModelAsyncExecutor() { if (stream_ != nullptr) { @@ -33,9 +35,13 @@ HybridModelAsyncExecutor::~HybridModelAsyncExecutor() { } } -void HybridModelAsyncExecutor::SetDeviceId(uint32_t device_id) { device_id_ = device_id; } +void HybridModelAsyncExecutor::SetDeviceId(uint32_t device_id) { + device_id_ = device_id; +} -void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) { model_id_ = model_id; } +void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) { + model_id_ = model_id; +} Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr &data) { GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL, @@ -51,7 +57,9 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr &lis run_flag_ = true; listener_ = listener; - future_ = std::async([&]() -> Status { return RunInternal(); }); + future_ = std::async([&]() -> Status { + return RunInternal(); + }); GE_CHK_BOOL_RET_STATUS(future_.valid(), INTERNAL_ERROR, "Failed to start."); GELOGD("HybridModelExecutor::Start successfully"); @@ -73,11 +81,11 @@ Status HybridModelAsyncExecutor::Stop() { } Status HybridModelAsyncExecutor::Init() { - data_inputer_ = std::unique_ptr(new (std::nothrow) DataInputer()); + data_inputer_ = std::unique_ptr(new(std::nothrow) DataInputer()); GE_CHECK_NOTNULL(data_inputer_); GE_CHK_RT_RET(rtStreamCreate(&stream_, RT_STREAM_PRIORITY_DEFAULT)); - executor_ = std::unique_ptr(new (std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); + executor_ = std::unique_ptr(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); GE_CHECK_NOTNULL(executor_); GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); GE_CHK_STATUS_RET(InitInputTensors(), "Failed to init input tensors"); @@ -121,9 +129,9 @@ Status HybridModelAsyncExecutor::RunInternal() { RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_); ret = PreRun(current_data); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, (void)HandleResult(ret, current_data.index, args, 
data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "PreRun failed."); // [No need to check value] + ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "PreRun failed."); // [No need to check value] ret = executor_->Execute(args); ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); @@ -142,7 +150,9 @@ Status HybridModelAsyncExecutor::RunInternal() { return SUCCESS; } -Status HybridModelAsyncExecutor::HandleResult(Status exec_ret, uint32_t data_id, HybridModelExecutor::ExecuteArgs &args, +Status HybridModelAsyncExecutor::HandleResult(Status exec_ret, + uint32_t data_id, + HybridModelExecutor::ExecuteArgs &args, OutputData *output_data) { GELOGD("Start to handle result. model id = %u, data index = %u, execution ret = %u", model_id_, data_id, exec_ret); std::vector output_tensor_info_list; @@ -174,8 +184,11 @@ Status HybridModelAsyncExecutor::SyncVarData() { if (global_step_var != nullptr) { std::vector v_step; v_step.push_back(iterator_count_); - GE_CHK_RT_RET(rtMemcpy(global_step_var->MutableData(), global_step_var->GetSize(), v_step.data(), - v_step.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(global_step_var->MutableData(), + global_step_var->GetSize(), + v_step.data(), + v_step.size() * sizeof(uint64_t), + RT_MEMCPY_HOST_TO_DEVICE)); } else { GELOGD("No GLOBAL_STEP variable was found."); } @@ -191,21 +204,26 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData ¤t_data) { auto data_size = input_tensor.GetSize(); GELOGD("To copy input data for input[%u]", input_index); if (input_index >= blobs.size()) { - GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - model_->input_nodes_.size(), input_index, data_size); + GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", + blobs.size(), model_->input_nodes_.size(), input_index, data_size); return FAILED; } const DataBuffer &data_buf = blobs[input_index]; auto mem_size = static_cast(data_size); - GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, PARAM_INVALID, - "input data size(%u) does not match model required size(%u), ret failed.", data_buf.length, + GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, + PARAM_INVALID, + "input data size(%u) does not match model required size(%u), ret failed.", + data_buf.length, mem_size); GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%u]", model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length); - GE_CHK_RT_RET( - rtMemcpy(input_tensor.MutableData(), mem_size, data_buf.data, data_buf.length, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(), + mem_size, + data_buf.data, + data_buf.length, + RT_MEMCPY_HOST_TO_DEVICE)); } return SUCCESS; @@ -220,7 +238,8 @@ Status HybridModelAsyncExecutor::InitInputTensors() { auto output_desc = input_node->op_desc->GetOutputDescPtr(kDataOutputIndex); GE_CHECK_NOTNULL(output_desc); int64_t tensor_size = 0; - GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size), "Failed to get size from %s", + GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size), + "Failed to get size from %s", input_node->NodeName().c_str()); if (tensor_size == 0) { 
GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str()); @@ -243,20 +262,24 @@ Status HybridModelAsyncExecutor::OnComputeDone(uint32_t data_index, uint32_t res std::vector &outputs) { GELOGD("OnComputeDone. model id = %u, data index = %u, execution ret = %u", model_id_, data_index, result_code); if (listener_ != nullptr) { - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_index, result_code, outputs), "OnComputeDone failed"); + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_index, result_code, outputs), + "OnComputeDone failed"); } return result_code; } -Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, +Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &args, + OutputData *output_data, std::vector &outputs) { // copy output data from op to designated position std::vector &output_tensor_desc_list = args.output_desc; std::vector &output_tensors = args.outputs; if (output_tensor_desc_list.size() != output_tensors.size()) { - GELOGE(INTERNAL_ERROR, "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu", - output_tensor_desc_list.size(), output_tensors.size()); + GELOGE(INTERNAL_ERROR, + "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu", + output_tensor_desc_list.size(), + output_tensors.size()); return INTERNAL_ERROR; } @@ -267,23 +290,29 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a auto &tensor_desc = output_tensor_desc_list.at(i); GE_CHECK_NOTNULL(tensor_desc); int64_t output_size = -1; - GE_CHK_GRAPH_STATUS_RET(TensorUtils::CalcTensorMemSize(tensor_desc->GetShape(), tensor_desc->GetFormat(), - tensor_desc->GetDataType(), output_size), - "Failed to calc tensor size for output[%zu]. shape = [%s], type = %s, format = %s", i, + GE_CHK_GRAPH_STATUS_RET(TensorUtils::CalcTensorMemSize(tensor_desc->GetShape(), + tensor_desc->GetFormat(), + tensor_desc->GetDataType(), + output_size), + "Failed to calc tensor size for output[%zu]. shape = [%s], type = %s, format = %s", + i, tensor_desc->GetShape().ToString().c_str(), TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str()); - GELOGD("Got tensor size for output[%zu] successfully. shape = [%s], type = %s, format = %s, size = %ld", i, + GELOGD("Got tensor size for output[%zu] successfully. 
shape = [%s], type = %s, format = %s, size = %ld", + i, tensor_desc->GetShape().ToString().c_str(), TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), - TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str(), output_size); + TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str(), + output_size); GE_CHECK_GE(output_size, 0); GE_CHECK_LE(output_size, UINT32_MAX); if (output_tensor.GetSize() < static_cast(output_size)) { - GELOGE(INTERNAL_ERROR, "output[%zu] tensor size(%zu) is not enough for output shape [%s]", i, - output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str()); + GELOGE(INTERNAL_ERROR, + "output[%zu] tensor size(%zu) is not enough for output shape [%s]", + i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str()); return INTERNAL_ERROR; } @@ -292,10 +321,13 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a output.dims = tensor_desc->GetShape().GetDims(); output.length = output_size; if (output_size > 0) { - std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_size]); + std::unique_ptr data_buf(new(std::nothrow) uint8_t[output_size]); GE_CHECK_NOTNULL(data_buf); - GE_CHK_RT_RET( - rtMemcpy(data_buf.get(), output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(data_buf.get(), + output_size, + output_tensor.GetData(), + output_size, + RT_MEMCPY_DEVICE_TO_HOST)); output.data = std::move(data_buf); output_data->blobs.emplace_back(data_buf.get(), static_cast(output_size), false); } else { @@ -305,9 +337,11 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a } outputs.emplace_back(std::move(output)); - GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i, + GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", + i, TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), - tensor_desc->GetShape().ToString().c_str(), output_size); + tensor_desc->GetShape().ToString().c_str(), + output_size); } return SUCCESS; @@ -351,7 +385,9 @@ Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector< } ge_tensor.MutableTensorDesc() = *args.output_desc[out_index]; - GELOGD("Set output[%d], tensor size = %ld, shape = [%s]", out_index, out_tensor_info.length, + GELOGD("Set output[%d], tensor size = %ld, shape = [%s]", + out_index, + out_tensor_info.length, ge_tensor.MutableTensorDesc().MutableShape().ToString().c_str()); ++out_index; } diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 195f79a9..8de2beb6 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -54,10 +54,13 @@ class HybridModelAsyncExecutor { Status SyncVarData(); - Status HandleResult(Status exec_ret, uint32_t data_id, HybridModelExecutor::ExecuteArgs &args, + Status HandleResult(Status exec_ret, + uint32_t data_id, + HybridModelExecutor::ExecuteArgs &args, OutputData *output_data); - Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, + Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, + OutputData *output_data, std::vector &outputs); Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector &outputs); @@ -82,4 +85,4 @@ class HybridModelAsyncExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_MODEL_HYBRID_MODEL_ASYNC_EXECUTOR_H_ +#endif // 
GE_HYBRID_EXECUTOR_MODEL_HYBRID_MODEL_ASYNC_EXECUTOR_H_ diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc old mode 100644 new mode 100755 index 718801b4..4af34451 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -23,13 +23,14 @@ namespace hybrid { namespace { const int kIntBase = 10; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; -} // namespace +} // namespace HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) - : model_(model), device_id_(device_id), stream_(stream) {} + : model_(model), device_id_(device_id), stream_(stream) { +} HybridModelExecutor::~HybridModelExecutor() { if (context_.rt_gen_context != nullptr) { - (void)rtCtxDestroy(context_.rt_gen_context); + (void) rtCtxDestroy(context_.rt_gen_context); } } @@ -61,7 +62,8 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { return SUCCESS; } -Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, HybridModelExecutor::ExecuteArgs &args) { +Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, + HybridModelExecutor::ExecuteArgs &args) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); @@ -96,15 +98,15 @@ Status HybridModelExecutor::InitExecutionContext() { GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id); context_.allocator = NpuMemoryAllocator::GetAllocator(device_id_); GE_CHECK_NOTNULL(context_.allocator); - context_.callback_manager = std::unique_ptr(new (std::nothrow) CallbackManager(stream_)); + context_.callback_manager = std::unique_ptr(new(std::nothrow)CallbackManager(stream_)); GE_CHECK_NOTNULL(context_.callback_manager); context_.dump_properties = PropertiesManager::Instance().GetDumpProperties(context_.session_id); const char *profiling_level = std::getenv(kEnvProfilingLevel); if (profiling_level != nullptr) { context_.profiling_level = std::strtol(profiling_level, nullptr, kIntBase); - GELOGD("Got profiling level = %d", context_.profiling_level); + GELOGD("Got profiling level = %ld", context_.profiling_level); if (context_.profiling_level > 0) { - context_.profiler.reset(new (std::nothrow) HybridProfiler()); + context_.profiler.reset(new(std::nothrow)HybridProfiler()); GE_CHECK_NOTNULL(context_.profiler); } } diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 2d1320a2..04aef6a5 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -39,7 +39,9 @@ class HybridModelExecutor { Status Init(); - const GraphExecutionContext *GetContext() const { return &context_; } + const GraphExecutionContext* GetContext() const { + return &context_; + } Status Execute(ExecuteArgs &args); @@ -56,4 +58,4 @@ class HybridModelExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ +#endif // GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ diff --git a/ge/hybrid/executor/hybrid_profiler.cc b/ge/hybrid/executor/hybrid_profiler.cc index 0150934e..7228197f 100644 --- a/ge/hybrid/executor/hybrid_profiler.cc +++ b/ge/hybrid/executor/hybrid_profiler.cc @@ -28,9 +28,11 @@ const int kMaxEvents = 10000; const int kEventDescMax = 256; const int kMaxEventTypes = 8; const 
int kIndent = 8; -} // namespace +} -HybridProfiler::HybridProfiler() : counter_(0) { Reset(); } +HybridProfiler::HybridProfiler(): counter_(0) { + Reset(); +} void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) { va_list args; @@ -74,8 +76,8 @@ void HybridProfiler::Dump(std::ostream &output_stream) { auto end_dump = std::chrono::system_clock::now(); auto elapsed_dump = std::chrono::duration_cast(end_dump - start).count(); auto cost_dump = std::chrono::duration_cast(end_dump - start_dump).count(); - output_stream << std::setw(kIndent) << elapsed_dump << "\t\t" << cost_dump << "\t\t" - << "[Dump profiling]" << std::endl; + output_stream << std::setw(kIndent) << elapsed_dump << "\t\t" << cost_dump + << "\t\t" << "[Dump profiling]" << std::endl; events_.clear(); } diff --git a/ge/hybrid/executor/hybrid_profiler.h b/ge/hybrid/executor/hybrid_profiler.h index 6f6794f4..62ef9c73 100644 --- a/ge/hybrid/executor/hybrid_profiler.h +++ b/ge/hybrid/executor/hybrid_profiler.h @@ -57,4 +57,4 @@ class HybridProfiler { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_HYBRID_PROFILER_H_ +#endif // GE_HYBRID_EXECUTOR_HYBRID_PROFILER_H_ diff --git a/ge/hybrid/executor/node_done_manager.cc b/ge/hybrid/executor/node_done_manager.cc index de4ea14e..c0b0b17b 100644 --- a/ge/hybrid/executor/node_done_manager.cc +++ b/ge/hybrid/executor/node_done_manager.cc @@ -25,7 +25,8 @@ constexpr int kDefaultWaitTimeoutInSec = 60 * 10; } bool NodeDoneManager::Cond::Await() { std::unique_lock lk(cond_mu_); - if (!cv_.wait_for(lk, std::chrono::seconds(kDefaultWaitTimeoutInSec), + if (!cv_.wait_for(lk, + std::chrono::seconds(kDefaultWaitTimeoutInSec), [&]() { return is_released_ || is_cancelled_; })) { GELOGE(INTERNAL_ERROR, "Wait timed out."); return false; diff --git a/ge/hybrid/executor/node_done_manager.h b/ge/hybrid/executor/node_done_manager.h index f1fdfbec..faf12b46 100644 --- a/ge/hybrid/executor/node_done_manager.h +++ b/ge/hybrid/executor/node_done_manager.h @@ -40,7 +40,6 @@ class NodeDoneManager { void Release(); void Cancel(); bool Await(); - private: std::mutex cond_mu_; std::condition_variable cv_; @@ -56,4 +55,4 @@ class NodeDoneManager { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_NODE_DONE_COND_MANAGER_H_ +#endif // GE_HYBRID_EXECUTOR_NODE_DONE_COND_MANAGER_H_ diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index e8e94c0d..4f1f3fe8 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -27,23 +27,31 @@ namespace { // 5s * 120, wait for 10m constexpr auto kWaitInternal = 5; constexpr auto kMaxWaitTimes = 120; -} // namespace +} ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(node_item) { this->num_pending_shapes_ = node_item.num_inputs - node_item.num_static_input_shapes; - GELOGD("[%s] ShapeInferenceState created, pending shape count = %d", node_item.NodeName().c_str(), + GELOGD("[%s] ShapeInferenceState created, pending shape count = %d", + node_item.NodeName().c_str(), this->num_pending_shapes_); } -void ShapeInferenceState::UpdateInputShape(uint32_t idx, const GeShape &ori_shape, const GeShape &shape) { +void ShapeInferenceState::UpdateInputShape(uint32_t idx, + const GeShape &ori_shape, + const GeShape &shape) { if (!node_item.is_dynamic || node_item.is_input_shape_static[idx]) { GELOGD("[%s] Trying to update static shape, idx = %u. 
old shape = [%s], new shape = [%s]", - node_item.NodeName().c_str(), idx, node_item.op_desc->MutableInputDesc(idx)->GetShape().ToString().c_str(), + node_item.NodeName().c_str(), + idx, + node_item.op_desc->MutableInputDesc(idx)->GetShape().ToString().c_str(), shape.ToString().c_str()); return; } - GELOGD("[%s] Update input shape [%u] with Shape: [%s] and OriginalShape: [%s]", node_item.NodeName().c_str(), idx, - shape.ToString().c_str(), ori_shape.ToString().c_str()); + GELOGD("[%s] Update input shape [%u] with Shape: [%s] and OriginalShape: [%s]", + node_item.NodeName().c_str(), + idx, + shape.ToString().c_str(), + ori_shape.ToString().c_str()); std::lock_guard lk(mu_); node_item.op_desc->MutableInputDesc(idx)->SetShape(shape); @@ -101,12 +109,17 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex GeShape shape; GeShape ori_shape; RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); - GE_CHK_STATUS_RET(future.Get(ori_shape, shape), "[%s] Get shape failed. index = %u", node_item.NodeName().c_str(), + GE_CHK_STATUS_RET(future.Get(ori_shape, shape), + "[%s] Get shape failed. index = %u", + node_item.NodeName().c_str(), idx); RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); - GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", node_item.NodeName().c_str(), idx, - shape.ToString().c_str(), ori_shape.ToString().c_str()); + GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", + node_item.NodeName().c_str(), + idx, + shape.ToString().c_str(), + ori_shape.ToString().c_str()); node_item.op_desc->MutableInputDesc(idx)->SetShape(std::move(shape)); node_item.op_desc->MutableInputDesc(idx)->SetOriginShape(ori_shape); } @@ -114,8 +127,11 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex return SUCCESS; } -ShapeFuture::ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context) - : src_node_(std::move(src_node)), src_index_(src_index), subgraph_context_(subgraph_context) {} +ShapeFuture::ShapeFuture(NodePtr src_node, + uint32_t src_index, + SubgraphContext *subgraph_context) + : src_node_(std::move(src_node)), src_index_(src_index), subgraph_context_(subgraph_context) { +} NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_context) : node_item_(&node_item), shape_inference_state_(node_item), subgraph_context_(subgraph_context) { @@ -124,16 +140,21 @@ NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_contex Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { for (auto &src_node : node_item_->dependents_for_execution) { - GELOGI("[%s] Start to wait for data dependent node: [%s]", node_item_->NodeName().c_str(), + GELOGI("[%s] Start to wait for data dependent node: [%s]", + node_item_->NodeName().c_str(), src_node->GetName().c_str()); - RECORD_EXECUTION_EVENT(&context, node_item_->NodeName().c_str(), "[AwaitNodeDone] [%s] Start", + RECORD_EXECUTION_EVENT(&context, + node_item_->NodeName().c_str(), + "[AwaitNodeDone] [%s] Start", src_node->GetName().c_str()); if (!subgraph_context_->Await(src_node)) { GELOGE(INTERNAL_ERROR, "[%s] Await node [%s] failed.", GetName().c_str(), src_node->GetName().c_str()); return INTERNAL_ERROR; } - RECORD_EXECUTION_EVENT(&context, node_item_->NodeName().c_str(), "[AwaitNodeDone] [%s] End", + RECORD_EXECUTION_EVENT(&context, + node_item_->NodeName().c_str(), + 
"[AwaitNodeDone] [%s] End", src_node->GetName().c_str()); GELOGI("[%s] Done waiting node.", src_node->GetName().c_str()); } @@ -144,7 +165,8 @@ Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { Status NodeState::WaitForPrepareDone() { if (prepare_future_.valid()) { GELOGD("[%s] Start to wait for prepare future.", GetName().c_str()); - GE_CHK_STATUS_RET(prepare_future_.get(), "[%s] PreRun failed.", GetName().c_str()); + GE_CHK_STATUS_RET(prepare_future_.get(), + "[%s] PreRun failed.", GetName().c_str()); } return SUCCESS; diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 73e0f75c..6ca714bb 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -66,23 +66,39 @@ struct NodeState { NodeState(const NodeItem &node_item, SubgraphContext *subgraph_context); ~NodeState() = default; - OpDesc *GetOpDesc() const { return op_desc_.get(); } + OpDesc *GetOpDesc() const { + return op_desc_.get(); + } - inline const NodeItem *GetNodeItem() const { return node_item_; } + inline const NodeItem *GetNodeItem() const { + return node_item_; + } - inline const string &GetName() const { return node_item_->NodeName(); } + inline const string &GetName() const { + return node_item_->NodeName(); + } - inline const string &GetType() const { return node_item_->NodeType(); } + inline const string &GetType() const { + return node_item_->NodeType(); + } - ShapeInferenceState &GetShapeInferenceState() { return shape_inference_state_; } + ShapeInferenceState &GetShapeInferenceState() { + return shape_inference_state_; + } - const shared_ptr &GetKernelTask() const { return kernel_task_; } + const shared_ptr &GetKernelTask() const { + return kernel_task_; + } - void SetKernelTask(const shared_ptr &kernel_task) { kernel_task_ = kernel_task; } + void SetKernelTask(const shared_ptr &kernel_task) { + kernel_task_ = kernel_task; + } Status WaitForPrepareDone(); - void SetPrepareFuture(std::future &&prepare_future) { this->prepare_future_ = std::move(prepare_future); } + void SetPrepareFuture(std::future &&prepare_future) { + this->prepare_future_ = std::move(prepare_future); + } Status AwaitInputTensors(GraphExecutionContext &context) const; @@ -100,4 +116,4 @@ using NodeStatePtr = std::shared_ptr; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_NODE_STATE_H_ +#endif // GE_HYBRID_EXECUTOR_NODE_STATE_H_ diff --git a/ge/hybrid/executor/rt_callback_manager.cc b/ge/hybrid/executor/rt_callback_manager.cc index c1c98f73..63eb46d5 100644 --- a/ge/hybrid/executor/rt_callback_manager.cc +++ b/ge/hybrid/executor/rt_callback_manager.cc @@ -21,16 +21,24 @@ namespace ge { namespace hybrid { -CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) {} +CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) { +} Status CallbackManager::RegisterCallback(rtCallback_t callback, void *user_data) { GELOGD("To register callback"); rtEvent_t event = nullptr; GE_CHK_RT_RET(rtEventCreate(&event)); - GE_CHK_RT_RET(rtEventRecord(event, stream_)); + auto rt_ret = rtEventRecord(event, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Failed to invoke rtEventRecord, error code = %d", rt_ret); + (void) rtEventDestroy(event); + return RT_FAILED; + } + auto cb = std::pair(callback, user_data); auto entry = std::pair>(event, std::move(cb)); if (!callback_queue_.Push(entry)) { + (void) rtEventDestroy(event); return INTERNAL_ERROR; } @@ -41,7 +49,9 @@ Status CallbackManager::RegisterCallback(rtCallback_t 
callback, void *user_data) Status CallbackManager::Init() { rtContext_t ctx = nullptr; GE_CHK_RT_RET(rtCtxGetCurrent(&ctx)); - ret_future_ = std::async([&](rtContext_t context) -> Status { return CallbackProcess(context); }, ctx); + ret_future_ = std::async([&](rtContext_t context) ->Status { + return CallbackProcess(context); + }, ctx); if (!ret_future_.valid()) { GELOGE(INTERNAL_ERROR, "Failed to init callback manager."); return INTERNAL_ERROR; @@ -103,7 +113,7 @@ void CallbackManager::RtCallbackFunc(void *data) { } Status CallbackManager::RegisterCallback(const std::function &callback) { - auto func = std::unique_ptr>(new (std::nothrow) std::function(callback)); + auto func = std::unique_ptr>(new(std::nothrow) std::function(callback)); GE_CHECK_NOTNULL(func); GELOGD("Callback registered"); return RegisterCallback(RtCallbackFunc, func.release()); diff --git a/ge/hybrid/executor/rt_callback_manager.h b/ge/hybrid/executor/rt_callback_manager.h index f102d660..1d1fa1cc 100644 --- a/ge/hybrid/executor/rt_callback_manager.h +++ b/ge/hybrid/executor/rt_callback_manager.h @@ -52,4 +52,4 @@ class CallbackManager { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_RT_CALLBACK_MANAGER_H_ +#endif // GE_HYBRID_EXECUTOR_RT_CALLBACK_MANAGER_H_ diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index 5d94efa2..923c2aa3 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,12 +20,16 @@ namespace ge { namespace hybrid { -SubgraphContext::SubgraphContext(const GraphItem *graph_item) : graph_item_(graph_item) {} +SubgraphContext::SubgraphContext(const GraphItem *graph_item) : graph_item_(graph_item) { + +} Status SubgraphContext::Init() { GE_CHECK_NOTNULL(graph_item_); - GELOGD("[%s] Start to init subgraph context. total inputs = %d, total outputs = %d", graph_item_->GetName().c_str(), - graph_item_->TotalInputs(), graph_item_->TotalOutputs()); + GELOGD("[%s] Start to init subgraph context. total inputs = %d, total outputs = %d", + graph_item_->GetName().c_str(), + graph_item_->TotalInputs(), + graph_item_->TotalOutputs()); all_inputs_.resize(static_cast(graph_item_->TotalInputs())); all_outputs_.resize(static_cast(graph_item_->TotalOutputs())); @@ -36,7 +40,7 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) { std::lock_guard lk(mu_); auto &node_state = node_states_[node_item]; if (node_state == nullptr) { - node_state.reset(new (std::nothrow) NodeState(*node_item, this)); + node_state.reset(new(std::nothrow)NodeState(*node_item, this)); } return node_state; @@ -44,7 +48,9 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) { Status SubgraphContext::SetInput(int index, const TensorValue &tensor) { if (static_cast(index) >= all_inputs_.size()) { - GELOGE(INTERNAL_ERROR, "output index output range. all input num = %zu, input index = %d", all_inputs_.size(), + GELOGE(INTERNAL_ERROR, + "output index output range. 
all input num = %zu, input index = %d", + all_inputs_.size(), index); return INTERNAL_ERROR; } @@ -60,8 +66,11 @@ Status SubgraphContext::SetInput(const NodeItem &node_item, int input_index, con Status SubgraphContext::SetOutput(const NodeItem &node_item, int output_index, const TensorValue &tensor) { auto index = node_item.output_start + output_index; if ((output_index >= node_item.num_outputs) || (static_cast(index) >= all_outputs_.size())) { - GELOGE(INTERNAL_ERROR, "output index output range. all output num = %zu, node_item = %s, output index = %d", - all_outputs_.size(), node_item.DebugString().c_str(), output_index); + GELOGE(INTERNAL_ERROR, + "output index output range. all output num = %zu, node_item = %s, output index = %d", + all_outputs_.size(), + node_item.DebugString().c_str(), + output_index); return INTERNAL_ERROR; } @@ -84,8 +93,10 @@ Status SubgraphContext::GetOutputs(std::vector &outputs) { for (int i = 0; i < output_node->num_inputs; ++i) { TensorValue tensor; GE_CHK_STATUS_RET_NOLOG(GetInput(output_node->input_start + i, tensor)); - GELOGD("[%s] Adding output tensor by input index [%d], tensor = %s", graph_item_->GetName().c_str(), - output_node->input_start + i, tensor.DebugString().c_str()); + GELOGD("[%s] Adding output tensor by input index [%d], tensor = %s", + graph_item_->GetName().c_str(), + output_node->input_start + i, + tensor.DebugString().c_str()); outputs.emplace_back(std::move(tensor)); } } @@ -100,13 +111,17 @@ Status SubgraphContext::GetOutputs(std::vector &outputs) { return SUCCESS; } -bool SubgraphContext::Await(const NodePtr &node) { return node_done_manager_.Await(node); } +bool SubgraphContext::Await(const NodePtr &node) { + return node_done_manager_.Await(node); +} void SubgraphContext::OnError(Status error) { GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str()); node_done_manager_.Destroy(); } -void SubgraphContext::NodeDone(const NodePtr &node) { node_done_manager_.NodeDone(node); } +void SubgraphContext::NodeDone(const NodePtr &node) { + node_done_manager_.NodeDone(node); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index fd934d80..b86765f7 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
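Aside, as a loose illustration (not part of the patch itself): the SubgraphContext hunks above show that all node outputs live in one flat vector and are addressed as output_start + output_index, with the bounds checks re-wrapped by the patch. The sketch below mirrors that flat-indexing pattern with hypothetical stand-in types, not the real NodeItem/TensorValue.

    #include <cstdio>
    #include <vector>

    // Hypothetical stand-ins for NodeItem / TensorValue from the diff.
    struct FakeNodeItem { int output_start; int num_outputs; };
    using FakeTensor = int;

    // Mirrors the flat-index bounds check pattern in SubgraphContext::SetOutput.
    bool SetOutput(std::vector<FakeTensor> &all_outputs, const FakeNodeItem &node,
                   int output_index, FakeTensor value) {
      const int index = node.output_start + output_index;
      if (output_index >= node.num_outputs ||
          static_cast<size_t>(index) >= all_outputs.size()) {
        std::fprintf(stderr, "output index out of range\n");
        return false;
      }
      all_outputs[static_cast<size_t>(index)] = value;
      return true;
    }

    int main() {
      std::vector<FakeTensor> all_outputs(4, 0);
      const FakeNodeItem node{2, 2};       // this node owns slots 2 and 3
      SetOutput(all_outputs, node, 1, 7);  // writes slot 3
      return 0;
    }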
@@ -58,4 +58,4 @@ class SubgraphContext { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_ITERATION_CONTEXT_H_ +#endif // GE_HYBRID_EXECUTOR_ITERATION_CONTEXT_H_ diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index c76bb209..ee5775f5 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -24,27 +24,31 @@ namespace hybrid { namespace { constexpr int kDefaultThreadNum = 4; constexpr int kDataInputIndex = 0; -} // namespace +} SubgraphExecutor::SubgraphExecutor(const GraphItem *graph_item, GraphExecutionContext *context, bool force_infer_shape) : graph_item_(graph_item), context_(context), force_infer_shape_(force_infer_shape), - pre_run_pool_(kDefaultThreadNum) {} + pre_run_pool_(kDefaultThreadNum) { +} -SubgraphExecutor::~SubgraphExecutor() { GELOGD("[%s] SubgraphExecutor destroyed.", graph_item_->GetName().c_str()); } +SubgraphExecutor::~SubgraphExecutor() { + GELOGD("[%s] SubgraphExecutor destroyed.", graph_item_->GetName().c_str()); +} Status SubgraphExecutor::Init(const std::vector &inputs, const std::vector &input_desc) { - subgraph_context_.reset(new (std::nothrow) SubgraphContext(graph_item_)); + subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_)); GE_CHECK_NOTNULL(subgraph_context_); GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str()); - shape_inference_engine_.reset(new (std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get())); + shape_inference_engine_.reset(new(std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get())); GE_CHECK_NOTNULL(shape_inference_engine_); if (graph_item_->IsDynamic()) { - GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc), "[%s] Failed to set inputs.", + GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc), + "[%s] Failed to set inputs.", graph_item_->GetName().c_str()); } else { GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs), @@ -73,11 +77,16 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetName().c_str(), i, - input_node->input_start, input_tensor.DebugString().c_str()); + GELOGD("[%s] Set input tensor[%zu] to inputs with index = %d, tensor = %s", + graph_item_->GetName().c_str(), + i, + input_node->input_start, + input_tensor.DebugString().c_str()); GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor), - "[%s] Failed to set input tensor[%zu]", graph_item_->GetName().c_str(), i); + "[%s] Failed to set input tensor[%zu]", + graph_item_->GetName().c_str(), + i); if (force_infer_shape_ || input_node->is_dynamic) { GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i); @@ -100,15 +109,20 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector if (static_cast(parent_input_index) >= inputs.size()) { GELOGE(INTERNAL_ERROR, "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs", - graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1); + graph_item_->GetName().c_str(), + inputs.size(), + parent_input_index + 1); return INTERNAL_ERROR; } auto &input_tensor = inputs[parent_input_index]; subgraph_context_->SetInput(static_cast(i), input_tensor); - GELOGD("[%s] Set input tensor[%zu] with inputs with index = %d, tensor = %s", graph_item_->GetName().c_str(), i, - parent_input_index, input_tensor.DebugString().c_str()); + GELOGD("[%s] Set input 
tensor[%zu] with inputs with index = %d, tensor = %s", + graph_item_->GetName().c_str(), + i, + parent_input_index, + input_tensor.DebugString().c_str()); } return SUCCESS; @@ -131,7 +145,9 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector &inputs, Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector &inputs) { GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); if (graph_item_->GetAllNodes().size() != 1) { - GELOGE(INTERNAL_ERROR, "[%s] Invalid known shape subgraph. node size = %zu", graph_item_->GetName().c_str(), + GELOGE(INTERNAL_ERROR, + "[%s] Invalid known shape subgraph. node size = %zu", + graph_item_->GetName().c_str(), graph_item_->GetAllNodes().size()); return INTERNAL_ERROR; } @@ -146,7 +162,8 @@ Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vectorGetName().c_str(), + "[%s] Failed to execute node [%s] for known subgraph.", + graph_item_->GetName().c_str(), known_shape_task_context_->GetNodeName()); GELOGD("[%s] Done execute non-dynamic subgraph successfully.", graph_item_->GetName().c_str()); @@ -163,16 +180,19 @@ Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) { input_desc.emplace_back(task_context.GetInputDesc(i)); } - GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc), "[%s] Failed to execute subgraph.", + GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc), + "[%s] Failed to execute subgraph.", graph_item_->GetName().c_str()); - GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context), "[%s] Failed to set output shapes to parent node.", + GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context), + "[%s] Failed to set output shapes to parent node.", graph_item_->GetName().c_str()); return SUCCESS; } Status SubgraphExecutor::PrepareNodes() { - GELOGD("[%s] Start to prepare nodes. force infer shape = %s.", graph_item_->GetName().c_str(), + GELOGD("[%s] Start to prepare nodes. force infer shape = %s.", + graph_item_->GetName().c_str(), force_infer_shape_ ? 
"true" : "false"); auto &all_nodes = graph_item_->GetAllNodes(); for (auto all_node : all_nodes) { @@ -209,8 +229,8 @@ Status SubgraphExecutor::PrepareNodes() { GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", node_item.NodeName().c_str()); if (node_item.kernel_task == nullptr) { GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str()); - GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), "[%s] Failed to create task.", - p_node_state->GetName().c_str()); + GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), + "[%s] Failed to create task.", p_node_state->GetName().c_str()); } else { node_state->SetKernelTask(node_item.kernel_task); } @@ -230,18 +250,18 @@ Status SubgraphExecutor::PrepareNodes() { Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) { const auto &node_item = *node_state.GetNodeItem(); - GE_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), "[%s] Failed to InferShape.", - node_state.GetName().c_str()); - GE_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item), "[%s] Failed to PropagateOutputShapes.", - node_state.GetName().c_str()); + GE_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), + "[%s] Failed to InferShape.", node_state.GetName().c_str()); + GE_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item), + "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); return SUCCESS; } Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state) { auto &node_item = *node_state.GetNodeItem(); if (node_item.kernel_task == nullptr) { - GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx), "Failed to create task for node[%s]", - node_state.GetName().c_str()); + GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx), + "Failed to create task for node[%s]", node_state.GetName().c_str()); } else { node_state.SetKernelTask(node_item.kernel_task); } @@ -276,7 +296,8 @@ Status SubgraphExecutor::LaunchTasks() { task_context->SetForceInferShape(force_infer_shape_); auto shared_task_context = std::shared_ptr(task_context.release()); GE_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), - "[%s] Execute node failed.", node_state->GetName().c_str()); + "[%s] Execute node failed.", + node_state->GetName().c_str()); GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str()); } @@ -301,22 +322,29 @@ Status SubgraphExecutor::ScheduleTasks() { return ret; } - GE_CHK_STATUS_RET(prepare_future.get(), "[%s] Error occurred in task preparation.", graph_item_->GetName().c_str()); + GE_CHK_STATUS_RET(prepare_future.get(), + "[%s] Error occurred in task preparation.", + graph_item_->GetName().c_str()); GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str()); return SUCCESS; } -Status SubgraphExecutor::GetOutputs(vector &outputs) { return subgraph_context_->GetOutputs(outputs); } +Status SubgraphExecutor::GetOutputs(vector &outputs) { + return subgraph_context_->GetOutputs(outputs); +} Status SubgraphExecutor::GetOutputs(vector &outputs, std::vector &output_desc) { GE_CHK_STATUS_RET(GetOutputs(outputs), "[%s] Failed to get output tensors.", graph_item_->GetName().c_str()); // copy output data from op to designated position - GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc), "[%s] Failed to get output tensor desc.", + 
GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc), + "[%s] Failed to get output tensor desc.", graph_item_->GetName().c_str()); if (outputs.size() != output_desc.size()) { - GELOGE(INTERNAL_ERROR, "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).", outputs.size(), + GELOGE(INTERNAL_ERROR, + "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).", + outputs.size(), output_desc.size()); return INTERNAL_ERROR; } @@ -334,14 +362,18 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { // get output tensors and tensor desc list std::vector outputs; std::vector output_desc_list; - GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs), "[%s] Failed to get output tensors.", + GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs), + "[%s] Failed to get output tensors.", graph_item_->GetName().c_str()); - GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list), "[%s] Failed to get output tensor desc.", + GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list), + "[%s] Failed to get output tensor desc.", graph_item_->GetName().c_str()); if (outputs.size() != output_desc_list.size()) { GELOGE(INTERNAL_ERROR, "[%s] num output tensors = %zu, num output tensor desc = %zu", - graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size()); + graph_item_->GetName().c_str(), + outputs.size(), + output_desc_list.size()); return INTERNAL_ERROR; } @@ -350,9 +382,14 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { int parent_output_index = graph_item_->GetParentOutputIndex(i); GE_CHECK_GE(parent_output_index, 0); // update tensor - GELOGD("[%s] Updating output[%zu] to parent output[%d]", graph_item_->GetName().c_str(), i, parent_output_index); - - GELOGD("[%s] Updating output tensor, index = %d, tensor = %s", graph_item_->GetName().c_str(), parent_output_index, + GELOGD("[%s] Updating output[%zu] to parent output[%d]", + graph_item_->GetName().c_str(), + i, + parent_output_index); + + GELOGD("[%s] Updating output tensor, index = %d, tensor = %s", + graph_item_->GetName().c_str(), + parent_output_index, outputs[i].DebugString().c_str()); GE_CHK_STATUS_RET(task_context.SetOutput(parent_output_index, outputs[i])); @@ -362,12 +399,17 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { const auto &output_desc = output_desc_list[i]; auto parent_output_desc = task_context.MutableOutputDesc(parent_output_index); GE_CHECK_NOTNULL(parent_output_desc); - GELOGD("[%s] Updating output shape[%d] from [%s] to [%s]", graph_item_->GetName().c_str(), parent_output_index, - parent_output_desc->MutableShape().ToString().c_str(), output_desc->GetShape().ToString().c_str()); + GELOGD("[%s] Updating output shape[%d] from [%s] to [%s]", + graph_item_->GetName().c_str(), + parent_output_index, + parent_output_desc->MutableShape().ToString().c_str(), + output_desc->GetShape().ToString().c_str()); parent_output_desc->SetShape(output_desc->GetShape()); - GELOGD("[%s] Updating output original shape[%d] from [%s] to [%s]", graph_item_->GetName().c_str(), - parent_output_index, parent_output_desc->GetOriginShape().ToString().c_str(), + GELOGD("[%s] Updating output original shape[%d] from [%s] to [%s]", + graph_item_->GetName().c_str(), + parent_output_index, + parent_output_desc->GetOriginShape().ToString().c_str(), output_desc->GetOriginShape().ToString().c_str()); parent_output_desc->SetOriginShape(output_desc->GetOriginShape()); } diff --git 
a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index 7cdb2070..d1949947 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -77,7 +77,8 @@ class SubgraphExecutor { private: static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); - Status Init(const std::vector &inputs, const std::vector &input_desc); + Status Init(const std::vector &inputs, + const std::vector &input_desc); Status InitInputsForUnknownShape(const std::vector &inputs, const std::vector &input_desc); Status InitInputsForKnownShape(const std::vector &inputs); @@ -98,4 +99,4 @@ class SubgraphExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_EXECUTOR_SUBGRAPH_EXECUTOR_H_ +#endif // GE_HYBRID_EXECUTOR_EXECUTOR_SUBGRAPH_EXECUTOR_H_ diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc old mode 100644 new mode 100755 index 1eb73e41..7dc65433 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -18,10 +18,14 @@ #include "graph/runtime_inference_context.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/tensor_adapter.h" +#include "graph/debug/ge_attr_define.h" #include "hybrid/node_executor/node_executor.h" #include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" #include "common/types.h" +#include "common/ge_types.h" +#include "common/profiling/profiling_manager.h" +#include "runtime/base.h" namespace ge { namespace hybrid { @@ -34,8 +38,11 @@ Status LogInputs(const NodeItem &node_item, const TaskContext &task_context) { GE_CHECK_NOTNULL(input_tensor); const auto &tensor_desc = node_item.op_desc->MutableInputDesc(i); GE_CHECK_NOTNULL(tensor_desc); - GELOGD("[%s] Print task args. input[%d] = %s, shape = [%s]", node_item.NodeName().c_str(), i, - input_tensor->DebugString().c_str(), tensor_desc->MutableShape().ToString().c_str()); + GELOGD("[%s] Print task args. input[%d] = %s, shape = [%s]", + node_item.NodeName().c_str(), + i, + input_tensor->DebugString().c_str(), + tensor_desc->MutableShape().ToString().c_str()); } return SUCCESS; @@ -47,8 +54,11 @@ Status LogOutputs(const NodeItem &node_item, const TaskContext &task_context) { GE_CHECK_NOTNULL(output_tensor); const auto &tensor_desc = node_item.op_desc->MutableOutputDesc(i); GE_CHECK_NOTNULL(tensor_desc); - GELOGD("[%s] Print task args. output[%d] = %s, shape = [%s]", node_item.NodeName().c_str(), i, - output_tensor->DebugString().c_str(), tensor_desc->MutableShape().ToString().c_str()); + GELOGD("[%s] Print task args. 
output[%d] = %s, shape = [%s]", + node_item.NodeName().c_str(), + i, + output_tensor->DebugString().c_str(), + tensor_desc->MutableShape().ToString().c_str()); } return SUCCESS; @@ -59,21 +69,28 @@ class NodeDoneCallback { NodeDoneCallback(GraphExecutionContext *graph_context, std::shared_ptr task_context); ~NodeDoneCallback() = default; Status OnNodeDone(); - private: Status PrepareConstInputs(const NodeItem &node_item); Status DumpDynamicNode(); + Status ProfilingReport(); + Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, + std::vector &compute_graph_info); + Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, + std::vector &task_desc_info); GraphExecutionContext *graph_context_; std::shared_ptr context_; DumpOp dump_op_; }; -NodeDoneCallback::NodeDoneCallback(GraphExecutionContext *graph_context, std::shared_ptr task_context) - : graph_context_(graph_context), context_(std::move(task_context)) {} +NodeDoneCallback::NodeDoneCallback(GraphExecutionContext *graph_context, + std::shared_ptr task_context) + : graph_context_(graph_context), context_(std::move(task_context)) { +} Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) { for (auto output_idx : node_item.to_const_output_id_list) { - RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), "[PrepareConstInputs] [index = %d] Start", + RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), + "[PrepareConstInputs] [index = %d] Start", output_idx); auto output_tensor = context_->GetOutput(output_idx); @@ -89,18 +106,26 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) { "Failed to invoke GetTensorSizeInBytes"); if (output_tensor->GetSize() < static_cast(tensor_size)) { - GELOGE(INTERNAL_ERROR, "[%s] Tensor size is not enough. output index = %d, required size = %zu, tensor = %s", - node_item.NodeName().c_str(), output_idx, tensor_size, output_tensor->DebugString().c_str()); + GELOGE(INTERNAL_ERROR, + "[%s] Tensor size is not enough. output index = %d, required size = %zu, tensor = %s", + node_item.NodeName().c_str(), + output_idx, + tensor_size, + output_tensor->DebugString().c_str()); return INTERNAL_ERROR; } vector host_buffer(static_cast(tensor_size)); - GELOGD("[%s] To cache output[%d] to host, size = %zu", node_item.NodeName().c_str(), output_idx, + GELOGD("[%s] To cache output[%d] to host, size = %zu", + node_item.NodeName().c_str(), + output_idx, output_tensor->GetSize()); - GE_CHK_RT_RET( - rtMemcpy(host_buffer.data(), tensor_size, output_tensor->GetData(), tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); - tensor.SetData(host_buffer); - + GE_CHK_RT_RET(rtMemcpy(host_buffer.data(), + tensor_size, + output_tensor->GetData(), + tensor_size, + RT_MEMCPY_DEVICE_TO_HOST)); + tensor.SetData(std::move(host_buffer)); string session_id = std::to_string(context_->GetSessionId()); RuntimeInferenceContext *runtime_infer_ctx = nullptr; GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(session_id, &runtime_infer_ctx), @@ -108,16 +133,134 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) { GE_CHK_STATUS_RET(runtime_infer_ctx->SetTensor(node_item.node_id, output_idx, std::move(tensor)), "Failed to SetTensor, node = %s, output_index = %d", node_item.NodeName().c_str(), output_idx); GELOGD("[%s] Output[%d] cached successfully in session: %s. 
node_id = %d, shape = [%s]", - node_item.NodeName().c_str(), output_idx, session_id.c_str(), node_item.node_id, + node_item.NodeName().c_str(), + output_idx, + session_id.c_str(), + node_item.node_id, ge_tensor_desc->GetShape().ToString().c_str()); - RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), "[PrepareConstInputs] [index = %d] End", + RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), + "[PrepareConstInputs] [index = %d] End", output_idx); } return SUCCESS; } +Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *model, + std::vector &task_desc_info) { + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(model); + + GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); + auto op_desc = node->GetOpDesc(); + std::string op_name = op_desc->GetName(); + std::string dynamic_model_name = model->GetModelName(); + + uint32_t task_id = 0; + uint32_t stream_id = 0; + if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { + GELOGE(PARAM_INVALID, "Get task_id and stream_id failed."); + return PARAM_INVALID; + } + + TaskDescInfo tmp_task_desc_info; + tmp_task_desc_info.model_name = dynamic_model_name; + tmp_task_desc_info.op_name = op_name; + tmp_task_desc_info.block_dim = 0; + auto task_defs = model->GetTaskDefs(node); + if (task_defs != nullptr && (*task_defs).size() > 0) { + const auto &task_def = (*task_defs)[0]; + tmp_task_desc_info.block_dim = task_def.kernel().block_dim(); + } + tmp_task_desc_info.task_id = task_id; + tmp_task_desc_info.stream_id = stream_id; + GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", + node->GetName().c_str(), task_id, stream_id); + task_desc_info.emplace_back(tmp_task_desc_info); + return SUCCESS; +} + +Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, + std::vector &compute_graph_info) { + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(model); + + GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); + + std::string dynamic_model_name = model->GetModelName(); + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(PARAM_INVALID, "op_desc is nullptr."); + return PARAM_INVALID; + } + + auto op_mode = static_cast(domi::ImplyType::INVALID); + if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && + op_mode == static_cast(domi::ImplyType::TVM)) { + ComputeGraphDescInfo tmp_compute_graph_info; + tmp_compute_graph_info.model_name = dynamic_model_name; + tmp_compute_graph_info.op_name = op_desc->GetName(); + tmp_compute_graph_info.op_type = op_desc->GetType(); + + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); + if (input_desc == nullptr) { + continue; + } + tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); + tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); + tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); + } + + for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { + GeTensorDesc output_desc = op_desc->GetOutputDesc(j); + tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); + tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); + tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); + } + compute_graph_info.emplace_back(tmp_compute_graph_info); + GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); + } + return 
SUCCESS; +} + +Status NodeDoneCallback::ProfilingReport() { + auto node = context_->GetNodeItem().node; + if (node == nullptr) { + GELOGE(PARAM_INVALID, "Get node is nullptr"); + return PARAM_INVALID; + } + + const auto &op_type = node->GetType(); + if (op_type == PARTITIONEDCALL) { + return SUCCESS; + } + + GE_CHECK_NOTNULL(graph_context_); + const HybridModel *model = graph_context_->model; + GE_CHECK_NOTNULL(model); + + GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); + std::vector task_desc_info; + TaskDescInfo tmp_task_desc_info; + auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); + if (profiling_ret != RT_ERROR_NONE) { + GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); + return profiling_ret; + } + + std::vector compute_graph_info; + profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); + if (profiling_ret != RT_ERROR_NONE) { + GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); + return profiling_ret; + } + + ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info); + return SUCCESS; +} + Status NodeDoneCallback::DumpDynamicNode() { auto node = context_->GetNodeItem().node; if (node == nullptr) { @@ -191,6 +334,11 @@ Status NodeDoneCallback::OnNodeDone() { GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node"); } + if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + GE_CHK_STATUS_RET(ProfilingReport(), "Report node[%s] to profiling failed.", + node_item.NodeName().c_str()); + } + // release inputs for (int i = 0; i < context_->NumInputs(); ++i) { context_->ReleaseInput(i); @@ -200,10 +348,11 @@ Status NodeDoneCallback::OnNodeDone() { // PropagateOutputs for type == DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { if (graph_context_->trace_enabled) { - (void)LogOutputs(node_item, *context_); + (void) LogOutputs(node_item, *context_); } - GE_CHK_STATUS_RET(context_->PropagateOutputs(), "[%s] Failed to propagate outputs failed", + GE_CHK_STATUS_RET(context_->PropagateOutputs(), + "[%s] Failed to propagate outputs failed", node_item.NodeName().c_str()); RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[PropagateOutputs] End"); @@ -219,11 +368,12 @@ Status NodeDoneCallback::OnNodeDone() { return SUCCESS; } -Status ExecutionEngine::ExecuteAsync(NodeState &node_state, const std::shared_ptr &task_context, +Status ExecutionEngine::ExecuteAsync(NodeState &node_state, + const std::shared_ptr &task_context, GraphExecutionContext &execution_context) { GELOGI("[%s] Node is ready for execution", task_context->GetNodeName()); RECORD_EXECUTION_EVENT(&execution_context, task_context->GetNodeName(), "Start"); - auto cb = std::shared_ptr(new (std::nothrow) NodeDoneCallback(&execution_context, task_context)); + auto cb = std::shared_ptr(new(std::nothrow) NodeDoneCallback(&execution_context, task_context)); GE_CHECK_NOTNULL(cb); auto callback = [&, cb]() { auto ret = cb->OnNodeDone(); @@ -237,7 +387,9 @@ Status ExecutionEngine::ExecuteAsync(NodeState &node_state, const std::shared_pt return SUCCESS; } -Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, TaskContext &task_context, GraphExecutionContext &context, +Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, + TaskContext &task_context, + GraphExecutionContext &context, const std::function &callback) { const auto &task = node_state.GetKernelTask(); if (task == nullptr) { @@ 
-247,14 +399,16 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, TaskContext &task_ // Wait for dependent nodes(DEPEND_COMPUTE), so that the input tensors are valid. RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[AwaitDependents] Start"); - GE_CHK_STATUS_RET(node_state.AwaitInputTensors(context), "[%s] Failed to wait for dependent nodes.", + GE_CHK_STATUS_RET(node_state.AwaitInputTensors(context), + "[%s] Failed to wait for dependent nodes.", node_state.GetName().c_str()); const auto &node_item = *node_state.GetNodeItem(); auto executor = node_item.node_executor; GE_CHECK_NOTNULL(executor); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start"); - GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), "[%s] Failed to prepare task", + GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), + "[%s] Failed to prepare task", node_state.GetName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End"); GELOGD("[%s] Done task preparation successfully.", node_state.GetName().c_str()); @@ -272,10 +426,13 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, TaskContext &task_ if (context.profiling_level > 0) { auto *ctx = &context; const string &name = node_state.GetName(); - (void)task_context.RegisterCallback([ctx, name]() { RECORD_CALLBACK_EVENT(ctx, name.c_str(), "[Compute] Start"); }); + (void)task_context.RegisterCallback([ctx, name]() { + RECORD_CALLBACK_EVENT(ctx, name.c_str(), "[Compute] Start"); + }); } RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] Start"); - GE_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback), "[%s] Failed to execute task", + GE_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback), + "[%s] Failed to execute task", node_state.GetName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] End"); @@ -299,17 +456,29 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const continue; } + if (input_tensor->GetData() == nullptr) { + GELOGD("[%s] Skipping null input, index = %d", task_context.GetNodeName(), i); + continue; + } + int64_t expected_size; GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, expected_size)); GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); auto size_diff = expected_size - static_cast(input_tensor->GetSize()); if (size_diff > 0) { if (size_diff <= kMaxPadding) { - GELOGW("[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu", task_context.GetNodeName(), i, - expected_size, input_tensor->GetSize()); + GELOGW("[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu", + task_context.GetNodeName(), + i, + expected_size, + input_tensor->GetSize()); } else { - GELOGE(INTERNAL_ERROR, "[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu", - task_context.GetNodeName(), i, expected_size, input_tensor->GetSize()); + GELOGE(INTERNAL_ERROR, + "[%s] Input[%d]: tensor size mismatches. 
expected: %ld, but given %zu", + task_context.GetNodeName(), + i, + expected_size, + input_tensor->GetSize()); return INTERNAL_ERROR; } } @@ -318,10 +487,12 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const return SUCCESS; } -Status ExecutionEngine::PropagateOutputs(const NodeItem &node_item, TaskContext &task_context, +Status ExecutionEngine::PropagateOutputs(const NodeItem &node_item, + TaskContext &task_context, GraphExecutionContext &context) { if (node_item.shape_inference_type != DEPEND_COMPUTE) { - GE_CHK_STATUS_RET(task_context.PropagateOutputs(), "[%s] Failed to propagate outputs.", + GE_CHK_STATUS_RET(task_context.PropagateOutputs(), + "[%s] Failed to propagate outputs.", node_item.NodeName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PropagateOutputs] End"); GELOGD("[%s] Done propagating outputs successfully.", node_item.NodeName().c_str()); diff --git a/ge/hybrid/executor/worker/execution_engine.h b/ge/hybrid/executor/worker/execution_engine.h index 56f1557d..ad80d99b 100644 --- a/ge/hybrid/executor/worker/execution_engine.h +++ b/ge/hybrid/executor/worker/execution_engine.h @@ -24,15 +24,18 @@ namespace ge { namespace hybrid { class ExecutionEngine { public: - static Status ExecuteAsync(NodeState &node_state, const std::shared_ptr &task_context, + static Status ExecuteAsync(NodeState &node_state, + const std::shared_ptr &task_context, GraphExecutionContext &execution_context); private: static Status ValidateInputTensors(const NodeState &node_state, const TaskContext &task_context); static Status PropagateOutputs(const NodeItem &node_item, TaskContext &task_context, GraphExecutionContext &context); - static Status DoExecuteAsync(NodeState &node_state, TaskContext &task_context, GraphExecutionContext &context, + static Status DoExecuteAsync(NodeState &node_state, + TaskContext &task_context, + GraphExecutionContext &context, const std::function &callback); }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_EXECUTOR_EXECUTION_ENGINE_H_ +#endif // GE_HYBRID_EXECUTOR_EXECUTOR_EXECUTION_ENGINE_H_ diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc old mode 100644 new mode 100755 index 49a29259..f4dec60a --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -22,13 +22,19 @@ namespace ge { namespace hybrid { ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) - : execution_context_(execution_context), subgraph_context_(subgraph_context) {} + : execution_context_(execution_context), + subgraph_context_(subgraph_context) { +} Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Wait for all input shape become valid GE_CHK_STATUS_RET_NOLOG(node_state.GetShapeInferenceState().AwaitShapesReady(*execution_context_)); auto &node_item = *node_state.GetNodeItem(); + + // Wait for "const input nodes" if node's shape inference function requires any. + // Even if output shape is static, there are cases that the const-input will be used in OpTiling and Execution + GE_CHK_STATUS_RET_NOLOG(AwaitDependentNodes(node_state)); if (node_item.is_output_shape_static) { return SUCCESS; } @@ -51,9 +57,6 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { } } - // Wait for "const input nodes" if node's shape inference function requires any. 
- GE_CHK_STATUS_RET_NOLOG(AwaitDependentNodes(node_state)); - // Do shape inference GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); { @@ -66,13 +69,17 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { bool is_unknown_shape = false; GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), - "Failed to get shape status. node = %s", node_item.NodeName().c_str()); + "Failed to get shape status. node = %s", + node_item.NodeName().c_str()); - GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, INTERNAL_ERROR, "[%s] Shape is still unknown after shape inference.", + GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, + INTERNAL_ERROR, + "[%s] Shape is still unknown after shape inference.", node_item.NodeName().c_str()); } - GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", node_item.NodeName().c_str(), + GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", + node_item.NodeName().c_str(), node_item.DebugString().c_str()); GELOGD("[%s] InferShapeAndType finished successfully.", node_item.NodeName().c_str()); @@ -82,15 +89,21 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { Status ShapeInferenceEngine::AwaitDependentNodes(NodeState &node_state) { auto &node_item = *node_state.GetNodeItem(); for (auto &src_node : node_item.dependents_for_shape_inference) { - GELOGI("[%s] Start to wait for data dependent node: %s", node_item.NodeName().c_str(), src_node->GetName().c_str()); - RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[AwaitNodeDone] [%s] Start", + GELOGI("[%s] Start to wait for data dependent node: %s", + node_item.NodeName().c_str(), + src_node->GetName().c_str()); + RECORD_SHAPE_INFERENCE_EVENT(execution_context_, + node_item.NodeName().c_str(), + "[AwaitNodeDone] [%s] Start", src_node->GetName().c_str()); if (!subgraph_context_->Await(src_node)) { GELOGE(INTERNAL_ERROR, "[%s] Await node failed.", src_node->GetName().c_str()); return INTERNAL_ERROR; } - RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[AwaitNodeDone] [%s] End", + RECORD_SHAPE_INFERENCE_EVENT(execution_context_, + node_item.NodeName().c_str(), + "[AwaitNodeDone] [%s] End", src_node->GetName().c_str()); GELOGI("[%s] Done waiting node.", src_node->GetName().c_str()); } @@ -105,8 +118,9 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { // output shape will not be valid until compute is done. bool shape_is_future = - node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE; - GELOGD("[%s] Start to propagate output shapes. shape_type = %d", node_item.NodeName().c_str(), + node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE; + GELOGD("[%s] Start to propagate output shapes. 
shape_type = %d", + node_item.NodeName().c_str(), node_item.shape_inference_type); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[PropagateOutputShapes] Start"); // propagate each output @@ -122,8 +136,10 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { auto dst_node_state = subgraph_context_->GetOrCreateNodeState(dst_node_item); GE_CHECK_NOTNULL(dst_node_state); - GELOGI("[%s] Update dst node [%s], input index = %d", node_item.NodeName().c_str(), - dst_node_item->NodeName().c_str(), dst_input_index_and_node.first); + GELOGI("[%s] Update dst node [%s], input index = %d", + node_item.NodeName().c_str(), + dst_node_item->NodeName().c_str(), + dst_input_index_and_node.first); // in case type 3 and 4, shape will be valid after computing is done if (shape_is_future) { @@ -158,7 +174,8 @@ Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, co GELOGD("[%s] Start to invoke InferShapeAndType", node->GetName().c_str()); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndType(node)); GELOGD("[%s] Done invoking InferShapeAndType", node->GetName().c_str()); - GE_CHK_STATUS_RET(UpdatePeerNodeShape(*node), "[%s] Failed to update shapes of peer node.", + GE_CHK_STATUS_RET(UpdatePeerNodeShape(*node), + "[%s] Failed to update shapes of peer node.", node->GetName().c_str()); } @@ -196,13 +213,15 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { } GELOGI("Peer input op desc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", - peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), - output_tensor->GetDataType(), output_tensor->GetOriginDataType()); + peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), + output_tensor->GetShape().GetDimNum(), output_tensor->GetDataType(), + output_tensor->GetOriginDataType()); peer_input_desc->SetOriginShape(output_tensor->GetOriginShape()); peer_input_desc->SetShape(output_tensor->GetShape()); GELOGI("Peer input op desc name is %s, shape size is %zu, datatype is %d, original datatype is %d", - peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_input_desc->GetShape().GetDimNum(), - peer_input_desc->GetDataType(), peer_input_desc->GetOriginDataType()); + peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), + peer_input_desc->GetShape().GetDimNum(), peer_input_desc->GetDataType(), + peer_input_desc->GetOriginDataType()); } } return SUCCESS; diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h index f8a391e2..7bb9269c 100644 --- a/ge/hybrid/executor/worker/shape_inference_engine.h +++ b/ge/hybrid/executor/worker/shape_inference_engine.h @@ -44,4 +44,4 @@ class ShapeInferenceEngine { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_INFERSHAPE_SHAPE_INFERENCE_ENGINE_H_ +#endif // GE_HYBRID_EXECUTOR_INFERSHAPE_SHAPE_INFERENCE_ENGINE_H_ diff --git a/ge/hybrid/executor/worker/task_compile_engine.cc b/ge/hybrid/executor/worker/task_compile_engine.cc old mode 100644 new mode 100755 diff --git a/ge/hybrid/executor/worker/task_compile_engine.h b/ge/hybrid/executor/worker/task_compile_engine.h index a677cb2e..0bc66a69 100644 --- a/ge/hybrid/executor/worker/task_compile_engine.h +++ b/ge/hybrid/executor/worker/task_compile_engine.h @@ -27,4 +27,4 @@ class TaskCompileEngine { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_COMPILE_TASK_COMPILE_ENGINE_H_ +#endif 
// GE_HYBRID_EXECUTOR_COMPILE_TASK_COMPILE_ENGINE_H_ diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc old mode 100644 new mode 100755 index 0454fa72..d696adf9 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -24,9 +24,12 @@ namespace ge { namespace hybrid { class HybridDavinciModel::Impl { public: - explicit Impl(GeRootModelPtr ge_model) : model_(std::move(ge_model)), executor_(&model_) {} + explicit Impl(GeRootModelPtr ge_model) : model_(std::move(ge_model)), executor_(&model_) { + } - ~Impl() { NodeExecutorManager::GetInstance().FinalizeExecutors(); } + ~Impl() { + NodeExecutorManager::GetInstance().FinalizeExecutors(); + } Status Init() { GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().EnsureInitialized(), "Failed to initialize executors"); @@ -39,13 +42,21 @@ class HybridDavinciModel::Impl { return executor_.Execute(inputs, outputs); } - Status ModelRunStart() { return executor_.Start(listener_); } + Status ModelRunStart() { + return executor_.Start(listener_); + } - Status ModelRunStop() { return executor_.Stop(); } + Status ModelRunStop() { + return executor_.Stop(); + } - Status EnqueueData(const std::shared_ptr &data) { return executor_.EnqueueData(data); } + Status EnqueueData(const std::shared_ptr &data) { + return executor_.EnqueueData(data); + } - void SetListener(const shared_ptr &listener) { listener_ = listener; } + void SetListener(const shared_ptr &listener) { + listener_ = listener; + } void SetModelId(uint32_t model_id) { executor_.SetModelId(model_id); @@ -63,10 +74,12 @@ class HybridDavinciModel::Impl { HybridModelAsyncExecutor executor_; }; -HybridDavinciModel::~HybridDavinciModel() { delete impl_; } +HybridDavinciModel::~HybridDavinciModel() { + delete impl_; +} unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { - auto instance = unique_ptr(new (std::nothrow) HybridDavinciModel()); + auto instance = unique_ptr(new (std::nothrow)HybridDavinciModel()); if (instance != nullptr) { instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model); if (instance->impl_ != nullptr) { diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index c286a222..00a48c1e 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -58,4 +58,4 @@ class HybridDavinciModel { }; } // namespace hybrid } // namespace ge -#endif // HYBRID_HYBRID_DAVINCI_MODEL_H_ +#endif // HYBRID_HYBRID_DAVINCI_MODEL_H_ diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc index 7bde98a3..b95b9efc 100644 --- a/ge/hybrid/hybrid_davinci_model_stub.cc +++ b/ge/hybrid/hybrid_davinci_model_stub.cc @@ -21,23 +21,36 @@ namespace hybrid { HybridDavinciModel::~HybridDavinciModel() {} std::unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { - return std::unique_ptr(new (std::nothrow) HybridDavinciModel()); + return std::unique_ptr(new (std::nothrow)HybridDavinciModel()); } -Status HybridDavinciModel::Init() { return UNSUPPORTED; } +Status HybridDavinciModel::Init() { + return UNSUPPORTED; +} -Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { return UNSUPPORTED; } +Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { + return UNSUPPORTED; +} -Status HybridDavinciModel::ModelRunStart() { return UNSUPPORTED; } +Status HybridDavinciModel::ModelRunStart() { + return UNSUPPORTED; +} -Status HybridDavinciModel::ModelRunStop() { return UNSUPPORTED; } 
+Status HybridDavinciModel::ModelRunStop() { + return UNSUPPORTED; +} -Status HybridDavinciModel::EnqueueData(const shared_ptr &data) { return UNSUPPORTED; } +Status HybridDavinciModel::EnqueueData(const shared_ptr &data) { + return UNSUPPORTED; +} -void HybridDavinciModel::SetListener(const shared_ptr &listener) {} +void HybridDavinciModel::SetListener(const shared_ptr &listener) { +} -void HybridDavinciModel::SetModelId(uint32_t model_id) {} +void HybridDavinciModel::SetModelId(uint32_t model_id) { +} -void HybridDavinciModel::SetDeviceId(uint32_t device_id) {} +void HybridDavinciModel::SetDeviceId(uint32_t device_id) { +} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/model/graph_item.cc b/ge/hybrid/model/graph_item.cc index 120865ce..b763772e 100644 --- a/ge/hybrid/model/graph_item.cc +++ b/ge/hybrid/model/graph_item.cc @@ -22,11 +22,17 @@ namespace hybrid { namespace { constexpr int kInvalidIndex = -1; } // namespace -GraphItem::~GraphItem() { GELOGD("[%s] GraphItem destroyed.", name_.c_str()); } +GraphItem::~GraphItem() { + GELOGD("[%s] GraphItem destroyed.", name_.c_str()); +} -const vector &hybrid::GraphItem::GetAllNodes() const { return node_items_; } +const vector &hybrid::GraphItem::GetAllNodes() const { + return node_items_; +} -const vector &GraphItem::GetInputNodes() const { return input_nodes_; } +const vector &GraphItem::GetInputNodes() const { + return input_nodes_; +} Status GraphItem::GetOutputDescList(vector &output_desc_list) const { if (output_node_ == nullptr) { @@ -46,9 +52,13 @@ Status GraphItem::GetOutputDescList(vector &output_desc_li return SUCCESS; } -bool GraphItem::IsDynamic() const { return is_dynamic_; } +bool GraphItem::IsDynamic() const { + return is_dynamic_; +} -const vector &GraphItem::GetInputIndexMapping() const { return input_index_mapping_; } +const vector &GraphItem::GetInputIndexMapping() const { + return input_index_mapping_; +} int GraphItem::GetParentOutputIndex(size_t index) const { if (index >= output_index_mapping_.size()) { @@ -58,6 +68,8 @@ int GraphItem::GetParentOutputIndex(size_t index) const { return output_index_mapping_[index]; } -const NodeItem *GraphItem::GetOutputNode() const { return output_node_; } +const NodeItem *GraphItem::GetOutputNode() const { + return output_node_; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/graph_item.h b/ge/hybrid/model/graph_item.h index cb0fbbed..64d809ee 100644 --- a/ge/hybrid/model/graph_item.h +++ b/ge/hybrid/model/graph_item.h @@ -30,13 +30,21 @@ class GraphItem { const vector &GetInputNodes() const; Status GetOutputDescList(std::vector &output_desc_list) const; - int TotalInputs() const { return total_inputs_; } + int TotalInputs() const { + return total_inputs_; + } - int TotalOutputs() const { return total_outputs_; } + int TotalOutputs() const { + return total_outputs_; + } - const std::string &GetName() const { return name_; } + const std::string& GetName() const { + return name_; + } - void SetName(const string &name) { name_ = name; } + void SetName(const string &name) { + name_ = name; + } const NodeItem *GetOutputNode() const; @@ -61,4 +69,4 @@ class GraphItem { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_MODEL_SUBGRAPH_ITEM_H_ +#endif // GE_HYBRID_MODEL_SUBGRAPH_ITEM_H_ diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 18db28cb..64138d4b 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -16,8 +16,8 @@ #include "hybrid_model.h" 
#include -#include "graph/load/new_model_manager/model_utils.h" #include "graph/debug/ge_attr_define.h" +#include "graph/load/new_model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" @@ -27,9 +27,12 @@ namespace ge { namespace hybrid { -HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) {} +HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) { +} -HybridModel::~HybridModel() { GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); } +HybridModel::~HybridModel() { + GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); +} Status HybridModel::Init() { GELOGD("Start to init hybrid model."); @@ -38,7 +41,7 @@ Status HybridModel::Init() { return SUCCESS; } -TensorValue *HybridModel::GetVariable(const string &name) const { +TensorValue* HybridModel::GetVariable(const string &name) const { auto it = variable_tensors_.find(name); if (it == variable_tensors_.end()) { GELOGI("Failed to get variable tensor. var name = [%s]", name.c_str()); @@ -50,13 +53,16 @@ TensorValue *HybridModel::GetVariable(const string &name) const { } NodePtr HybridModel::GetVariableNode(const string &name) const { - auto it = variable_nodes_.find(name); - if (it == variable_nodes_.end()) { - GELOGI("Failed to get variable node by name = [%s]", name.c_str()); - return nullptr; + auto it = device_variable_nodes_.find(name); + if (it != device_variable_nodes_.end()) { + return it->second; } - - return it->second; + auto host_find = host_variable_nodes_.find(name); + if (host_find != host_variable_nodes_.end()) { + return host_find->second; + } + GELOGI("Failed to get variable node by name = [%s]", name.c_str()); + return nullptr; } const std::vector *HybridModel::GetTaskDefs(const NodePtr &node) const { @@ -96,7 +102,9 @@ GeModelPtr HybridModel::GetGeModel(const NodePtr &node) const { return it->second; } -const GraphItem *HybridModel::GetRootGraphItem() const { return root_graph_item_.get(); } +const GraphItem* HybridModel::GetRootGraphItem() const { + return root_graph_item_.get(); +} const GraphItem *HybridModel::GetSubgraphItem(const std::string &graph_name) const { GELOGD("To find subgraph item by name = %s", graph_name.c_str()); @@ -119,6 +127,8 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c return GetSubgraphItem(subgraph_name); } -const string &HybridModel::GetModelName() const { return model_name_; } +const string &HybridModel::GetModelName() const { + return model_name_; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 668b5fd7..11311968 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,27 +41,39 @@ class HybridModel { const NodeItem *GetNodeItem(const NodePtr &node) const; - uint64_t GetSessionId() const { return root_runtime_param_.session_id; } + uint64_t GetSessionId() const { + return root_runtime_param_.session_id; + } GeModelPtr GetGeModel(const NodePtr &node) const; NodeItem *MutableNodeItem(const NodePtr &node); - size_t TotalVarMemSize() const { return root_runtime_param_.var_size; } + size_t TotalVarMemSize() const { + return root_runtime_param_.var_size; + } - const uint8_t *GetVarMemBase() const { return var_mem_base_; } + const uint8_t* GetVarMemBase() const { + return var_mem_base_; + } - void SetDeviceId(uint32_t device_id) { device_id_ = device_id; } + void SetDeviceId(uint32_t device_id) { + device_id_ = device_id; + } - void SetModelId(uint32_t model_id) { model_id_ = model_id; } + void SetModelId(uint32_t model_id) { + model_id_ = model_id; + } - uint32_t GetModelId() const { return model_id_; } + uint32_t GetModelId() const { + return model_id_; + } - TensorValue *GetVariable(const string &name) const; + TensorValue* GetVariable(const string &name) const; NodePtr GetVariableNode(const string &name) const; - const std::vector *GetTaskDefs(const NodePtr &node) const; + const std::vector* GetTaskDefs(const NodePtr &node) const; const GraphItem *GetRootGraphItem() const; @@ -79,7 +91,8 @@ class HybridModel { GeRootModelPtr ge_root_model_; std::map input_nodes_; std::map constant_op_nodes_; - std::map variable_nodes_; + std::map device_variable_nodes_; //lint !e148 + std::map host_variable_nodes_; //lint !e148 std::map> variable_tensors_; std::map> task_defs_; std::map known_shape_sub_models_; @@ -96,4 +109,4 @@ class HybridModel { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_HYBRID_GRAPH_H_ +#endif // GE_HYBRID_HYBRID_GRAPH_H_ diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc old mode 100644 new mode 100755 index 0671990c..812d822f --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -17,10 +17,12 @@ #include "hybrid/model/hybrid_model_builder.h" #include "common/math/math_util.h" #include "graph/ge_context.h" +#include "graph/build/memory/var_mem_assign_util.h" #include "graph/utils/node_utils.h" #include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" +#include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/type_utils.h" @@ -39,7 +41,7 @@ int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { int64_t var_size = 0; auto data_type = desc.GetDataType(); if (data_type == DT_STRING) { - (void)TensorUtils::GetSize(desc, var_size); + (void) TensorUtils::GetSize(desc, var_size); } else { var_size = GetSizeByDataType(data_type); if (var_size <= 0) { @@ -91,7 +93,8 @@ Status HybridModelBuilder::ValidateParams() { Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_item) { auto op_desc = node->GetOpDesc(); vector dependencies = node->GetOpDesc()->GetOpInferDepends(); - GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies), "[%s] Failed to parse node dependencies.", + GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies), + "[%s] Failed to parse node dependencies.", node_item.NodeName().c_str()); node_item.outputs.resize(node_item.num_outputs); @@ -102,7 +105,7 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, 
NodeItem &node_ite return INTERNAL_ERROR; } - for (auto &dst_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { + for (auto &dst_in_anchor: out_data_anchor->GetPeerInDataAnchors()) { auto dst_node = dst_in_anchor->GetOwnerNode(); if (dst_node == nullptr) { GELOGW("dst node is nullptr. out anchor = %d", out_data_anchor->GetIdx()); @@ -110,7 +113,8 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite } NodeItem *dst_node_item = nullptr; - GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), "[%s] Failed to get or create node item.", + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), + "[%s] Failed to get or create node item.", dst_node->GetName().c_str()); node_item.outputs[i].emplace_back(dst_in_anchor->GetIdx(), dst_node_item); } @@ -123,7 +127,7 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite Status HybridModelBuilder::ResolveRefIo(NodeItem &node_item) { bool is_ref = false; auto &op_desc = *node_item.op_desc; - (void)AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); + (void) AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); if (!is_ref) { return SUCCESS; } @@ -152,7 +156,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n return SUCCESS; } - auto new_node = std::unique_ptr(new (std::nothrow) NodeItem(node)); + auto new_node = std::unique_ptr(new(std::nothrow) NodeItem(node)); GE_CHECK_NOTNULL(new_node); GE_CHECK_NOTNULL(new_node->op_desc); GE_CHK_STATUS_RET(new_node->Init(), "Failed to init NodeItem [%s] .", node->GetName().c_str()); @@ -161,8 +165,8 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n // we do not need L2 Buffer const char *const kIsFirstNode = "is_first_node"; const char *const kIsLastNode = "is_last_node"; - (void)AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false); - (void)AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false); + (void) AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false); + (void) AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false); if (new_node->is_dynamic && (new_node->IsControlOp() || new_node->NodeType() == PARTITIONEDCALL)) { new_node->shape_inference_type = DEPEND_COMPUTE; @@ -180,6 +184,8 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const std::vector &dependencies) { std::set dependent_input_nodes; auto &ge_node = node_item.node; + bool is_hccl_op = + NodeExecutorManager::GetInstance().ResolveExecutorType(*ge_node) == NodeExecutorManager::ExecutorType::HCCL; // The input tensors become valid after computation is done for parent nodes of type DEPEND_COMPUTE. // Wait for these parent nodes before execution. 
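Editor's note on this hunk and the next: ParseDependentInputNodes now treats consumers resolved to the HCCL engine specially. Roughly, every data-producing parent of an HCCL node becomes an execution dependency; for other nodes only DEPEND_COMPUTE parents become execution dependencies, and DEPEND_SHAPE_RANGE parents are recorded (in a separate check) as shape-inference dependencies. A simplified standalone sketch of that classification follows; FakeNode and the enum are illustrative stand-ins, not the GE types.

#include <iostream>
#include <string>
#include <vector>

enum class ShapeInferenceType { kStatic, kDependShapeRange, kDependCompute };  // stand-in

struct FakeNode {
  std::string name;
  ShapeInferenceType shape_inference_type = ShapeInferenceType::kStatic;
};

// Mirrors the branching added in ParseDependentInputNodes (simplified): the
// DEPEND_SHAPE_RANGE test is deliberately a separate if, matching the patch.
void ClassifyDependency(bool consumer_is_hccl_op, const FakeNode &src,
                        std::vector<std::string> &dependents_for_execution,
                        std::vector<std::string> &dependents_for_shape_inference) {
  if (consumer_is_hccl_op) {
    dependents_for_execution.push_back(src.name);
  } else if (src.shape_inference_type == ShapeInferenceType::kDependCompute) {
    dependents_for_execution.push_back(src.name);
  }
  if (src.shape_inference_type == ShapeInferenceType::kDependShapeRange) {
    dependents_for_shape_inference.push_back(src.name);
  }
}

int main() {
  std::vector<std::string> exec_deps;
  std::vector<std::string> shape_deps;
  ClassifyDependency(false, {"range_producer", ShapeInferenceType::kDependShapeRange}, exec_deps, shape_deps);
  ClassifyDependency(true, {"hccl_input", ShapeInferenceType::kStatic}, exec_deps, shape_deps);
  std::cout << "execution deps: " << exec_deps.size()
            << ", shape-inference deps: " << shape_deps.size() << "\n";  // prints 1 and 1
  return 0;
}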
@@ -194,9 +200,16 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s auto src_node_item = MutableNodeItem(src_node); GE_CHECK_NOTNULL(src_node_item); - if (src_node_item->shape_inference_type == DEPEND_COMPUTE) { + if (is_hccl_op) { + GELOGD("[%s] Add input data dependent node [%s] due to engine type is HCCL", + node_item.NodeName().c_str(), + src_node_item->NodeName().c_str()); + src_node_item->has_observer = true; + node_item.dependents_for_execution.emplace_back(src_node); + } else if (src_node_item->shape_inference_type == DEPEND_COMPUTE) { GELOGD("[%s] Add input data dependent node [%s] due to inference type = DEPEND_COMPUTE", - node_item.NodeName().c_str(), src_node_item->NodeName().c_str()); + node_item.NodeName().c_str(), + src_node_item->NodeName().c_str()); src_node_item->has_observer = true; node_item.dependents_for_execution.emplace_back(src_node); @@ -204,7 +217,8 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s if (src_node_item->shape_inference_type == DEPEND_SHAPE_RANGE) { GELOGD("[%s] Add input shape dependent node [%s] due to inference type = DEPEND_SHAPE_RANGE", - node_item.NodeName().c_str(), src_node_item->NodeName().c_str()); + node_item.NodeName().c_str(), + src_node_item->NodeName().c_str()); src_node_item->has_observer = true; dependent_input_nodes.emplace(src_node); } @@ -222,14 +236,17 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s GE_CHECK_NOTNULL(src_node_item); src_node_item->has_observer = true; node_item.dependents_for_execution.emplace_back(src_node); - GELOGD("[%s] Dependent added from %s for control op's cond/branch", node_item.NodeName().c_str(), + GELOGD("[%s] Dependent added from %s for control op's cond/branch", + node_item.NodeName().c_str(), src_node_item->NodeName().c_str()); } for (const auto &input_name : dependencies) { int input_index = node_item.op_desc->GetInputIndexByName(input_name); if (input_index < 0) { - GELOGE(INTERNAL_ERROR, "[%s] Failed to get input index by name: %s", node_item.NodeName().c_str(), + GELOGE(INTERNAL_ERROR, + "[%s] Failed to get input index by name: %s", + node_item.NodeName().c_str(), input_name.c_str()); return INTERNAL_ERROR; } @@ -245,8 +262,10 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s src_node_item->has_observer = true; dependent_input_nodes.emplace(src_node); - GELOGD("[%s] Dependent added from output of [%s:%d]", node_item.NodeName().c_str(), - src_node_item->NodeName().c_str(), peer_out_anchor->GetIdx()); + GELOGD("[%s] Dependent added from output of [%s:%d]", + node_item.NodeName().c_str(), + src_node_item->NodeName().c_str(), + peer_out_anchor->GetIdx()); } for (const auto &dep_node : dependent_input_nodes) { @@ -287,21 +306,31 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) { Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor, const InDataAnchorPtr &in_data_anchor) { GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor), "Failed to unlink %s:%d from %s:%d", - out_data_anchor->GetOwnerNode()->GetName().c_str(), out_data_anchor->GetIdx(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); - - GELOGD("Succeeded in unlinking %s:%d from %s:%d", out_data_anchor->GetOwnerNode()->GetName().c_str(), - out_data_anchor->GetIdx(), in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); + out_data_anchor->GetOwnerNode()->GetName().c_str(), + 
out_data_anchor->GetIdx(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), + in_data_anchor->GetIdx()); + + GELOGD("Succeeded in unlinking %s:%d from %s:%d", + out_data_anchor->GetOwnerNode()->GetName().c_str(), + out_data_anchor->GetIdx(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), + in_data_anchor->GetIdx()); return SUCCESS; } Status HybridModelBuilder::DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor) { GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "Failed to link %s:%d to %s:%d", - out_data_anchor->GetOwnerNode()->GetName().c_str(), out_data_anchor->GetIdx(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); - - GELOGD("Succeeded in linking %s:%d to %s:%d", out_data_anchor->GetOwnerNode()->GetName().c_str(), - out_data_anchor->GetIdx(), in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); + out_data_anchor->GetOwnerNode()->GetName().c_str(), + out_data_anchor->GetIdx(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), + in_data_anchor->GetIdx()); + + GELOGD("Succeeded in linking %s:%d to %s:%d", + out_data_anchor->GetOwnerNode()->GetName().c_str(), + out_data_anchor->GetIdx(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), + in_data_anchor->GetIdx()); return SUCCESS; } @@ -323,7 +352,9 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(FAILED, "[%s] Failed to get attr [%s]", data_op_desc->GetName().c_str(), + GELOGE(FAILED, + "[%s] Failed to get attr [%s]", + data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } @@ -357,7 +388,7 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { if (in_node_set.count(in_control_node) == 0) { GELOGD("[%s] Restore control edge to [%s]", in_control_node->GetName().c_str(), root_node->GetName().c_str()); GE_CHECK_NOTNULL(in_control_node->GetOutControlAnchor()); - (void)in_control_node->GetOutControlAnchor()->LinkTo(root_node->GetInControlAnchor()); + (void) in_control_node->GetOutControlAnchor()->LinkTo(root_node->GetInControlAnchor()); } } } @@ -369,7 +400,10 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { const auto &parent_node = graph.GetParentNode(); const NodePtr &net_output_node = graph.FindFirstNodeMatchType(NETOUTPUT); - GE_CHECK_NOTNULL(net_output_node); + if (net_output_node == nullptr) { + GELOGD("Graph has no netoutput no need to merge."); + return SUCCESS; + } const auto &net_output_desc = net_output_node->GetOpDesc(); GE_CHECK_NOTNULL(net_output_desc); @@ -392,8 +426,8 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGW("SubGraph: %s NetOutput input tensor %d, attr %s not found.", graph.GetName().c_str(), index, - ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGW("SubGraph: %s NetOutput input tensor %d, attr %s not found.", + graph.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); continue; } @@ -441,18 +475,17 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap continue; } - bool is_unknown_shape = false; - GE_CHK_GRAPH_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), - "Failed to invoke GetNodeUnknownShapeStatus."); + auto subgraph 
= NodeUtils::GetSubgraph(*node, kSubgraphIndex); + GE_CHECK_NOTNULL(subgraph); + bool is_unknown_shape = subgraph->GetGraphUnknownFlag(); if (!is_unknown_shape) { merged_graph->AddNode(node); GELOGD("[%s] Known shape partitioned call added to merged graph.", op_desc->GetName().c_str()); continue; } - auto subgraph = NodeUtils::GetSubgraph(*node, kSubgraphIndex); - GE_CHECK_NOTNULL(subgraph); - GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, *merged_graph, *subgraph), "[%s] Failed to merge subgraph.", + GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, *merged_graph, *subgraph), + "[%s] Failed to merge subgraph.", subgraph->GetName().c_str()); } @@ -461,21 +494,25 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap for (auto &remained_subgraph : root_graph.GetAllSubgraphs()) { GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str()); - GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph), "Failed to add subgraph [%s]", + GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph), + "Failed to add subgraph [%s]", remained_subgraph->GetName().c_str()); } return SUCCESS; } -Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, +Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, + ComputeGraph &parent_graph, ComputeGraph &sub_graph) { auto parent_node = sub_graph.GetParentNode(); GE_CHECK_NOTNULL(parent_node); - GE_CHK_STATUS_RET(MergeInputNodes(sub_graph), "[%s] Failed to merge data nodes for subgraph", + GE_CHK_STATUS_RET(MergeInputNodes(sub_graph), + "[%s] Failed to merge data nodes for subgraph", sub_graph.GetName().c_str()); - GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph), "[%s] Failed to merge net output nodes for subgraph", + GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph), + "[%s] Failed to merge net output nodes for subgraph", sub_graph.GetName().c_str()); GELOGD("[%s] Done merging subgraph inputs and outputs successfully.", sub_graph.GetName().c_str()); @@ -484,28 +521,21 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph if (sub_op_type == DATA_TYPE || sub_op_type == NETOUTPUT) { continue; } - - if (sub_op_type == CONSTANT || sub_op_type == VARIABLE) { - GELOGE(INTERNAL_ERROR, "Unexpected node in unknown subgraph. 
type = %s, node = %s::%s", sub_op_type.c_str(), - sub_graph.GetName().c_str(), sub_node->GetName().c_str()); - return INTERNAL_ERROR; - } - if (sub_op_type == PARTITIONEDCALL) { - bool is_unknown_shape = false; - GE_CHK_GRAPH_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*sub_node, is_unknown_shape), - "[%s] Failed to invoke GetNodeUnknownShapeStatus.", sub_node->GetName().c_str()); - if (is_unknown_shape) { - auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); - GE_CHECK_NOTNULL(sub_sub_graph); - GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), "[%s] Failed to merge subgraph", + auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); + GE_CHECK_NOTNULL(sub_sub_graph); + if (sub_sub_graph->GetGraphUnknownFlag()) { + GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), + "[%s] Failed to merge subgraph", sub_sub_graph->GetName().c_str()); continue; } } parent_graph.AddNode(sub_node); - GELOGD("[%s::%s] added to parent graph: [%s].", sub_graph.GetName().c_str(), sub_node->GetName().c_str(), + GELOGD("[%s::%s] added to parent graph: [%s].", + sub_graph.GetName().c_str(), + sub_node->GetName().c_str(), parent_graph.GetName().c_str()); } @@ -514,7 +544,9 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph return SUCCESS; } -Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, const NodeItem &node_item, bool is_root_graph) { +Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, + const NodeItem &node_item, + bool is_root_graph) { auto output_size = node_item.op_desc->GetAllInputsSize(); GE_CHECK_LE(output_size, UINT32_MAX); graph_item.output_edges_.resize(output_size); @@ -528,8 +560,11 @@ Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, const NodeI auto src_node_item = GetNodeItem(src_node); GE_CHECK_NOTNULL(src_node_item); auto output_offset = src_node_item->output_start + peer_out_anchor->GetIdx(); - GELOGI("Output[%d], node = %s, output_index = %d, output_offset = %d ", in_data_anchor->GetIdx(), - src_node_item->NodeName().c_str(), peer_out_anchor->GetIdx(), output_offset); + GELOGI("Output[%d], node = %s, output_index = %d, output_offset = %d ", + in_data_anchor->GetIdx(), + src_node_item->NodeName().c_str(), + peer_out_anchor->GetIdx(), + output_offset); graph_item.output_edges_[in_data_anchor->GetIdx()] = {src_node_item, peer_out_anchor->GetIdx()}; } @@ -553,11 +588,13 @@ Status HybridModelBuilder::LoadGraph() { auto root_graph = ge_root_model_->GetRootGraph(); if (!GetContext().GetHostExecFlag()) { std::shared_ptr merged_graph; - GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), + GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", + root_graph->GetDirectNodesSize(), root_graph->GetAllNodesSize()); GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(*root_graph, merged_graph), "Failed to unfold subgraphs."); root_graph = std::move(merged_graph); - GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), + GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", + root_graph->GetDirectNodesSize(), root_graph->GetAllNodesSize()); GE_DUMP(root_graph, "hybrid_merged_graph"); } @@ -578,16 +615,19 @@ Status HybridModelBuilder::LoadGraph() { } if (sub_graph->GetGraphUnknownFlag()) { - GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false), "Failed to load subgraph: 
[%s]", + GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false), + "Failed to load subgraph: [%s]", sub_graph->GetName().c_str()); } else { - GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item), "[%s] Failed to identify ref outputs.", + GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item), + "[%s] Failed to identify ref outputs.", parent_node_item->NodeName().c_str()); // if parent is function control op. need add a virtual partitioned call if (parent_node_item->IsControlOp()) { GE_CHK_STATUS_RET(LoadKnownShapedSubgraph(*sub_graph, parent_node_item), - "Failed to load function control op subgraph [%s]", sub_graph->GetName().c_str()); + "Failed to load function control op subgraph [%s]", + sub_graph->GetName().c_str()); } } } @@ -596,16 +636,21 @@ Status HybridModelBuilder::LoadGraph() { return SUCCESS; } -const NodeItem *HybridModelBuilder::GetNodeItem(const NodePtr &node) const { return hybrid_model_.GetNodeItem(node); } +const NodeItem *HybridModelBuilder::GetNodeItem(const NodePtr &node) const { + return hybrid_model_.GetNodeItem(node); +} -NodeItem *HybridModelBuilder::MutableNodeItem(const NodePtr &node) { return hybrid_model_.MutableNodeItem(node); } +NodeItem *HybridModelBuilder::MutableNodeItem(const NodePtr &node) { + return hybrid_model_.MutableNodeItem(node); +} Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ptr &tensor) { string var_name = var_node->GetName(); auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0); uint8_t *var_logic = nullptr; GE_CHK_STATUS_RET(var_manager_->GetVarAddr(var_name, *tensor_desc, &var_logic), - "Failed to get var addr. var_name = %s, session_id = %ld", var_name.c_str(), + "Failed to get var addr. var_name = %s, session_id = %ld", + var_name.c_str(), hybrid_model_.GetSessionId()); uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM); @@ -619,7 +664,7 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ int64_t var_size = CalcVarSizeInBytes(*tensor_desc); // var size is only for checking, will not allocate any memory by it - tensor.reset(new (std::nothrow) TensorValue(dev_mem, static_cast(var_size))); + tensor.reset(new(std::nothrow)TensorValue(dev_mem, static_cast(var_size))); GE_CHECK_NOTNULL(tensor); return SUCCESS; } @@ -642,7 +687,8 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr GE_CHK_BOOL_RET_STATUS(ge::CheckInt64Uint32MulOverflow(elem_num, kBytes) == SUCCESS, FAILED, "Shape size is invalid"); auto offset = static_cast(elem_num * kBytes); - auto hbm_raw_data_base_addr = reinterpret_cast(reinterpret_cast(var_addr) + offset); + auto hbm_raw_data_base_addr = + reinterpret_cast(reinterpret_cast(var_addr) + offset); for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); } @@ -668,6 +714,19 @@ Status HybridModelBuilder::AssignUninitializedConstantOps() { } } + for (auto &it : hybrid_model_.device_variable_nodes_) { + const string &var_name = it.first; + const NodePtr &var_node = it.second; + auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0); + if (!var_manager_->IsVarExist(var_name, *tensor_desc)) { + // allocate constant + GELOGD("[%s] Constant not allocated during graph building. 
now allocate it.", var_name.c_str()); + GE_CHK_STATUS_RET(var_manager_->AssignVarMem(var_name, *tensor_desc, RT_MEMORY_HBM)); + GE_CHK_STATUS_RET(VarMemAssignUtil::AssignData2Fp32Var(var_node, runtime_param_.session_id)) + GE_CHK_STATUS_RET(var_manager_->SetAllocatedGraphId(var_name, runtime_param_.graph_id)); + } + } + return SUCCESS; } @@ -675,28 +734,32 @@ Status HybridModelBuilder::InitConstantOps() { for (auto &it : hybrid_model_.constant_op_nodes_) { const string &var_name = it.first; const NodePtr &var_node = it.second; - std::unique_ptr var_tensor; - - GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); - GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); - var_tensor->SetName("ConstOp_" + var_name); - auto op_desc = var_node->GetOpDesc(); auto v_weights = ModelUtils::GetWeights(op_desc); - auto v_output_size = var_tensor->GetSize(); - auto v_output_addr = var_tensor->MutableData(); - auto *ge_tensor = const_cast(v_weights[0].get()); - if (ge_tensor->GetData().size() > 0) { - GE_CHK_STATUS_RET_NOLOG(HandleDtString(*ge_tensor, v_output_addr)); - - GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%zu] datasize[%zu]", - runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr, v_output_size, - ge_tensor->GetData().size()); - GE_CHK_RT_RET(rtMemcpy(v_output_addr, v_output_size, ge_tensor->GetData().data(), ge_tensor->GetData().size(), - RT_MEMCPY_HOST_TO_DEVICE)); + + std::unique_ptr var_tensor; + if (GetContext().GetHostExecFlag()) { + auto buffer = ge_tensor->MutableData(); + GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); + var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); } else { - GELOGI("[%s] Const op has no weight data.", op_desc->GetName().c_str()); + GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); + GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); + var_tensor->SetName("ConstOp_" + var_name); + auto v_output_size = var_tensor->GetSize(); + auto v_output_addr = var_tensor->MutableData(); + if (ge_tensor->GetData().size() > 0) { + GE_CHK_STATUS_RET_NOLOG(HandleDtString(*ge_tensor, v_output_addr)); + + GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%zu] datasize[%zu]", + runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr, v_output_size, + ge_tensor->GetData().size()); + GE_CHK_RT_RET(rtMemcpy(v_output_addr, v_output_size, ge_tensor->GetData().data(), ge_tensor->GetData().size(), + RT_MEMCPY_HOST_TO_DEVICE)); + } else { + GELOGI("[%s] Const op has no weight data.", op_desc->GetName().c_str()); + } } hybrid_model_.variable_tensors_.emplace(var_name, std::move(var_tensor)); @@ -706,17 +769,40 @@ Status HybridModelBuilder::InitConstantOps() { } Status HybridModelBuilder::InitVariableTensors() { - for (auto &it : hybrid_model_.variable_nodes_) { + for (auto &it : hybrid_model_.device_variable_nodes_) { string var_name = it.first; NodePtr &var_node = it.second; std::unique_ptr tensor; GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, tensor)); - GELOGD("Init variable tensor. name = %s, size = %ld, addr = %p", var_name.c_str(), tensor->GetSize(), + GELOGD("Init variable tensor. 
name = %s, size = %ld, addr = %p", + var_name.c_str(), + tensor->GetSize(), tensor->GetData()); tensor->SetName("Var_" + var_name); hybrid_model_.variable_tensors_.emplace(var_name, std::move(tensor)); } + for (const auto &it : hybrid_model_.host_variable_nodes_) { + auto op_desc = it.second->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + GeTensorDesc output_tensor = op_desc->GetOutputDesc(0); + int64_t tensor_size = 0; + if (TensorUtils::CalcTensorMemSize(output_tensor.GetShape(), output_tensor.GetFormat(), output_tensor.GetDataType(), + tensor_size) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Calculate variable size failed, node name:%s", it.first.c_str()); + return INTERNAL_ERROR; + } + SharedMemInfo mem_info(it.first, tensor_size); + if (HostMemManager::Instance().MallocSharedMemory(mem_info) != SUCCESS) { + GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); + return GE_GRAPH_MALLOC_FAILED; + } + GELOGD("Host variable [%s] malloc success.", it.first.c_str()); + + std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); + hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); + } + return SUCCESS; } @@ -734,7 +820,9 @@ Status HybridModelBuilder::LoadTasks() { } GELOGD("[%s] Start to build kernel task", node_ptr->GetName().c_str()); - auto load_ret = node_item->node_executor->LoadTask(hybrid_model_, node_ptr, node_item->kernel_task); + auto load_ret = node_item->node_executor->LoadTask(hybrid_model_, + node_ptr, + node_item->kernel_task); if (load_ret != UNSUPPORTED && load_ret != SUCCESS) { GELOGE(load_ret, "[%s] Failed to load task", node_ptr->GetName().c_str()); return load_ret; @@ -751,11 +839,13 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr GE_CHECK_NOTNULL(parent_node); auto op_type = parent_node->GetType(); if (op_type == IF || op_type == CASE || op_type == WHILE) { - GELOGD("Set ge_model for control op subgraph: [%s], task_size = %d", sub_graph.GetName().c_str(), + GELOGD("Set ge_model for control op subgraph: [%s], task_size = %d", + sub_graph.GetName().c_str(), ge_model->GetModelTaskDefPtr()->task_size()); subgraph_models_.emplace(sub_graph.GetName(), ge_model); } else { - GELOGD("Set ge_model for subgraph: [%s], task_size = %d", sub_graph.GetName().c_str(), + GELOGD("Set ge_model for subgraph: [%s], task_size = %d", + sub_graph.GetName().c_str(), ge_model->GetModelTaskDefPtr()->task_size()); hybrid_model_.known_shape_sub_models_.emplace(sub_graph.GetParentNode(), ge_model); } @@ -837,14 +927,22 @@ Status HybridModelBuilder::IndexSpecialNodes() { auto op_type = node->GetType(); GELOGD("node name = %s, node type = %s", node->GetName().c_str(), node->GetType().c_str()); if (op_type == VARIABLE) { - hybrid_model_.variable_nodes_.emplace(node->GetName(), node); + string placement; + (void) AttrUtils::GetStr(node->GetOpDesc(), ATTR_VARIABLE_PLACEMENT, placement); + if (placement == "host") { + hybrid_model_.host_variable_nodes_.emplace(node->GetName(), node); + } else { + hybrid_model_.device_variable_nodes_.emplace(node->GetName(), node); + } } else if (op_type == CONSTANTOP) { hybrid_model_.constant_op_nodes_.emplace(node->GetName(), node); } else if (op_type == DATA && node->GetOwnerComputeGraph() != root_graph) { NodePtr src_node; int peer_out_index = -1; GE_CHK_STATUS_RET_NOLOG(GetPeerNodeAcrossSubGraphs(node, src_node, peer_out_index)); - GELOGD("Got peer node for data node %s, peer node = %s(%s)", node->GetName().c_str(), src_node->GetName().c_str(), + 
GELOGD("Got peer node for data node %s, peer node = %s(%s)", + node->GetName().c_str(), + src_node->GetName().c_str(), src_node->GetType().c_str()); auto src_op_type = src_node->GetType(); @@ -857,11 +955,11 @@ Status HybridModelBuilder::IndexSpecialNodes() { } } } - return SUCCESS; } -Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, NodePtr &peer_node, +Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, + NodePtr &peer_node, int &peer_out_index) { auto sub_graph = data_node->GetOwnerComputeGraph(); GE_CHECK_NOTNULL(sub_graph); @@ -874,7 +972,9 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, auto data_op_desc = data_node->GetOpDesc(); uint32_t parent_index = 0; if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(INTERNAL_ERROR, "[%s] Failed to get attr [%s]", data_op_desc->GetName().c_str(), + GELOGE(INTERNAL_ERROR, + "[%s] Failed to get attr [%s]", + data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return INTERNAL_ERROR; } @@ -897,7 +997,8 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, if (src_node_type != PARTITIONEDCALL) { peer_node = src_wrapped_node; peer_out_index = kVarOutputIndex; - GELOGD("[%s] Node is connected to root graph's node: %s", data_node->GetName().c_str(), + GELOGD("[%s] Node is connected to root graph's node: %s", + data_node->GetName().c_str(), peer_node->GetName().c_str()); return SUCCESS; } @@ -905,8 +1006,10 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, auto src_graph = NodeUtils::GetSubgraph(*src_wrapped_node, kSubgraphIndex); GE_CHECK_NOTNULL(src_graph); auto src_net_output_node = src_graph->FindFirstNodeMatchType(NETOUTPUT); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr, return INTERNAL_ERROR, - "Failed to find NetOutput in subgraph: %s", src_graph->GetName().c_str()); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr, + return INTERNAL_ERROR, + "Failed to find NetOutput in subgraph: %s", + src_graph->GetName().c_str()); auto net_output_desc = src_net_output_node->GetOpDesc(); GE_CHECK_NOTNULL(net_output_desc); @@ -919,8 +1022,8 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, for (uint32_t i = 0; i < static_cast(input_size); ++i) { uint32_t p_index = 0; if (!AttrUtils::GetInt(net_output_desc->GetInputDesc(i), ATTR_NAME_PARENT_NODE_INDEX, p_index)) { - GELOGW("SubGraph: %s input tensor %u attr %s not found.", src_graph->GetName().c_str(), i, - ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGW("SubGraph: %s input tensor %u attr %s not found.", + src_graph->GetName().c_str(), i, ATTR_NAME_PARENT_NODE_INDEX.c_str()); continue; } @@ -933,13 +1036,19 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, peer_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(peer_node); peer_out_index = peer_out_anchor->GetIdx(); - GELOGD("Found peer node of Data node: %s::%s is %s::%s", sub_graph->GetName().c_str(), - data_node->GetName().c_str(), src_graph->GetName().c_str(), peer_node->GetName().c_str()); + GELOGD("Found peer node of Data node: %s::%s is %s::%s", + sub_graph->GetName().c_str(), + data_node->GetName().c_str(), + src_graph->GetName().c_str(), + peer_node->GetName().c_str()); return SUCCESS; } } - GELOGE(FAILED, "Failed to find peer node for %s::%s", sub_graph->GetName().c_str(), data_node->GetName().c_str()); + GELOGE(FAILED, + "Failed to find 
peer node for %s::%s", + sub_graph->GetName().c_str(), + data_node->GetName().c_str()); return FAILED; } Status HybridModelBuilder::InitRuntimeParams() { @@ -959,15 +1068,15 @@ Status HybridModelBuilder::InitRuntimeParams() { runtime_param_.graph_id = ge_root_model_->GetRootGraph()->GetGraphID(); value = 0; for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { - (void)ge::AttrUtils::GetInt(it.second, ATTR_MODEL_VAR_SIZE, value); + (void) ge::AttrUtils::GetInt(it.second, ATTR_MODEL_VAR_SIZE, value); if (value > 0) { runtime_param_.var_size = static_cast(value); break; } } - GELOGI("InitRuntimeParams(), session_id:%lu, var_size:%lu. graph_id = %u", runtime_param_.session_id, - runtime_param_.var_size, runtime_param_.graph_id); + GELOGI("InitRuntimeParams(), session_id:%lu, var_size:%lu. graph_id = %u", + runtime_param_.session_id, runtime_param_.var_size, runtime_param_.graph_id); var_manager_ = VarManager::Instance(runtime_param_.session_id); GE_CHECK_NOTNULL(var_manager_); @@ -991,8 +1100,11 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { auto src_node = GetPeerNode(in_data_anchor); GE_CHECK_NOTNULL(src_node); auto src_op_type = src_node->GetType(); - GELOGD("Node %s, output %d, src node = %s, src node type = %s", node_item.NodeName().c_str(), - in_data_anchor->GetIdx(), src_node->GetName().c_str(), src_op_type.c_str()); + GELOGD("Node %s, output %d, src node = %s, src node type = %s", + node_item.NodeName().c_str(), + in_data_anchor->GetIdx(), + src_node->GetName().c_str(), + src_op_type.c_str()); if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) { continue; @@ -1012,7 +1124,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { } string ref_var_name; - (void)AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name); + (void) AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name); if (ref_var_name.empty()) { continue; } @@ -1045,7 +1157,8 @@ Status HybridModelBuilder::GetParentNodeOutputIndex(const OpDesc &op_desc, int i auto input_desc = op_desc.MutableInputDesc(index); GE_CHECK_NOTNULL(input_desc); if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, out_index)) { - GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.", index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.", + index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); return INTERNAL_ERROR; } return SUCCESS; @@ -1060,7 +1173,8 @@ Status HybridModelBuilder::InitModelMem() { } if (total_var_size > 0 && hybrid_model_.var_mem_base_ == nullptr) { - GE_CHK_STATUS_RET(var_manager_->MallocVarMemory(total_var_size), "Malloc Var Memory Fail."); + GE_CHK_STATUS_RET(var_manager_->MallocVarMemory(total_var_size), + "Malloc Var Memory Fail."); hybrid_model_.var_mem_base_ = var_manager_->GetVarMemoryBase(RT_MEMORY_HBM); } @@ -1078,30 +1192,33 @@ Status HybridModelBuilder::TransAllVarData() { } std::vector variable_node_list; - for (auto &it : hybrid_model_.variable_nodes_) { + for (auto &it : hybrid_model_.device_variable_nodes_) { variable_node_list.emplace_back(it.second); GELOGD("[%s] added for trans var data", it.first.c_str()); } - GE_CHK_STATUS_RET( - TransVarDataUtils::TransAllVarData(variable_node_list, runtime_param_.session_id, ctx, runtime_param_.graph_id), - "TransAllVarData failed."); + GE_CHK_STATUS_RET(TransVarDataUtils::TransAllVarData(variable_node_list, + runtime_param_.session_id, + ctx, + runtime_param_.graph_id), + "TransAllVarData 
failed."); GELOGI("TransAllVarData success."); return SUCCESS; } Status HybridModelBuilder::CopyVarData() { - GE_CHK_STATUS_RET( - TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(), runtime_param_.session_id, hybrid_model_.device_id_), - "CopyVarData failed."); + GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(), + runtime_param_.session_id, + hybrid_model_.device_id_), + "CopyVarData failed."); GELOGI("CopyVarData success."); return SUCCESS; } Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item) { GELOGD("Start to load known shaped subgraph [%s]", graph.GetName().c_str()); - auto graph_item = std::unique_ptr(new (std::nothrow) GraphItem()); + auto graph_item = std::unique_ptr(new(std::nothrow)GraphItem()); GE_CHECK_NOTNULL(graph_item); graph_item->is_dynamic_ = false; auto subgraph_name = graph.GetName(); @@ -1117,11 +1234,14 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem if (op_type == DATA) { int32_t data_index = 0; if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) { - GELOGE(FAILED, "[%s] Failed to get attr [%s]", node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, + "[%s] Failed to get attr [%s]", + node->GetName().c_str(), + ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } - (void)wrapper_op_desc->AddInputDesc(op_desc->GetInputDesc(0)); + (void) wrapper_op_desc->AddInputDesc(op_desc->GetInputDesc(0)); graph_item->input_index_mapping_.emplace_back(data_index); } else if (op_type == NETOUTPUT) { int output_index = 0; @@ -1132,7 +1252,8 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem } GE_CHK_GRAPH_STATUS_RET(wrapper_op_desc->AddOutputDesc(*output_desc), - "[%s] Failed to add output desc. output index = %d", graph.GetName().c_str(), + "[%s] Failed to add output desc. output index = %d", + graph.GetName().c_str(), output_index); graph_item->output_index_mapping_.emplace_back(data_index); @@ -1157,7 +1278,8 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem graph_item->total_inputs_ = node_item->num_inputs; graph_item->total_outputs_ = node_item->num_outputs; - GELOGD("NodeItem create for known shape subgraph [%s], NodeItem = %s", graph.GetName().c_str(), + GELOGD("NodeItem create for known shape subgraph [%s], NodeItem = %s", + graph.GetName().c_str(), node_item->DebugString().c_str()); GELOGD("Done parse known shape subgraph successfully. 
graph = [%s]", graph.GetName().c_str()); @@ -1170,7 +1292,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) { GELOGD("Start to load subgraph [%s]", graph.GetName().c_str()); // for known partitioned call, load all nodes - auto graph_item = std::unique_ptr(new (std::nothrow) GraphItem()); + auto graph_item = std::unique_ptr(new(std::nothrow)GraphItem()); GE_CHECK_NOTNULL(graph_item); graph_item->is_dynamic_ = true; @@ -1186,7 +1308,7 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root NodeItem *node_item = nullptr; GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node, &node_item)); GE_CHK_STATUS_RET_NOLOG(BuildNodeItem(node, *node_item)); - GE_CHK_STATUS_RET_NOLOG(UpdateAnchorStatus(node)); // needed by FE generate task + GE_CHK_STATUS_RET_NOLOG(UpdateAnchorStatus(node)); // needed by FE generate task node_item->input_start = input_start; node_item->output_start = output_start; @@ -1226,7 +1348,7 @@ Status HybridModelBuilder::ParseVarOutputs(NodeItem &node_item) { for (int i = 0; i < node_item.num_outputs; ++i) { auto output_tensor_desc = node_item.op_desc->GetOutputDesc(i); std::string var_name; - (void)AttrUtils::GetStr(output_tensor_desc, ASSIGN_VAR_NAME, var_name); + (void) AttrUtils::GetStr(output_tensor_desc, ASSIGN_VAR_NAME, var_name); if (!var_name.empty()) { auto var_node = hybrid_model_.GetVariableNode(var_name); GE_CHECK_NOTNULL(var_node); @@ -1236,7 +1358,8 @@ Status HybridModelBuilder::ParseVarOutputs(NodeItem &node_item) { return SUCCESS; } -Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, vector &data_nodes, +Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, + vector &data_nodes, bool is_root_graph) { uint32_t data_op_index = 0; for (auto &node_item : data_nodes) { @@ -1249,7 +1372,10 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, vectorGetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) { - GELOGE(FAILED, "[%s] Failed to get attr [%s]", node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, + "[%s] Failed to get attr [%s]", + node->GetName().c_str(), + ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } } diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index ecd327ff..d522939e 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -49,7 +49,9 @@ class HybridModelBuilder { static Status UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph); static Status UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, ComputeGraph &sub_graph); static Status InitWeights(); - static Status BuildInputMapping(GraphItem &graph_item, std::vector &data_nodes, bool is_root_graph); + static Status BuildInputMapping(GraphItem &graph_item, + std::vector &data_nodes, + bool is_root_graph); static Status ResolveRefIo(NodeItem &node_item); Status BuildOutputMapping(GraphItem &partitioned_call, const NodeItem &node_item, bool is_root_graph); Status ValidateParams(); @@ -74,7 +76,9 @@ class HybridModelBuilder { Status ParseVarOutputs(NodeItem &node_item); Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); - const char *GetGraphName() const { return hybrid_model_.model_name_.c_str(); } + const char* GetGraphName() const { + return hybrid_model_.model_name_.c_str(); + } const NodeItem *GetNodeItem(const NodePtr 
&node) const; NodeItem *MutableNodeItem(const NodePtr &node); @@ -91,4 +95,4 @@ class HybridModelBuilder { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_MODEL_HYBRID_MODEL_BUILDER_H_ +#endif // GE_HYBRID_MODEL_HYBRID_MODEL_BUILDER_H_ diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index 7ec8d946..a740aa7d 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -26,13 +26,16 @@ namespace ge { namespace hybrid { namespace { -const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; -const char *const kNodeTypeRetVal = "_RetVal"; +const char * const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; +const char * const kNodeTypeRetVal = "_RetVal"; Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(FAILED, "[%s] Failed to get attr [%s]", op_desc.GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, + "[%s] Failed to get attr [%s]", + op_desc.GetName().c_str(), + ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } @@ -51,7 +54,10 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr Status ParseOutputMapping(OpDescPtr op_desc, FusedSubgraph &fused_subgraph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(FAILED, "[%s] Failed to get attr [%s]", op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, + "[%s] Failed to get attr [%s]", + op_desc->GetName().c_str(), + ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } @@ -65,11 +71,11 @@ Status ParseFusedSubgraph(NodeItem &node_item) { } GELOGI("[%s] Start to parse fused subgraph.", node_item.node_name.c_str()); - auto fused_subgraph = std::unique_ptr(new (std::nothrow) FusedSubgraph()); + auto fused_subgraph = std::unique_ptr(new (std::nothrow)FusedSubgraph()); GE_CHECK_NOTNULL(fused_subgraph); ComputeGraphPtr fused_graph; - (void)AttrUtils::GetGraph(*node_item.op_desc, kAttrNameOriginalFusionGraph, fused_graph); + (void) AttrUtils::GetGraph(*node_item.op_desc, kAttrNameOriginalFusionGraph, fused_graph); GE_CHECK_NOTNULL(fused_graph); fused_graph->SetGraphUnknownFlag(true); @@ -96,7 +102,7 @@ Status ParseFusedSubgraph(NodeItem &node_item) { return SUCCESS; } } // namespace -NodeItem::NodeItem(NodePtr node) : node(std::move(node)) { +NodeItem::NodeItem(NodePtr node): node(std::move(node)) { this->op_desc = this->node->GetOpDesc().get(); this->node_id = this->op_desc->GetId(); this->num_inputs = this->op_desc->GetInputsSize(); @@ -107,11 +113,19 @@ NodeItem::NodeItem(NodePtr node) : node(std::move(node)) { Status NodeItem::Init() { int32_t unknown_shape_type_val = 0; - (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); shape_inference_type = static_cast(unknown_shape_type_val); - GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), "[%s] Failed to get shape status.", - node->GetName().c_str()); + bool test_is_dynamic = false; + NodeUtils::GetNodeUnknownShapeStatus(*node, test_is_dynamic); + (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); + GELOGI("node name = %s, is_dynamic = %d, test_is_dynamic = %d", this->node_name.c_str(), is_dynamic, test_is_dynamic); + if (!is_dynamic) { 
+ GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), + "[%s] Failed to get shape status.", + node->GetName().c_str()); + } + GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); if (is_dynamic) { for (int i = 0; i < num_inputs; ++i) { @@ -122,8 +136,8 @@ Status NodeItem::Init() { } else { num_static_input_shapes++; is_input_shape_static.push_back(true); - GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", NodeName().c_str(), i, - input_desc->MutableShape().ToString().c_str()); + GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", + NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); } } @@ -167,7 +181,7 @@ std::string NodeItem::DebugString() const { for (auto &items : outputs) { ss << ", output[" << index++ << "]: "; for (auto &item : items) { - ss << "(" << item.second->NodeName() << ":" << item.first << "), "; + ss << "(" << item.second->NodeName() << ":" <> &&tasks) : tasks_(std::move(tasks)) {} +AiCoreNodeTask::AiCoreNodeTask(std::vector> &&tasks) : tasks_(std::move(tasks)) { +} Status AiCoreNodeExecutor::Initialize() { auto ge_lib = GELib::GetInstance(); @@ -38,7 +39,7 @@ Status AiCoreNodeExecutor::Initialize() { auto aic_ops_store = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); GE_CHECK_NOTNULL(aic_ops_store); - compiler_.reset(new (std::nothrow) AiCoreTaskCompiler(aic_ops_store)); + compiler_.reset(new(std::nothrow)AiCoreTaskCompiler(aic_ops_store)); GE_CHECK_NOTNULL(compiler_); return SUCCESS; } @@ -84,7 +85,7 @@ Status AiCoreNodeExecutor::GenNodeKey(const NodePtr &node, std::string &node_key auto num_dims = shape.GetDimNum(); if (num_dims == 0) { continue; - } // scalar + } // scalar for (std::size_t i = 0; i < num_dims - 1; i++) { node_key.append(std::to_string(shape.GetDim(i))); node_key.push_back('_'); @@ -112,8 +113,8 @@ std::shared_ptr AiCoreNodeTaskRegistry::GetTask(const std::string &nod return (iter != reg_node_tasks_.end()) ? 
iter->second : nullptr; } -Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr &node, - shared_ptr &task) const { +Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, + const NodePtr &node, shared_ptr &task) const { GE_CHECK_NOTNULL(node); auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h old mode 100644 new mode 100755 index 506202fa..b4afc34c --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -36,7 +36,6 @@ class AiCoreNodeTaskRegistry { std::shared_ptr GetTask(const std::string &node_key); bool AddTask(const std::string &node_key, const std::shared_ptr task); - private: AiCoreNodeTaskRegistry() = default; std::map> reg_node_tasks_; @@ -52,7 +51,6 @@ class AiCoreNodeTask : public NodeTask { Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; - private: std::vector> tasks_; }; @@ -61,7 +59,8 @@ class AiCoreNodeExecutor : public NodeExecutor { public: Status Initialize() override; Status LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const override; - Status CompileTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; + Status CompileTask(const HybridModel &model, const NodePtr &node, + std::shared_ptr &task) const override; private: static Status GenNodeKey(const NodePtr &node, std::string &node_key); @@ -69,4 +68,4 @@ class AiCoreNodeExecutor : public NodeExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ +#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 9ec0cc22..7f69acd4 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -37,7 +37,9 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) } Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { - GE_CHK_STATUS_RET(ValidateTaskDef(task_def), "[%s] Failed to validate task def: [%s]", op_desc.GetName().c_str(), + GE_CHK_STATUS_RET(ValidateTaskDef(task_def), + "[%s] Failed to validate task def: [%s]", + op_desc.GetName().c_str(), task_def.DebugString().c_str()); const domi::KernelDef &kernel_def = task_def.kernel(); @@ -48,7 +50,7 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef block_dim_ = kernel_def.block_dim(); // malloc args memory - args_.reset(new (std::nothrow) uint8_t[args_size_]); + args_.reset(new(std::nothrow) uint8_t[args_size_]); GE_CHECK_NOTNULL(args_); errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); if (err != EOK) { @@ -64,7 +66,10 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef const auto *args_offset_buffer = reinterpret_cast(context.args_offset().data()); uint32_t offset = *args_offset_buffer; if (offset > args_size_) { - GELOGE(INTERNAL_ERROR, "[%s] Arg offset out of range. offset = %u, arg size = %u", GetName().c_str(), offset, + GELOGE(INTERNAL_ERROR, + "[%s] Arg offset out of range. 
offset = %u, arg size = %u", + GetName().c_str(), + offset, args_size_); return INTERNAL_ERROR; } @@ -72,7 +77,11 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef arg_base_ = reinterpret_cast(args_.get() + offset); max_arg_count_ = (args_size_ - offset) / sizeof(void *); GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d, arg base = %p, arg size = %u", - op_desc.GetName().c_str(), stub_name_.c_str(), block_dim_, arg_base_, args_size_); + op_desc.GetName().c_str(), + stub_name_.c_str(), + block_dim_, + arg_base_, + args_size_); return SUCCESS; } @@ -111,7 +120,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); OpRunInfo tiling_info; - tiling_info.block_dim = -1; // codex: Using uninitialized value + tiling_info.block_dim = -1; // codex: Using uninitialized value auto execution_context = context.GetExecutionContext(); RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] Start"); @@ -135,8 +144,9 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { } RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] Start"); - GE_CHK_RT_RET(rtMemcpy(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), tiling_data_.c_str(), - tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), + tiling_data_.c_str(), tiling_data_.size(), + RT_MEMCPY_HOST_TO_DEVICE)); RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] End"); GELOGD("[%s] Done updating tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); @@ -145,7 +155,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { GELOGD("[%s] Start to invoke OpParaCalculate.", node->GetName().c_str()); - GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), "Failed calc tiling data of node %s.", + GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), + "Failed calc tiling data of node %s.", node->GetName().c_str()); GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); return SUCCESS; @@ -157,8 +168,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { ++expected_arg_count; } if (expected_arg_count > max_arg_count_) { - GELOGE(INTERNAL_ERROR, "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", GetName().c_str(), - max_arg_count_, expected_arg_count); + GELOGE(INTERNAL_ERROR, + "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", + GetName().c_str(), + max_arg_count_, + expected_arg_count); return INTERNAL_ERROR; } @@ -204,7 +218,7 @@ Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { bool dynamic_supported = false; - (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); + (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); if (!dynamic_supported) { GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); return SUCCESS; @@ -212,7 +226,7 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { GELOGD("Start alloc tiling data of node %s.", op_desc.GetName().c_str()); int64_t max_size = -1; - (void)AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); + (void) AttrUtils::GetInt(op_desc, 
GetKeyForOpParamSize(), max_size); GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); if (max_size <= 0) { GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); @@ -228,11 +242,17 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { return SUCCESS; } -bool AiCoreOpTask::IsDynamicShapeSupported() { return tiling_buffer_ != nullptr; } +bool AiCoreOpTask::IsDynamicShapeSupported() { + return tiling_buffer_ != nullptr; +} -const std::string &AiCoreOpTask::GetName() const { return stub_name_; } +const std::string &AiCoreOpTask::GetName() const { + return stub_name_; +} -std::string AiCoreOpTask::GetKeyForOpParamSize() const { return kAttrOpParamSize; } +std::string AiCoreOpTask::GetKeyForOpParamSize() const { + return kAttrOpParamSize; +} Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); @@ -242,11 +262,12 @@ Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &t Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices(const OpDesc &op_desc) { GELOGD("[%s] Start to setup AtomicAddrClean task.", op_desc.GetName().c_str()); std::vector atomic_output_indices; - (void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); - map> workspace_info; // op_name, ws_index, ws_offset + (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); + map> workspace_info; // op_name, ws_index, ws_offset workspace_info = op_desc.TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, workspace_info); if (atomic_output_indices.empty() && workspace_info.empty()) { - GELOGE(INTERNAL_ERROR, "[%s] Neither ATOMIC_ATTR_OUTPUT_INDEX nor EXT_ATTR_ATOMIC_WORKSPACE_INFO is empty.", + GELOGE(INTERNAL_ERROR, + "[%s] Neither ATOMIC_ATTR_OUTPUT_INDEX nor EXT_ATTR_ATOMIC_WORKSPACE_INFO is empty.", op_desc.GetName().c_str()); return INTERNAL_ERROR; } @@ -274,19 +295,25 @@ Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices(const OpDesc &op_desc) } if (arg_count > max_arg_count_) { - GELOGE(INTERNAL_ERROR, "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", GetName().c_str(), - max_arg_count_, arg_count); + GELOGE(INTERNAL_ERROR, + "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", + GetName().c_str(), + max_arg_count_, + arg_count); return INTERNAL_ERROR; } return SUCCESS; } -std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { return kAttrAtomicOpParamSize; } +std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { + return kAttrAtomicOpParamSize; +} Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); - GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), "Failed calc tiling data of node %s.", + GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), + "Failed calc tiling data of node %s.", node->GetName().c_str()); GELOGD("[%s] Done invoking OpAtomicCalculate successfully.", node->GetName().c_str()); return SUCCESS; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h old mode 100644 new mode 100755 index 41ab0d79..eaa821e3 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -44,7 +44,7 @@ class AiCoreOpTask { Status LaunchKernel(rtStream_t stream); - const 
std::string &GetName() const; + const std::string& GetName() const; protected: Status UpdateTilingInfo(TaskContext &context); @@ -84,4 +84,4 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_AICORE_OP_TASK_H_ +#endif //GE_HYBRID_KERNEL_AICORE_OP_TASK_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc old mode 100644 new mode 100755 index bad91806..b2996435 --- a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc @@ -34,12 +34,15 @@ const char *AiCoreKernelRegistry::GetUnique(const string &stub_key) { } AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector &task_defs) - : op_desc_(op_desc), task_defs_(task_defs) {} + : op_desc_(op_desc), task_defs_(task_defs) { +} Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, bool ignore_failure_on_atomic) { GE_CHECK_NOTNULL(op_desc_); if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { - GELOGE(INTERNAL_ERROR, "[%s] At most 2 task was supported, but got %zu", op_desc_->GetName().c_str(), + GELOGE(INTERNAL_ERROR, + "[%s] At most 2 task was supported, but got %zu", + op_desc_->GetName().c_str(), task_defs_.size()); return INTERNAL_ERROR; } @@ -48,32 +51,38 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, bool i if (ExpectAtomicAddrCleanTask()) { if (task_defs_.size() != kNumTaskWithAtomicAddrCleanTask) { if (ignore_failure_on_atomic) { - GELOGI("[%s] AtomicAddrClean task was expected, but got %zu task_defs", op_desc_->GetName().c_str(), + GELOGI("[%s] AtomicAddrClean task was expected, but got %zu task_defs", + op_desc_->GetName().c_str(), task_defs_.size()); return SUCCESS; } else { - GELOGE(INTERNAL_ERROR, "[%s] AtomicAddrClean task was expected, but got %zu task_defs", - op_desc_->GetName().c_str(), task_defs_.size()); + GELOGE(INTERNAL_ERROR, + "[%s] AtomicAddrClean task was expected, but got %zu task_defs", + op_desc_->GetName().c_str(), + task_defs_.size()); return INTERNAL_ERROR; } } GELOGD("[%s] Build AtomicAddrClean task.", op_desc_->GetName().c_str()); - auto atomic_task = std::unique_ptr(new (std::nothrow) AtomicAddrCleanOpTask()); + auto atomic_task = + std::unique_ptr(new(std::nothrow)AtomicAddrCleanOpTask()); GE_CHECK_NOTNULL(atomic_task); - GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), "[%s] Failed to init task for AtomicAddrClean", + GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), + "[%s] Failed to init task for AtomicAddrClean", op_desc_->GetName().c_str()); op_tasks.emplace_back(std::move(atomic_task)); } // build aicore task - auto aicore_task = std::unique_ptr(new (std::nothrow) AiCoreOpTask()); + auto aicore_task = std::unique_ptr(new(std::nothrow)AiCoreOpTask()); GE_CHECK_NOTNULL(aicore_task); - GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), "[%s] Failed to init task for AtomicAddrClean", + GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), + "[%s] Failed to init task for AtomicAddrClean", op_desc_->GetName().c_str()); op_tasks.emplace_back(std::move(aicore_task)); - node_task.reset(new (std::nothrow) AiCoreNodeTask(std::move(op_tasks))); + node_task.reset(new(std::nothrow)AiCoreNodeTask(std::move(op_tasks))); GE_CHECK_NOTNULL(node_task); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.h b/ge/hybrid/node_executor/aicore/aicore_task_builder.h old 
mode 100644 new mode 100755 index 4610e57a..92db809d --- a/ge/hybrid/node_executor/aicore/aicore_task_builder.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.h @@ -57,4 +57,4 @@ class AiCoreTaskBuilder { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_AICORE_TASK_BUILDER_H_ +#endif //GE_HYBRID_KERNEL_AICORE_TASK_BUILDER_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc old mode 100644 new mode 100755 index 588f179d..52f24809 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc @@ -24,7 +24,7 @@ namespace { uintptr_t kWeightBase = 0x10000000; uintptr_t kMemBase = 0x20000000; uint64_t kFakeSize = 0x10000000UL; -} // namespace +} std::mutex AiCoreTaskCompiler::mu_; AiCoreTaskCompiler::AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store) @@ -34,9 +34,11 @@ Status AiCoreTaskCompiler::DoCompileOp(OpsKernelInfoStore &ops_store, const Node GE_CHECK_NOTNULL(node); vector node_vec; node_vec.emplace_back(node); - GE_CHK_STATUS_RET(ops_store.CompileOpRun(node_vec), "Failed to execute CompileOp, node = %s", + GE_CHK_STATUS_RET(ops_store.CompileOpRun(node_vec), + "Failed to execute CompileOp, node = %s", node->GetName().c_str()); - GE_CHK_STATUS_RET(ops_store.CalcOpRunningParam(*node), "Failed to execute CalcOpRunningParam, node = %s", + GE_CHK_STATUS_RET(ops_store.CalcOpRunningParam(*node), + "Failed to execute CalcOpRunningParam, node = %s", node->GetName().c_str()); return SUCCESS; } @@ -62,7 +64,8 @@ Status AiCoreTaskCompiler::CompileOp(const NodePtr &node, std::vector &tasks) { rtModel_t rt_model_ = nullptr; GE_CHK_RT_RET(rtModelCreate(&rt_model_, 0)); diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h old mode 100644 new mode 100755 index 39673188..36c09d86 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -28,7 +28,6 @@ class AiCoreTaskCompiler { ~AiCoreTaskCompiler() = default; Status CompileOp(const NodePtr &node, std::vector &tasks) const; - private: static Status DoCompileOp(OpsKernelInfoStore &store, const NodePtr &node); static Status DoGenerateTask(OpsKernelInfoStore &store, const Node &node, std::vector &tasks); @@ -37,4 +36,4 @@ class AiCoreTaskCompiler { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_AICORE_TASK_COMPILER_H_ +#endif //GE_HYBRID_KERNEL_AICORE_TASK_COMPILER_H_ diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc index 1d6c464f..3974e29b 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc @@ -24,7 +24,7 @@ namespace hybrid { namespace { // if dim count is not reach kMaxShapeDims(8), use INT64_MIN to mark dim end. 
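// Editor's illustrative sketch (not part of this patch): the comment above and the
// kDimEndFlag constant that follows describe how a fixed-size dim slot is terminated
// with an INT64_MIN sentinel when a shape has fewer than kMaxShapeDims dims. The
// names kMaxDims, kDimEnd, EncodeDims and DecodeDims below are hypothetical stand-ins
// for the aicpu::FWKAdapter structures that AicpuExtInfoHandler actually uses.
#include <array>
#include <cstdint>
#include <vector>

constexpr uint32_t kMaxDims = 8;        // stand-in for aicpu::FWKAdapter::kMaxShapeDims
constexpr int64_t kDimEnd = INT64_MIN;  // stand-in for kDimEndFlag

std::array<int64_t, kMaxDims> EncodeDims(const std::vector<int64_t> &dims) {
  std::array<int64_t, kMaxDims> slot{};
  uint32_t index = 0;
  for (; index < dims.size() && index < kMaxDims; ++index) {
    slot[index] = dims[index];
  }
  if (index < kMaxDims) {
    slot[index] = kDimEnd;  // mark where the real dims stop
  }
  return slot;
}

std::vector<int64_t> DecodeDims(const std::array<int64_t, kMaxDims> &slot) {
  std::vector<int64_t> dims;
  for (uint32_t index = 0; index < kMaxDims; ++index) {
    if (slot[index] == kDimEnd) {
      break;  // sentinel reached: remaining entries are unused
    }
    dims.push_back(slot[index]);
  }
  return dims;
}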
constexpr int64_t kDimEndFlag = INT64_MIN; -} // namespace +} Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { GELOGI("Node[%s] parse ext info start.", node_name_.c_str()); @@ -34,10 +34,10 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { } ext_info_len_ = ext_info.size(); - ext_info_.reset(new (std::nothrow) uint8_t[ext_info_len_]); + ext_info_.reset(new(std::nothrow)uint8_t[ext_info_len_]); GE_CHECK_NOTNULL(ext_info_); - (void)memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()); + (void) memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()); input_shape_and_type_.clear(); output_shape_and_type_.clear(); @@ -58,8 +58,8 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed."); break; default: - GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoType, - aicpu_ext_info->infoLen); + GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", + node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); break; } offset += sizeof(AicpuExtInfo); @@ -75,14 +75,14 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), PARAM_INVALID, - "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", node_name_.c_str(), - sizeof(int32_t), aicpu_ext_info->infoLen); + "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", + node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen); auto type = reinterpret_cast(aicpu_ext_info->infoMsg); GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, PARAM_INVALID, - "Node[%s] parse ext shape type failed as need %d but %d.", node_name_.c_str(), unknown_type_, - *type); + "Node[%s] parse ext shape type failed as need %d but %d.", + node_name_.c_str(), unknown_type_, *type); GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); return SUCCESS; } @@ -105,8 +105,8 @@ Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { if (unknown_type_ == DEPEND_COMPUTE) { - GELOGD("Node[%s] is depend compute type no need ext output shape, ignore it, infoLen=%u.", node_name_.c_str(), - aicpu_ext_info->infoLen); + GELOGD("Node[%s] is depend compute type no need ext output shape, ignore it, infoLen=%u.", + node_name_.c_str(), aicpu_ext_info->infoLen); return SUCCESS; } auto need_len = output_num_ * sizeof(AicpuShapeAndType); @@ -128,7 +128,8 @@ Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const const auto &shape = input_desc.GetShape(); GE_CHK_STATUS_RET(UpdateShapeAndType(shape, input_desc.GetDataType(), input_shape_and_type_[input_index]), - "Node[%s] input[%u] update input shape and type failed.", node_name_.c_str(), input_index); + "Node[%s] input[%u] update input shape and type failed.", + node_name_.c_str(), input_index); return SUCCESS; } @@ -144,12 +145,12 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons std::vector> range; auto range_ret = output_desc.GetShapeRange(range); GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR, - "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.", node_name_.c_str(), - range_ret); + "Node[%s] is shape 
range type but get GetShapeRange failed, ret=%u.", + node_name_.c_str(), range_ret); for (size_t k = 0; k < range.size(); ++k) { if (shape.GetDim(k) < 0 && k < range.size()) { - GELOGD("Node[%s] output[%u] update dim[%zu] from %ld to range max %ld.", node_name_.c_str(), output_index, k, - shape.GetDim(k), range[k].second); + GELOGD("Node[%s] output[%u] update dim[%zu] from %ld to range max %ld.", + node_name_.c_str(), output_index, k, shape.GetDim(k), range[k].second); shape.SetDim(k, range[k].second); } } @@ -170,8 +171,8 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da AicpuShapeAndType *shape_and_type) { auto dim_num = shape.GetDimNum(); if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) { - GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", dim_num, - aicpu::FWKAdapter::kMaxShapeDims); + GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", + dim_num, aicpu::FWKAdapter::kMaxShapeDims); return PARAM_INVALID; } size_t index = 0; @@ -186,7 +187,8 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da return SUCCESS; } -void AicpuExtInfoHandler::GetShapeAndType(const AicpuShapeAndType *shape_and_type, GeShape &shape, +void AicpuExtInfoHandler::GetShapeAndType(const AicpuShapeAndType *shape_and_type, + GeShape &shape, DataType &data_type) { std::vector dims; for (uint32_t index = 0; index < aicpu::FWKAdapter::kMaxShapeDims; ++index) { diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h index a42678b1..9c867cdc 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h @@ -30,12 +30,20 @@ using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; class AicpuExtInfoHandler { public: AicpuExtInfoHandler(std::string node_name, uint32_t input_num, uint32_t output_num, UnknowShapeOpType unknown_type) - : node_name_(std::move(node_name)), input_num_(input_num), output_num_(output_num), unknown_type_(unknown_type) {} + : node_name_(std::move(node_name)), + input_num_(input_num), + output_num_(output_num), + unknown_type_(unknown_type) { + } ~AicpuExtInfoHandler() = default; - uint8_t *GetExtInfo() const { return ext_info_.get(); } - size_t GetExtInfoLen() const { return ext_info_len_; } + uint8_t *GetExtInfo() const { + return ext_info_.get(); + } + size_t GetExtInfoLen() const { + return ext_info_len_; + } Status Parse(const std::string &ext_info); @@ -46,13 +54,18 @@ class AicpuExtInfoHandler { Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); private: + Status ParseExtShapeType(AicpuExtInfo *aicpu_ext_info); Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); - static Status UpdateShapeAndType(const GeShape &shape, DataType data_type, AicpuShapeAndType *shape_and_type); + static Status UpdateShapeAndType(const GeShape &shape, + DataType data_type, + AicpuShapeAndType *shape_and_type); - static void GetShapeAndType(const AicpuShapeAndType *shape_and_type, GeShape &shape, DataType &data_type); + static void GetShapeAndType(const AicpuShapeAndType *shape_and_type, + GeShape &shape, + DataType &data_type); private: const std::string node_name_; @@ -67,4 +80,4 @@ class AicpuExtInfoHandler { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_AICPU_EXT_INFO_H_ \ No newline at end of file +#endif // GE_HYBRID_AICPU_EXT_INFO_H_ \ No newline at 
end of file diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc old mode 100644 new mode 100755 index 44fe377a..4e07f95b --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -27,7 +27,7 @@ namespace hybrid { namespace { // mem need release constexpr uint64_t kReleaseFlag = 1; -} // namespace +} REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, AiCpuNodeExecutor); REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_CUSTOM, AiCpuNodeExecutor); @@ -43,24 +43,25 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info) { if (node_item_->is_dynamic) { // dynamic node must have ext info GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info), - "Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", node_name_.c_str(), - kernel_ext_info.size()); + "Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", + node_name_.c_str(), kernel_ext_info.size()); } // if no ext info no need copy to device. if (kernel_ext_info.empty()) { - GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", node_name_.c_str(), - node_item_->is_dynamic ? "true" : "false"); + GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", + node_name_.c_str(), node_item_->is_dynamic ? "true" : "false"); return SUCCESS; } // copy task args buf GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_ext_info.size(), ext_info_addr_dev_), - "Node[%s] alloc kernel_ext_info buf failed, size=%zu", node_name_.c_str(), kernel_ext_info.size()); + "Node[%s] alloc kernel_ext_info buf failed, size=%zu", + node_name_.c_str(), kernel_ext_info.size()); // copy default ext info to device - GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), kernel_ext_info.data(), - kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), + kernel_ext_info.data(), kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } @@ -71,8 +72,11 @@ Status AicpuNodeTaskBase::UpdateOutputShapeFromExtInfo() { return SUCCESS; } // copy to host buf - GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_.GetExtInfo(), aicpu_ext_handle_.GetExtInfoLen(), - ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_.GetExtInfo(), + aicpu_ext_handle_.GetExtInfoLen(), + ext_info_addr_dev_->GetData(), + ext_info_addr_dev_->GetSize(), + RT_MEMCPY_DEVICE_TO_HOST)); for (auto i = 0; i < node_item_->num_outputs; ++i) { GeShape shape; @@ -81,18 +85,19 @@ Status AicpuNodeTaskBase::UpdateOutputShapeFromExtInfo() { aicpu_ext_handle_.GetOutputShapeAndType(i, shape, data_type); auto output_desc = node_item_->op_desc->MutableOutputDesc(i); GE_CHECK_NOTNULL(output_desc); - GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, i, output_desc), "Update node %s [%d]th output shape failed.", + GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, i, output_desc), + "Update node %s [%d]th output shape failed.", node_name_.c_str(), i); } return SUCCESS; } -Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(const GeShape &shape_new, int32_t output_index, - GeTensorDescPtr &output_desc) { +Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(const GeShape &shape_new, + int32_t output_index, GeTensorDescPtr &output_desc) { auto shape_old = output_desc->GetShape(); 
output_desc->SetShape(shape_new); - GELOGI("Update node[%s] out[%d] shape from %s to %s.", node_name_.c_str(), output_index, shape_old.ToString().c_str(), - shape_new.ToString().c_str()); + GELOGI("Update node[%s] out[%d] shape from %s to %s.", node_name_.c_str(), output_index, + shape_old.ToString().c_str(), shape_new.ToString().c_str()); auto origin_shape_old = output_desc->GetOriginShape(); auto origin_format = output_desc->GetOriginFormat(); @@ -103,15 +108,16 @@ Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(const GeShape &shape_new, int3 } // if format is not same need convert shape std::vector origin_dims_new; - auto trans_ret = - formats::TransShape(format, shape_new.GetDims(), output_desc->GetDataType(), origin_format, origin_dims_new); + auto trans_ret = formats::TransShape(format, shape_new.GetDims(), + output_desc->GetDataType(), origin_format, origin_dims_new); GE_CHK_STATUS_RET(trans_ret, "Node[%s] out[%d] originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", node_name_.c_str(), output_index, origin_format, format, shape_new.ToString().c_str()); auto origin_shape_new = GeShape(origin_dims_new); output_desc->SetOriginShape(origin_shape_new); - GELOGI("Node[%s] out[%d] originFormat[%d] is not same as format[%d], need update from %s ro %s.", node_name_.c_str(), - output_index, origin_format, format, origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); + GELOGI("Node[%s] out[%d] originFormat[%d] is not same as format[%d], need update from %s ro %s.", + node_name_.c_str(), output_index, origin_format, format, + origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); return SUCCESS; } @@ -126,7 +132,8 @@ Status AicpuNodeTaskBase::UpdateExtInfo() { auto input_desc = node_item_->op_desc->MutableInputDesc(i); GE_CHECK_NOTNULL(input_desc); GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateInputShapeAndType(i, *input_desc), - "Node[%s] input[%d] update input shape failed.", node_name_.c_str(), i); + "Node[%s] input[%d] update input shape failed.", + node_name_.c_str(), i); } if (unknown_type_ != DEPEND_COMPUTE) { @@ -135,21 +142,25 @@ Status AicpuNodeTaskBase::UpdateExtInfo() { GE_CHECK_NOTNULL(output_desc); GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateOutputShapeAndType(j, *output_desc), - "Node[%s] output[%d] UpdateOutputShapeAndType failed.", node_name_.c_str(), j); + "Node[%s] output[%d] UpdateOutputShapeAndType failed.", + node_name_.c_str(), j); } } // copy input and output shapes to device - GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), aicpu_ext_handle_.GetExtInfo(), - aicpu_ext_handle_.GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), + ext_info_addr_dev_->GetSize(), + aicpu_ext_handle_.GetExtInfo(), + aicpu_ext_handle_.GetExtInfoLen(), + RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("Node[%s] update ext info end.", node_name_.c_str()); return SUCCESS; } Status AicpuNodeTaskBase::UpdateArgs(TaskContext &context) { - GELOGI("Node[%s] update args begin. is_dynamic=%s, unknown_type=%d", node_name_.c_str(), - node_item_->is_dynamic ? "true" : "false", unknown_type_); + GELOGI("Node[%s] update args begin. is_dynamic=%s, unknown_type=%d", + node_name_.c_str(), node_item_->is_dynamic ? 
"true" : "false", unknown_type_); if (node_item_->num_inputs == 0 && node_item_->num_outputs == 0) { GELOGI("Node[%s] has no input and output, no need update args.", node_name_.c_str()); return SUCCESS; @@ -194,8 +205,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionnum_outputs == 0)) { - GELOGI("Node[%s] type[%s] unknown_type is %d, output num is %d.", node_name_.c_str(), node_item_->node_type.c_str(), - unknown_type_, node_item_->num_outputs); + GELOGI("Node[%s] type[%s] unknown_type is %d, output num is %d.", + node_name_.c_str(), node_item_->node_type.c_str(), unknown_type_, node_item_->num_outputs); return SUCCESS; } @@ -203,8 +214,8 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary); for (auto i = 0; i < node_item_->num_outputs; ++i) { GE_CHK_STATUS_RET(AllocTensorBuffer(result_summary_size, output_summary_[i]), - "Node[%s] alloc buffer for result summary info failed, size=%zu.", node_name_.c_str(), - result_summary_size); + "Node[%s] alloc buffer for result summary info failed, size=%zu.", + node_name_.c_str(), result_summary_size); } output_summary_host_.resize(node_item_->num_outputs); @@ -212,20 +223,22 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { // copy task need copy output_data and output_shape, max len is 2 * output_num const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_release_flag_dev_), - "Node[%s] alloc copy task input release_flag failed, size=%zu", node_name_.c_str(), - copy_input_buf_len); + "Node[%s] alloc copy task input release_flag failed, size=%zu", + node_name_.c_str(), copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_data_size_dev_), - "Node[%s] alloc copy task input data_size failed, size=%zu", node_name_.c_str(), - copy_input_buf_len); + "Node[%s] alloc copy task input data_size failed, size=%zu", + node_name_.c_str(), copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_src_dev_), - "Node[%s] alloc copy task input src failed, size=%zu", node_name_.c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input src failed, size=%zu", + node_name_.c_str(), copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_dst_dev_), - "Node[%s] alloc copy task input dst failed, size=%zu", node_name_.c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input dst failed, size=%zu", + node_name_.c_str(), copy_input_buf_len); // copy task args buf GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), copy_task_args_buf_), - "Node[%s] alloc copy task args buf failed, size=%zu", node_name_.c_str(), - sizeof(STR_FWK_OP_KERNEL)); + "Node[%s] alloc copy task args buf failed, size=%zu", + node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL)); std::vector copy_io_addr; copy_io_addr.emplace_back(reinterpret_cast(copy_input_release_flag_dev_->GetData())); @@ -238,38 +251,42 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { // can alloc in init, it can reuse GE_CHK_STATUS_RET(AllocTensorBuffer(copy_io_addr_size, copy_ioaddr_dev_), - "Node[%s] alloc copy task io buf failed, size=%zu", node_name_.c_str(), copy_io_addr_size); + "Node[%s] alloc copy task io buf failed, size=%zu", + node_name_.c_str(), copy_io_addr_size); - GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size, ©_io_addr[0], copy_io_addr_size, - RT_MEMCPY_HOST_TO_DEVICE)); 
+ GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size, + ©_io_addr[0], copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } Status AicpuTfNodeTask::Init(const HybridModel &model) { GELOGI("Node[%s] init start.", node_name_.c_str()); - GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel_ex(), FAILED, "Node[%s] is tf node but task def does not has kernel ex.", + GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel_ex(), FAILED, + "Node[%s] is tf node but task def does not has kernel ex.", node_name_.c_str()); auto &kernel_ex_def = task_def_.kernel_ex(); auto kernel_workspace_size = kernel_ex_def.task_info().size(); GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_workspace_size, kernel_workspace_), - "Node[%s] alloc buffer for kernel workspace failed, size=%zu.", node_name_.c_str(), - kernel_workspace_size); + "Node[%s] alloc buffer for kernel workspace failed, size=%zu.", + node_name_.c_str(), kernel_workspace_size); - GE_CHK_RT_RET(rtMemcpy(kernel_workspace_->GetData(), kernel_workspace_size, kernel_ex_def.task_info().data(), - kernel_workspace_size, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(kernel_workspace_->GetData(), kernel_workspace_size, + kernel_ex_def.task_info().data(), kernel_workspace_size, + RT_MEMCPY_HOST_TO_DEVICE)); auto input_output_size = (node_item_->num_inputs + node_item_->num_outputs) * sizeof(uint64_t); // alloc input output addr buf, allow alloc size 0 GE_CHK_STATUS_RET(AllocTensorBuffer(input_output_size, input_output_addr_), - "Node[%s] alloc buffer for io addr failed, size=%zu.", node_name_.c_str(), input_output_size); + "Node[%s] alloc buffer for io addr failed, size=%zu.", + node_name_.c_str(), input_output_size); auto &kernel_ext_info = kernel_ex_def.kernel_ext_info(); auto kernel_ext_info_size = kernel_ex_def.kernel_ext_info_size(); GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", node_name_.c_str(), - kernel_ext_info.size(), kernel_ext_info_size); + "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", + node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size); // init ext info GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name_.c_str()); @@ -277,14 +294,14 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { // build fwk_op_kernel. 
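// Editor's illustrative sketch (hypothetical struct and helper; std::memcpy replaces
// the securec memcpy_s used by GE): the "build fwk_op_kernel" step that follows first
// checks that the serialized args fit into STR_FWK_OP_KERNEL, deserializes them into a
// zero-initialized struct, and only then patches in the device addresses (workspace,
// input/output table, ext info) that become known at load time.
#include <cstdint>
#include <cstring>
#include <string>

struct FwkKernel {  // stand-in for STR_FWK_OP_KERNEL
  uint64_t workspace_base_addr;
  uint64_t input_output_addr;
  uint64_t ext_info_addr;
  uint64_t ext_info_len;
};

bool BuildFwkKernel(const std::string &serialized_args,
                    uint64_t workspace_dev, uint64_t io_table_dev,
                    uint64_t ext_info_dev, uint64_t ext_info_len,
                    FwkKernel &kernel) {
  if (serialized_args.size() > sizeof(FwkKernel)) {
    return false;  // refuse to copy more bytes than the struct can hold
  }
  kernel = FwkKernel{};  // zero-initialize, like `= {0}` in the original
  std::memcpy(&kernel, serialized_args.data(), serialized_args.size());
  // Device addresses are only known after the buffers are allocated, so patch them in.
  kernel.workspace_base_addr = workspace_dev;
  kernel.input_output_addr = io_table_dev;
  kernel.ext_info_addr = ext_info_dev;
  kernel.ext_info_len = ext_info_len;
  return true;
}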
GE_CHK_BOOL_RET_STATUS(sizeof(STR_FWK_OP_KERNEL) >= kernel_ex_def.args_size(), FAILED, - "Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", node_name_.c_str(), - sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size()); + "Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", + node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size()); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; - errno_t sec_ret = - memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); - GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, "Node[%s] memcpy fwk_op_kernel failed, ret: %d.", - node_name_.c_str(), sec_ret); + errno_t sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), + kernel_ex_def.args().data(), kernel_ex_def.args_size()); + GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, + "Node[%s] memcpy fwk_op_kernel failed, ret: %d.", node_name_.c_str(), sec_ret); fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_->GetData()); fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(input_output_addr_->GetData()); @@ -298,15 +315,16 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = GetStepIdAddr(model); auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID; - GE_CHK_STATUS_RET(EnsureSessionCreated(session_id), "Node[%s] create session id %lu failed.", node_name_.c_str(), - session_id); + GE_CHK_STATUS_RET(EnsureSessionCreated(session_id), "Node[%s] create session id %lu failed.", + node_name_.c_str(), session_id); // alloc kernel_buf_ and copy to device. GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), kernel_buf_), - "Node[%s] alloc buffer for kernel buf failed, size=%zu.", node_name_.c_str(), - sizeof(STR_FWK_OP_KERNEL)); + "Node[%s] alloc buffer for kernel buf failed, size=%zu.", + node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL)); - GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), + GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("Node[%s] init end.", node_name_.c_str()); @@ -326,7 +344,8 @@ uint64_t AicpuTfNodeTask::GetStepIdAddr(const HybridModel &model) { Status AicpuTfNodeTask::EnsureSessionCreated(uint64_t session_id) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - GE_CHK_STATUS_RET(model_manager->CreateAicpuSession(session_id), "Create aicpu session %lu failed", session_id); + GE_CHK_STATUS_RET(model_manager->CreateAicpuSession(session_id), + "Create aicpu session %lu failed", session_id); return SUCCESS; } @@ -334,22 +353,23 @@ Status AicpuTfNodeTask::ReadResultSummaryAndPrepareMemory(TaskContext &context, std::vector> &out_shape_hbm) { for (auto i = 0; i < node_item_->num_outputs; ++i) { auto &result_summary = output_summary_host_[i]; - GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), output_summary_[i]->GetData(), - output_summary_[i]->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), + output_summary_[i]->GetData(), output_summary_[i]->GetSize(), + RT_MEMCPY_DEVICE_TO_HOST)); auto raw_data_size = result_summary.raw_data_size; std::unique_ptr tensor_buffer; GE_CHK_STATUS_RET(AllocTensorBuffer(raw_data_size, tensor_buffer), - "Node[%s] 
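The checked copy into the framework kernel struct above follows the securec pattern used throughout this file: verify the destination can hold the serialized args, memcpy_s into it, and treat anything other than EOK as an internal error. A small sketch of that pattern, with a stub struct standing in for STR_FWK_OP_KERNEL; the securec include path is an assumption:

```cpp
#include <string>
#include "securec.h"  // securec library already used by GE; exact include path is an assumption

// Stub standing in for STR_FWK_OP_KERNEL: the destination must be able to hold the serialized args.
struct FwkOpKernelStub {
  unsigned char payload[64] = {0};
};

bool CopyArgsChecked(const std::string &args, FwkOpKernelStub &kernel) {
  if (sizeof(kernel) < args.size()) {
    return false;  // mirrors the sizeof(STR_FWK_OP_KERNEL) >= args_size() guard
  }
  errno_t sec_ret = memcpy_s(&kernel, sizeof(kernel), args.data(), args.size());
  return sec_ret == EOK;  // any other return code is reported as INTERNAL_ERROR in the hunk
}
```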
out[%d] alloc tensor buffer failed, raw_data_size=%lu", node_name_.c_str(), i, - raw_data_size); + "Node[%s] out[%d] alloc tensor buffer failed, raw_data_size=%lu", + node_name_.c_str(), i, raw_data_size); auto status = context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release()))); GE_CHK_STATUS_RET(status, "Node[%s] set output %d failed.", node_name_.c_str(), i); auto shape_data_size = result_summary.shape_data_size; std::unique_ptr shape_buffer; GE_CHK_STATUS_RET(AllocTensorBuffer(shape_data_size, shape_buffer), - "Node[%s] out[%d] alloc shape buffer failed, shape_data_size=%lu", node_name_.c_str(), i, - shape_data_size); + "Node[%s] out[%d] alloc shape buffer failed, shape_data_size=%lu", + node_name_.c_str(), i, shape_data_size); out_shape_hbm.emplace_back(std::move(shape_buffer)); } return SUCCESS; @@ -357,37 +377,41 @@ Status AicpuTfNodeTask::ReadResultSummaryAndPrepareMemory(TaskContext &context, Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, const std::vector> &out_shape_hbm) { - GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, - "Node[%s] has %d outputs but out shape is %zu.", node_name_.c_str(), node_item_->num_outputs, - out_shape_hbm.size()); + GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), + INTERNAL_ERROR, + "Node[%s] has %d outputs but out shape is %zu.", + node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); uint64_t copy_num = 0; GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num)); STR_FWK_OP_KERNEL aicpu_task = {0}; std::string task_info; - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] Start"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), + "[GenMemCopyTask] Start"); GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info)); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] End"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), + "[GenMemCopyTask] End"); std::unique_ptr kernel_workspace_buf; GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), kernel_workspace_buf), - "Node[%s] alloc copy task workspace buf failed, size=%zu.", node_name_.c_str(), task_info.size()); + "Node[%s] alloc copy task workspace buf failed, size=%zu.", + node_name_.c_str(), task_info.size()); - GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), task_info.data(), task_info.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), + task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData()); aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; - GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &aicpu_task, - sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, - 
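ReadResultSummaryAndPrepareMemory, reflowed above, reads one ResultSummary per output from device memory, then allocates a raw-data buffer (handed to the context as output i) and a shape buffer (parsed later). A sketch of that loop; the ResultSummary field layout is inferred from the fields logged in PrepareCopyInputs further down, and the two helpers are stand-ins for the rtMemcpy readback and AllocTensorBuffer:

```cpp
#include <cstdint>
#include <memory>
#include <vector>

// Assumed field layout, matching the fields logged later in PrepareCopyInputs.
struct ResultSummary {
  uint64_t shape_data_ptr = 0;
  uint64_t shape_data_size = 0;
  uint64_t raw_data_ptr = 0;
  uint64_t raw_data_size = 0;
};

struct HostBuffer {
  std::unique_ptr<uint8_t[]> data;
  size_t size = 0;
};

static HostBuffer Alloc(size_t size) {  // stands in for AllocTensorBuffer
  return HostBuffer{std::make_unique<uint8_t[]>(size ? size : 1), size};
}

// Stands in for the rtMemcpy(DEVICE_TO_HOST) of output_summary_[i]; here it just fabricates values.
static ResultSummary ReadSummaryFromDevice(int output_idx) {
  ResultSummary s;
  s.raw_data_size = 16 * (output_idx + 1);
  s.shape_data_size = 2 * sizeof(int64_t);
  return s;
}

void PrepareOutputBuffers(int num_outputs,
                          std::vector<HostBuffer> &raw_bufs,
                          std::vector<HostBuffer> &shape_bufs) {
  for (int i = 0; i < num_outputs; ++i) {
    ResultSummary summary = ReadSummaryFromDevice(i);
    raw_bufs.emplace_back(Alloc(summary.raw_data_size));      // becomes output i's tensor buffer
    shape_bufs.emplace_back(Alloc(summary.shape_data_size));  // read back later to rebuild the shape
  }
}
```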
context.GetStream())); + GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + RT_KERNEL_DEFAULT, context.GetStream())); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); GE_CHK_RT_RET(rtStreamSynchronize(context.GetStream())); @@ -406,8 +430,9 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, for (auto i = 0; i < node_item_->num_outputs; ++i) { const auto &summary = output_summary_host_[i]; GELOGI("Node[%s] out[%d] summary, shape data=0x%lx, shape data size=%lu, raw data=0x%lx, raw data size=%lu.", - node_name_.c_str(), i, summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, - summary.raw_data_size); + node_name_.c_str(), i, + summary.shape_data_ptr, summary.shape_data_size, + summary.raw_data_ptr, summary.raw_data_size); if (summary.raw_data_size > 0) { auto output = context.GetOutput(i); GE_CHECK_NOTNULL(output); @@ -431,7 +456,8 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, copy_num = copy_input_release_flag.size(); - GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR, "Node[%s] need copy num is 0", node_name_.c_str()); + GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR, + "Node[%s] need copy num is 0", node_name_.c_str()); // copy task need copy output and output shape const size_t copy_input_buf_len = copy_num * sizeof(uint64_t); @@ -440,19 +466,19 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, ©_input_release_flag[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT_RET(rtMemcpy(copy_input_data_size_dev_->GetData(), copy_input_data_size_dev_->GetSize(), ©_input_data_size[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_->GetData(), copy_input_src_dev_->GetSize(), ©_input_src[0], - copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_->GetData(), copy_input_dst_dev_->GetSize(), ©_input_dst[0], - copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_->GetData(), copy_input_src_dev_->GetSize(), + ©_input_src[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_->GetData(), copy_input_dst_dev_->GetSize(), + ©_input_dst[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, std::string &task_info) { auto instance_ptr = ge::GELib::GetInstance(); - GE_CHK_BOOL_RET_STATUS(instance_ptr != nullptr && instance_ptr->InitFlag(), GE_CLI_GE_NOT_INITIALIZED, - "GE is not initialized"); + GE_CHK_BOOL_RET_STATUS(instance_ptr != nullptr && instance_ptr->InitFlag(), + GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized"); - static constexpr const char *const kKernelLibName = "aicpu_kernel"; + static constexpr const char *const kKernelLibName = "aicpu_tf_kernel"; OpsKernelInfoStorePtr kernel_info = instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName); GE_CHK_BOOL_RET_STATUS(kernel_info != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName); auto ret = kernel_info->GenMemCopyTask(copy_num, task, task_info); @@ -462,9 +488,10 @@ Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &tas Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm) { - GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, - 
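CopyDataToHbm then launches the generated memcopy task like an ordinary kernel, pointing rtKernelLaunchEx at the device copy of STR_FWK_OP_KERNEL, and synchronizes the stream so the copied data and shapes are visible before the shape update runs. A condensed sketch of that sequence; the calls match the hunk, while the runtime header name is an assumption:

```cpp
// Assumes the CANN runtime API used by the hunk above; the header name is an assumption.
#include "runtime/rt.h"

// 'args_dev' is the device buffer holding the serialized STR_FWK_OP_KERNEL (copy_task_args_buf_).
rtError_t LaunchMemCopyTask(void *args_dev, uint32_t args_size, rtStream_t stream) {
  // launch the aicpu copy task like any other kernel
  rtError_t ret = rtKernelLaunchEx(args_dev, args_size, RT_KERNEL_DEFAULT, stream);
  if (ret != RT_ERROR_NONE) {
    return ret;
  }
  // the copied outputs and shape blobs are only valid after the stream drains
  return rtStreamSynchronize(stream);
}
```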
"Node[%s] has %d outputs but out shape is %zu", node_name_.c_str(), node_item_->num_outputs, - out_shape_hbm.size()); + GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), + INTERNAL_ERROR, + "Node[%s] has %d outputs but out shape is %zu", + node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); for (auto i = 0; i < node_item_->num_outputs; ++i) { const auto &result_summary = output_summary_host_[i]; auto output_desc = node_item_->op_desc->MutableOutputDesc(i); @@ -476,17 +503,18 @@ Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context, node_name_.c_str(), i, result_summary.shape_data_size); uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); GELOGI("Node[%s] [%d]th output dim num=%u.", node_name_.c_str(), i, dim_num); - std::unique_ptr shape_addr(new (std::nothrow) int64_t[dim_num]()); + std::unique_ptr shape_addr(new(std::nothrow) int64_t[dim_num]()); GE_CHECK_NOTNULL(shape_addr); - GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm->GetData(), - shape_hbm->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, + shape_hbm->GetData(), shape_hbm->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { shape_dims.emplace_back(shape_addr[dim_idx]); GELOGD("Node[%s] [%d]th output dim[%u]=%ld.", node_name_.c_str(), i, dim_idx, shape_addr[dim_idx]); } } GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), i, output_desc), - "Node[%s] update [%d]th output shape failed.", node_name_.c_str(), i); + "Node[%s] update [%d]th output shape failed.", + node_name_.c_str(), i); } return SUCCESS; } @@ -496,15 +524,20 @@ Status AicpuTfNodeTask::UpdateShapeAndDataByResultSummary(TaskContext &context) std::vector> out_shape_hbm; GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(context, out_shape_hbm), - "Node[%s] read ResultSummary and update output shape failed.", node_name_.c_str()); + "Node[%s] read ResultSummary and update output shape failed.", + node_name_.c_str()); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[ReadResultSummaryAndPrepareMemory] End"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), + "[ReadResultSummaryAndPrepareMemory] End"); - GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm), "Node[%s] copy data to output failed.", node_name_.c_str()); + GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm), + "Node[%s] copy data to output failed.", + node_name_.c_str()); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[CopyDataToHbm] End"); - GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm), "Node[%s] update shape by hbm buffer failed.", + GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm), + "Node[%s] update shape by hbm buffer failed.", node_name_.c_str()); GELOGI("Node[%s] update shape and data by result summary end.", node_name_.c_str()); @@ -517,8 +550,8 @@ Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) { for (auto i = 0; i < node_item_->num_inputs; ++i) { auto inputData = context.GetInput(i); GE_CHECK_NOTNULL(inputData); - GELOGD("Node[%s] input[%d] addr = %p, size = %zu", node_name_.c_str(), i, inputData->GetData(), - inputData->GetSize()); + GELOGD("Node[%s] input[%d] addr = %p, size = %zu", node_name_.c_str(), i, + inputData->GetData(), inputData->GetSize()); io_addrs.emplace_back(reinterpret_cast(inputData->GetData())); } @@ -530,16 
+563,17 @@ Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) { auto outputData = context.GetOutput(j); GE_CHECK_NOTNULL(outputData); - GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, outputData->GetData(), - outputData->GetSize()); + GELOGD("Node[%s] output[%d] addr = %p, size = %zu", + node_name_.c_str(), j, outputData->GetData(), outputData->GetSize()); io_addrs.emplace_back(reinterpret_cast(outputData->GetData())); } } else { // unknown type 4 use result summary update ioaddr. GELOGI("Node[%s] is depend compute node, use result summary as out addr.", node_name_.c_str()); - GE_CHK_BOOL_RET_STATUS(output_summary_.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, - "Node[%s] has %d output but %zu output summary.", node_name_.c_str(), - node_item_->num_outputs, output_summary_.size()); + GE_CHK_BOOL_RET_STATUS(output_summary_.size() == static_cast(node_item_->num_outputs), + INTERNAL_ERROR, + "Node[%s] has %d output but %zu output summary.", + node_name_.c_str(), node_item_->num_outputs, output_summary_.size()); for (auto j = 0; j < node_item_->num_outputs; ++j) { void *summary_addr = output_summary_[j]->GetData(); @@ -550,8 +584,11 @@ Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) { // if has input and output, need copy to ioaddr if (!io_addrs.empty()) { // copy input and output to device - GE_CHK_RT_RET(rtMemcpy(input_output_addr_->GetData(), input_output_addr_->GetSize(), &io_addrs[0], - sizeof(uint64_t) * io_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(input_output_addr_->GetData(), + input_output_addr_->GetSize(), + &io_addrs[0], + sizeof(uint64_t) * io_addrs.size(), + RT_MEMCPY_HOST_TO_DEVICE)); } return SUCCESS; } @@ -567,8 +604,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { } Status AicpuTfNodeTask::TaskCallback(TaskContext &context) { - GELOGI("Node[%s] task callback start. is_dynamic=%s, unknown_type=%d.", node_name_.c_str(), - node_item_->is_dynamic ? "true" : "false", unknown_type_); + GELOGI("Node[%s] task callback start. is_dynamic=%s, unknown_type=%d.", + node_name_.c_str(), node_item_->is_dynamic ? "true" : "false", unknown_type_); Status callback_ret = SUCCESS; if (node_item_->is_dynamic) { // check need update shape, call update shape. 
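UpdateIoAddr builds one flat table: all input addresses, then either the real output addresses or, for depend-compute nodes, the per-output result-summary buffers, and ships the table to input_output_addr_ in a single rtMemcpy. A sketch of the table assembly:

```cpp
#include <cstdint>
#include <vector>

// Build the io-addr table that gets copied into input_output_addr_ in one host->device memcpy.
// 'use_summary_addrs' models the DEPEND_COMPUTE branch, where outputs point at result-summary buffers.
std::vector<uint64_t> BuildIoAddrTable(const std::vector<void *> &input_addrs,
                                       const std::vector<void *> &output_addrs,
                                       const std::vector<void *> &summary_addrs,
                                       bool use_summary_addrs) {
  std::vector<uint64_t> io_addrs;
  io_addrs.reserve(input_addrs.size() + output_addrs.size());
  for (void *in : input_addrs) {
    io_addrs.emplace_back(reinterpret_cast<uintptr_t>(in));
  }
  const auto &outs = use_summary_addrs ? summary_addrs : output_addrs;
  for (void *out : outs) {
    io_addrs.emplace_back(reinterpret_cast<uintptr_t>(out));
  }
  return io_addrs;  // copied with sizeof(uint64_t) * io_addrs.size() bytes, as in the hunk
}
```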
@@ -588,8 +625,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { GELOGI("Node[%s] init start.", node_name.c_str()); GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, - "Node[%s] unknown type[%d] is depend compute, it's not supported now.", node_name.c_str(), - unknown_type_); + "Node[%s] unknown type[%d] is depend compute, it's not supported now.", + node_name.c_str(), unknown_type_); GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel(), FAILED, "Node[%s] task def does not has kernel.", node_name.c_str()); auto &kernel_def = task_def_.kernel(); @@ -597,40 +634,43 @@ Status AicpuNodeTask::Init(const HybridModel &model) { auto &args = kernel_def.args(); args_size_ = kernel_def.args_size(); - GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, "Node[%s] task def args.size=%zu, but args_size=%u.", + GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, + "Node[%s] task def args.size=%zu, but args_size=%u.", node_name.c_str(), args.size(), args_size_); GE_CHK_BOOL_RET_STATUS(args_size_ >= sizeof(aicpu::AicpuParamHead), FAILED, - "Node[%s] task def args_size=%u is less than aicpu param head len=%zu.", node_name.c_str(), - args_size_, sizeof(aicpu::AicpuParamHead)); + "Node[%s] task def args_size=%u is less than aicpu param head len=%zu.", + node_name.c_str(), args_size_, sizeof(aicpu::AicpuParamHead)); - args_.reset(new (std::nothrow) uint8_t[args_size_]()); - GE_CHK_BOOL_RET_STATUS(args_ != nullptr, FAILED, "Node[%s] malloc args mem failed, args_size_=%u.", node_name.c_str(), - args_size_); + args_.reset(new(std::nothrow) uint8_t[args_size_]()); + GE_CHK_BOOL_RET_STATUS(args_ != nullptr, FAILED, + "Node[%s] malloc args mem failed, args_size_=%u.", + node_name.c_str(), args_size_); errno_t sec_ret = memcpy_s(args_.get(), args_size_, args.c_str(), args.size()); - GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, "Node[%s] copy args failed, ret: %d", node_name_.c_str(), - sec_ret); + GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, + "Node[%s] copy args failed, ret: %d", node_name_.c_str(), sec_ret); auto aicpu_param_head = reinterpret_cast(args_.get()); auto io_num = node_item_->num_inputs + node_item_->num_outputs; // check AicpuParamHead ioAddrNum is right. GE_CHK_BOOL_RET_STATUS((aicpu_param_head->ioAddrNum == static_cast(io_num)), PARAM_INVALID, - "Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.", node_name.c_str(), - aicpu_param_head->ioAddrNum, node_item_->num_inputs, node_item_->num_outputs); + "Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.", + node_name.c_str(), aicpu_param_head->ioAddrNum, + node_item_->num_inputs, node_item_->num_outputs); auto mini_len = sizeof(aicpu::AicpuParamHead) + io_num * sizeof(uint64_t); // check args len must over mini len. 
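AicpuNodeTask::Init validates the serialized args against the AicpuParamHead before touching them: the blob must at least hold the head, ioAddrNum must equal the node's input-plus-output count, and (in the check completed just below) length must cover the head plus the io-addr table. A sketch of those guards; the two-field head layout models only the fields the checks read and is an assumption:

```cpp
#include <cstddef>
#include <cstdint>

// Assumed minimal layout of aicpu::AicpuParamHead; only the two fields the checks use.
struct AicpuParamHeadStub {
  uint32_t length;     // total length of the args blob
  uint32_t ioAddrNum;  // number of io addresses that follow the head
};

// Mirrors the argument validation in AicpuNodeTask::Init (returns false where the real code
// returns FAILED / PARAM_INVALID).
bool ValidateAicpuArgs(const uint8_t *args, uint32_t args_size, int num_inputs, int num_outputs) {
  if (args_size < sizeof(AicpuParamHeadStub)) {
    return false;  // args must at least hold the param head
  }
  const auto *head = reinterpret_cast<const AicpuParamHeadStub *>(args);
  const uint32_t io_num = static_cast<uint32_t>(num_inputs + num_outputs);
  if (head->ioAddrNum != io_num) {
    return false;  // head must describe exactly num_inputs + num_outputs addresses
  }
  const size_t mini_len = sizeof(AicpuParamHeadStub) + io_num * sizeof(uint64_t);
  return head->length >= mini_len;  // args must be long enough for head + io addr table
}
```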
GE_CHK_BOOL_RET_STATUS((mini_len <= aicpu_param_head->length), PARAM_INVALID, - "Node[%s] param head length=%u, but min len need %zu.", node_name.c_str(), - aicpu_param_head->length, mini_len); + "Node[%s] param head length=%u, but min len need %zu.", + node_name.c_str(), aicpu_param_head->length, mini_len); auto &kernel_ext_info = kernel_def.kernel_ext_info(); auto kernel_ext_info_size = kernel_def.kernel_ext_info_size(); GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", node_name.c_str(), - kernel_ext_info.size(), kernel_ext_info_size); + "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", + node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size); GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name.c_str()); @@ -661,15 +701,15 @@ Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) { for (auto j = 0; j < node_item_->num_outputs; ++j) { auto outputData = context.GetOutput(j); GE_CHECK_NOTNULL(outputData); - GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, outputData->GetData(), - outputData->GetSize()); + GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, + outputData->GetData(), outputData->GetSize()); io_addrs.emplace_back(reinterpret_cast(outputData->GetData())); } auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead); // if has input and output, need copy to ioaddr - error_t cpy_ret = - memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); + error_t cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), + &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); GE_CHK_BOOL_RET_STATUS(cpy_ret == EOK, INTERNAL_ERROR, "Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.", node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); @@ -683,16 +723,17 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { uint32_t flag = RT_KERNEL_DEFAULT; auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), reinterpret_cast(kernel_name.c_str()), - 1, // default core dim is 1 - args_.get(), args_size_, nullptr, context.GetStream(), flag); + 1, // default core dim is 1 + args_.get(), args_size_, + nullptr, context.GetStream(), flag); GE_CHK_RT_RET(rt_ret); GELOGI("Node[%s] launch task end.", node_name_.c_str()); return SUCCESS; } Status AicpuNodeTask::TaskCallback(TaskContext &context) { - GELOGI("Node[%s] task callback start, is_dynamic = %s, unknown_type=%d.", node_name_.c_str(), - node_item_->is_dynamic ? "true" : "false", unknown_type_); + GELOGI("Node[%s] task callback start, is_dynamic = %s, unknown_type=%d.", + node_name_.c_str(), node_item_->is_dynamic ? "true" : "false", unknown_type_); Status callback_ret = SUCCESS; // check need update shape, call update shape. 
@@ -700,7 +741,8 @@ Status AicpuNodeTask::TaskCallback(TaskContext &context) { // check result callback_ret = UpdateOutputShapeFromExtInfo(); } else { - GELOGI("Node[%s] unknown shape type is %d no need update output shape.", node_name_.c_str(), unknown_type_); + GELOGI("Node[%s] unknown shape type is %d no need update output shape.", + node_name_.c_str(), unknown_type_); } GELOGI("Node[%s] task callback end.", node_name_.c_str()); return callback_ret; @@ -714,7 +756,8 @@ Status AiCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons return status; } -Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, +Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, + const NodePtr &node, std::shared_ptr &task) const { GE_CHECK_NOTNULL(node); GELOGI("Node[%s] load task start.", node->GetName().c_str()); @@ -722,8 +765,15 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node GE_CHECK_NOTNULL(node_item); auto task_defs = model.GetTaskDefs(node); GE_CHECK_NOTNULL(task_defs); - GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", - node->GetName().c_str(), (*task_defs).size()); + if (node_item->shape_inference_type != DEPEND_COMPUTE) { + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, + "Node[%s] task_def num[%zu] != 1", node->GetName().c_str(), (*task_defs).size()); + } else { + // The number of tasks of the fourth type operator may be 2 + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID, + "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", + node->GetName().c_str(), (*task_defs).size()); + } const auto &task_def = (*task_defs)[0]; std::shared_ptr aicpu_task; if (task_def.type() == RT_MODEL_TASK_KERNEL_EX) { @@ -733,13 +783,13 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node GELOGI("Node[%s] task type=%u is AicpuNodeTask.", node->GetName().c_str(), task_def.type()); aicpu_task = MakeShared(node_item, task_def); } else { - GELOGE(UNSUPPORTED, "Node[%s] task type=%u is not supported by aicpu node executor.", node->GetName().c_str(), - task_def.type()); + GELOGE(UNSUPPORTED, "Node[%s] task type=%u is not supported by aicpu node executor.", + node->GetName().c_str(), task_def.type()); return UNSUPPORTED; } - GE_CHK_BOOL_RET_STATUS(aicpu_task != nullptr, MEMALLOC_FAILED, "Load task for node %s failed.", - node->GetName().c_str()); + GE_CHK_BOOL_RET_STATUS(aicpu_task != nullptr, MEMALLOC_FAILED, + "Load task for node %s failed.", node->GetName().c_str()); GE_CHK_STATUS_RET(aicpu_task->Init(model), "Node[%s] task init failed.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 8aca6ff7..7caabd66 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -27,12 +27,12 @@ namespace hybrid { class AicpuNodeTaskBase : public NodeTask { public: AicpuNodeTaskBase(const NodeItem *node_item, const domi::TaskDef &task_def) - : node_item_(node_item), - task_def_(task_def), - node_name_(node_item->node_name), - node_type_(node_item->node_type), + : node_item_(node_item), task_def_(task_def), + node_name_(node_item->node_name), node_type_(node_item->node_type), unknown_type_(node_item->shape_inference_type), - aicpu_ext_handle_(node_item->node_name, node_item->num_inputs, node_item->num_outputs, + 
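LoadTask, shown above, relaxes the task-def count only for DEPEND_COMPUTE nodes (one compute task plus an optional copy task) and then dispatches on the task type, kernel_ex defs becoming AicpuTfNodeTask and the other supported type (its constant is not visible in this hunk) becoming AicpuNodeTask. A sketch of that dispatch with stub types:

```cpp
#include <cstddef>
#include <memory>

enum class TaskType { kKernelEx, kKernel, kOther };  // stand-ins for the RT_MODEL_TASK_* values

struct NodeTaskStub { virtual ~NodeTaskStub() = default; };
struct TfNodeTaskStub : NodeTaskStub {};   // AicpuTfNodeTask: task def carries kernel_ex
struct CpuNodeTaskStub : NodeTaskStub {};  // AicpuNodeTask: task def carries a plain kernel

// Mirrors the dispatch in AiCpuNodeExecutor::LoadTask: DEPEND_COMPUTE nodes may carry a second
// (copy) task def, everything else must carry exactly one.
std::shared_ptr<NodeTaskStub> MakeAicpuTask(TaskType type, size_t task_def_num, bool depend_compute) {
  const bool count_ok = depend_compute ? (task_def_num == 1 || task_def_num == 2)
                                       : (task_def_num == 1);
  if (!count_ok) {
    return nullptr;  // PARAM_INVALID in the real code
  }
  switch (type) {
    case TaskType::kKernelEx: return std::make_shared<TfNodeTaskStub>();
    case TaskType::kKernel:   return std::make_shared<CpuNodeTaskStub>();
    default:                  return nullptr;  // UNSUPPORTED
  }
}
```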
aicpu_ext_handle_(node_item->node_name, + node_item->num_inputs, + node_item->num_outputs, node_item->shape_inference_type) {} ~AicpuNodeTaskBase() override = default; @@ -42,7 +42,6 @@ class AicpuNodeTaskBase : public NodeTask { Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; - protected: virtual Status InitExtInfo(const std::string &kernel_ext_info); @@ -81,13 +80,15 @@ class AicpuNodeTaskBase : public NodeTask { class AicpuTfNodeTask : public AicpuNodeTaskBase { public: - AicpuTfNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) : AicpuNodeTaskBase(node_item, task_def) {} + AicpuTfNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) + : AicpuNodeTaskBase(node_item, task_def) {} ~AicpuTfNodeTask() override = default; Status Init(const HybridModel &model) override; protected: + Status LaunchTask(TaskContext &context) override; Status TaskCallback(TaskContext &context) override; @@ -107,17 +108,19 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { /// Status ReadResultSummaryAndPrepareMemory(TaskContext &context, std::vector> &out_shape_hbm); - Status CopyDataToHbm(TaskContext &context, const std::vector> &out_shape_hbm); + Status CopyDataToHbm(TaskContext &context, + const std::vector> &out_shape_hbm); - Status UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm); + Status UpdateShapeByHbmBuffer(TaskContext &context, + const std::vector> &out_shape_hbm); - Status PrepareCopyInputs(const TaskContext &context, const std::vector> &out_shape_hbm, + Status PrepareCopyInputs(const TaskContext &context, + const std::vector> &out_shape_hbm, uint64_t ©_num); static Status EnsureSessionCreated(uint64_t session_id); static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info); static uint64_t GetStepIdAddr(const HybridModel &model); - private: // kernel buf, device mem std::unique_ptr kernel_buf_; @@ -143,13 +146,15 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { class AicpuNodeTask : public AicpuNodeTaskBase { public: - AicpuNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) : AicpuNodeTaskBase(node_item, task_def) {} + AicpuNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) + : AicpuNodeTaskBase(node_item, task_def) {} ~AicpuNodeTask() override = default; Status Init(const HybridModel &model) override; protected: + Status LaunchTask(TaskContext &context) override; Status TaskCallback(TaskContext &context) override; @@ -166,10 +171,12 @@ class AicpuNodeTask : public AicpuNodeTaskBase { class AiCpuNodeExecutor : public NodeExecutor { public: - Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; + Status LoadTask(const HybridModel &model, + const NodePtr &node, + std::shared_ptr &task) const override; Status PrepareTask(NodeTask &task, TaskContext &context) const override; }; -} // namespace hybrid -} // namespace ge -#endif // GE_HYBRID_KERNEL_AICPU_NODE_EXECUTOR_H_ +} +} +#endif //GE_HYBRID_KERNEL_AICPU_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc old mode 100644 new mode 100755 index 122af0f5..3c4065ea --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 
2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,19 +29,23 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::COMPILED_SUBGRAPH, KnownNodeExecutor); -Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { +Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] Start"); GELOGI("[%s] KnownNodeTask::ExecuteAsync in.", context.GetNodeName()); if (davinci_model_->GetTaskList().size() == 0) { GELOGW("KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo."); // todo if data is connected to netoutput, forward address ? copy data? - if (context.NumInputs() == context.NumOutputs()) { - GELOGW("[%s] KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo.", context.GetNodeName()); + if (context.NumInputs() == context.NumOutputs()){ + GELOGW("[%s] KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo.", + context.GetNodeName()); for (int i = 0; i < context.NumInputs(); ++i) { auto tensor = context.MutableInput(i); GE_CHECK_NOTNULL(tensor); - GE_CHK_STATUS_RET(context.SetOutput(i, *tensor), "[%s] Failed to set output[%d]", context.GetNodeName(), i); + GE_CHK_STATUS_RET(context.SetOutput(i, *tensor), + "[%s] Failed to set output[%d]", + context.GetNodeName(), + i); } } @@ -54,8 +58,7 @@ Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function d RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] Start"); rt_ret = rtModelExecute(davinci_model_->GetRtModelHandle(), context.GetStream(), 0); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - GELOGE(rt_ret, "rtModelExecute error, ret: hybrid_model_executorOx%X", rt_ret); - return FAILED;); + GELOGE(rt_ret, "rtModelExecute error, ret: hybrid_model_executorOx%X", rt_ret); return FAILED;); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] End"); GELOGI("rtModelExecute end"); @@ -109,8 +112,8 @@ Status KnownNodeTask::Init(TaskContext &context) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTask_AllocateWorkspace] Start"); GE_CHK_STATUS_RET( - context.AllocateWorkspace(davinci_model_->TotalMemSize(), &buffer, davinci_model_->GetRuntimeParam().mem_base), - "known node task allocate workspace failed."); + context.AllocateWorkspace(davinci_model_->TotalMemSize(), &buffer, davinci_model_->GetRuntimeParam().mem_base), + "known node task allocate workspace failed."); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTask_AllocateWorkspace] End, size %zu", davinci_model_->TotalMemSize()); bool addr_not_changed = false; @@ -120,16 +123,15 @@ Status KnownNodeTask::Init(TaskContext &context) { davinci_model_->SetKnownNodeAddrNotChanged(addr_not_changed); // update mem base davinci_model_->UpdateMemBase(static_cast(buffer)); - GELOGI("KnownNodeTask::Init mem base is %p, size %u.", davinci_model_->GetRuntimeParam().mem_base, - davinci_model_->GetRuntimeParam().mem_size); + GELOGI("KnownNodeTask::Init mem base is %p, size %u.", + davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); } if (!load_flag_) { GE_CHK_STATUS_RET(davinci_model_->Init(), "KnownNodeExecutor::InitDavinciModel 
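The no-task fallback in KnownNodeTask::ExecuteAsync above forwards each input tensor to the matching output when the compiled davinci model produced no tasks and the input and output counts agree. A minimal sketch of that passthrough:

```cpp
#include <cstddef>
#include <vector>

struct TensorStub { void *data = nullptr; size_t size = 0; };

// Mirrors the no-task fallback in KnownNodeTask::ExecuteAsync: when the compiled model produced
// no tasks, forward input i to output i (only valid when the counts match).
bool ForwardInputsToOutputs(const std::vector<TensorStub> &inputs, std::vector<TensorStub> &outputs) {
  if (inputs.size() != outputs.size()) {
    return false;  // the real code only forwards when NumInputs() == NumOutputs()
  }
  for (size_t i = 0; i < inputs.size(); ++i) {
    outputs[i] = inputs[i];  // context.SetOutput(i, *tensor) in the hunk
  }
  return true;
}
```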
failed."); load_flag_ = true; } else { - GE_CHK_STATUS_RET( - ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), davinci_model_->Id()), - "KnownNodeTask::Init destroy aicpu kernel failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), + davinci_model_->Id()), "KnownNodeTask::Init destroy aicpu kernel failed."); } GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName()); return SUCCESS; @@ -150,7 +152,8 @@ Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons return SUCCESS; } -Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { +Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, + shared_ptr &task) const { GELOGI("[%s] KnownNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); @@ -177,7 +180,8 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node Status KnownNodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorExecuteTask] Start"); - GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), "Failed to execute task. node = %s", + GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), + "Failed to execute task. node = %s", context.GetNodeItem().NodeName().c_str()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorExecuteTask] End"); return SUCCESS; diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index 5847c833..fb1966b4 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,14 +27,15 @@ class HybridModel; class KnownNodeTask : public NodeTask { public: - KnownNodeTask(std::shared_ptr davinci_model) : davinci_model_(davinci_model) {} + KnownNodeTask(std::shared_ptr davinci_model) + : davinci_model_(davinci_model) + {} ~KnownNodeTask() {} Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; Status Init(TaskContext &context) override; - private: std::shared_ptr davinci_model_ = nullptr; bool load_flag_ = false; @@ -46,11 +47,10 @@ class KnownNodeExecutor : public NodeExecutor { Status PrepareTask(NodeTask &task, TaskContext &context) const; Status ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const; ~KnownNodeExecutor() {} - private: std::shared_ptr davinci_model_ = nullptr; }; } // namespace hybrid } // namespace ge -#endif // HYBRID_KNOWN_NODE_EXECUTOR_H_ +#endif //HYBRID_KNOWN_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc b/ge/hybrid/node_executor/controlop/control_op_executor.cc index 2bf7407c..5f9dde2a 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "control_op_executor.h" #include "graph/utils/node_utils.h" #include "graph/utils/type_utils.h" @@ -24,21 +23,27 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::CONTROL_OP, ControlOpNodeExecutor); namespace { -template +template Status CopyScalarValueToHost(const TensorValue &tensor, T &value) { GE_CHECK_GE(tensor.GetSize(), sizeof(value)); - GE_CHK_RT_RET(rtMemcpy(&value, sizeof(value), tensor.GetData(), sizeof(value), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(&value, + sizeof(value), + tensor.GetData(), + sizeof(value), + RT_MEMCPY_DEVICE_TO_HOST)); return SUCCESS; } -} // namespace +} -Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, TaskContext &task_context, +Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, + TaskContext &task_context, const std::function &done_callback) { GELOGD("[%s] Start to execute subgraph.", subgraph->GetName().c_str()); auto execution_context = const_cast(task_context.GetExecutionContext()); auto executor = MakeShared(subgraph, execution_context); GE_CHECK_NOTNULL(executor); - GE_CHK_STATUS_RET(executor->ExecuteAsync(task_context), "[%s] Failed to execute partitioned call.", + GE_CHK_STATUS_RET(executor->ExecuteAsync(task_context), + "[%s] Failed to execute partitioned call.", subgraph->GetName().c_str()); auto callback = [executor, done_callback]() mutable { @@ -56,12 +61,12 @@ Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, TaskContext Status ControlOpNodeTask::ToBool(const TensorValue &tensor, DataType data_type, bool &value) { switch (data_type) { -#define CASE(DT, T) \ - case (DT): { \ - T val{}; \ - GE_CHK_STATUS_RET(CopyScalarValueToHost(tensor, val)); \ - value = val != 0; \ - break; \ +#define CASE(DT, T) \ + case (DT): { \ + T val{}; \ + GE_CHK_STATUS_RET(CopyScalarValueToHost(tensor, val)); \ + value = val != 0; \ + break; \ } // DT_STRING was handled in CondPass CASE(DT_FLOAT, float) @@ -120,19 +125,24 @@ Status IfOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::functi if (shape.IsScalar()) { auto cond_tensor = task_context.GetInput(kIfCondIndex); 
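The control-op tasks copy the condition scalar to host memory and reduce it to a bool per data type, with non-zero meaning true. A sketch of CopyScalarValueToHost plus a two-type condensation of the CASE-macro switch in ToBool (the real switch covers every numeric type, and DT_STRING is rewritten earlier by CondPass):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Host-side stand-in for the cond tensor (the real code copies it with rtMemcpy DEVICE_TO_HOST).
struct HostTensor { std::vector<uint8_t> bytes; };

template <typename T>
bool CopyScalarValueToHost(const HostTensor &tensor, T &value) {
  if (tensor.bytes.size() < sizeof(value)) {
    return false;  // GE_CHECK_GE(tensor.GetSize(), sizeof(value)) in the hunk
  }
  std::memcpy(&value, tensor.bytes.data(), sizeof(value));
  return true;
}

// Condensed version of ControlOpNodeTask::ToBool: a scalar of any supported type is "true" iff
// its value is non-zero. Only int32 and float are modeled here.
bool ScalarToBool(const HostTensor &tensor, bool is_int32, bool &out) {
  if (is_int32) {
    int32_t v = 0;
    if (!CopyScalarValueToHost(tensor, v)) return false;
    out = (v != 0);
  } else {
    float v = 0.0f;
    if (!CopyScalarValueToHost(tensor, v)) return false;
    out = (v != 0.0f);
  }
  return true;
}
```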
GE_CHECK_NOTNULL(cond_tensor); - GE_CHK_STATUS_RET(ToBool(*cond_tensor, data_type, cond_val), "[%s] Failed to get cond value.", + GE_CHK_STATUS_RET(ToBool(*cond_tensor, data_type, cond_val), + "[%s] Failed to get cond value.", task_context.GetNodeName()); } else { // true if num elements is non-zero cond_val = shape.GetShapeSize() != 0; - GELOGD("[%s] Cond tensor shape = [%s], cond value = %d", task_context.GetNodeName(), shape.ToString().c_str(), + GELOGD("[%s] Cond tensor shape = [%s], cond value = %d", + task_context.GetNodeName(), + shape.ToString().c_str(), cond_val); } auto subgraph = cond_val ? then_ : else_; GELOGD("[%s] Taking subgraph [%s] by cond = [%d]", task_context.GetNodeName(), subgraph->GetName().c_str(), cond_val); GE_CHK_STATUS_RET(ExecuteSubgraph(subgraph, task_context, done_callback), - "[%s] Failed to execute subgraph. cond = %d", task_context.GetNodeName(), cond_val); + "[%s] Failed to execute subgraph. cond = %d", + task_context.GetNodeName(), + cond_val); GELOGD("[%s] Done executing with cond = %d successfully.", task_context.GetNodeName(), cond_val); return SUCCESS; @@ -161,7 +171,8 @@ Status CaseOpNodeTask::Init(const NodePtr &node, const HybridModel &model) { const GraphItem *CaseOpNodeTask::SelectBranch(int32_t branch_index) const { // subgraphs_ is non-empty. checked int Init if (branch_index < 0 || static_cast(branch_index) >= subgraphs_.size()) { - GELOGI("Branch index out of range. index = %d, num_subgraphs = %zu, will taking last branch.", branch_index, + GELOGI("Branch index out of range. index = %d, num_subgraphs = %zu, will taking last branch.", + branch_index, subgraphs_.size()); branch_index = subgraphs_.size() - 1; } @@ -175,7 +186,9 @@ Status CaseOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::func int32_t branch_index = 0; GE_CHK_STATUS_RET(CopyScalarValueToHost(*branch_tensor, branch_index)); const GraphItem *subgraph = SelectBranch(branch_index); - GELOGI("[%s] Taking subgraph [%s] by branch = [%d]", task_context.GetNodeName(), subgraph->GetName().c_str(), + GELOGI("[%s] Taking subgraph [%s] by branch = [%d]", + task_context.GetNodeName(), + subgraph->GetName().c_str(), branch_index); std::vector inputs; @@ -186,7 +199,8 @@ Status CaseOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::func inputs.emplace_back(*input_tensor); } - GE_CHK_STATUS_RET(ExecuteSubgraph(subgraph, task_context, done_callback), "[%s] Failed to execute else-subgraph.", + GE_CHK_STATUS_RET(ExecuteSubgraph(subgraph, task_context, done_callback), + "[%s] Failed to execute else-subgraph.", task_context.GetNodeName()); GELOGD("[%s] Done executing subgraph[%d] successfully.", task_context.GetNodeName(), branch_index); @@ -213,13 +227,17 @@ Status WhileOpNodeTask::Init(const NodePtr &node, const HybridModel &model) { Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::function &done_callback) const { if (task_context.NumInputs() != task_context.NumOutputs()) { - GELOGE(INTERNAL_ERROR, "[%s] Invalid while args. num_inputs = %d, num_outputs = %d", task_context.GetNodeName(), - task_context.NumInputs(), task_context.NumOutputs()); + GELOGE(INTERNAL_ERROR, + "[%s] Invalid while args. 
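CaseOpNodeTask::SelectBranch, reflowed above, clamps an out-of-range branch index to the last subgraph instead of failing, relying on Init having verified the subgraph list is non-empty. A sketch of that selection with subgraphs modeled as plain names:

```cpp
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Mirrors CaseOpNodeTask::SelectBranch: an out-of-range branch index falls back to the last
// subgraph rather than failing (the subgraph list is guaranteed non-empty by Init).
const std::string &SelectBranch(const std::vector<std::string> &subgraphs, int32_t branch_index) {
  if (branch_index < 0 || static_cast<size_t>(branch_index) >= subgraphs.size()) {
    branch_index = static_cast<int32_t>(subgraphs.size()) - 1;  // take the last branch
  }
  return subgraphs[static_cast<size_t>(branch_index)];
}
```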
num_inputs = %d, num_outputs = %d", + task_context.GetNodeName(), + task_context.NumInputs(), + task_context.NumOutputs()); return INTERNAL_ERROR; } bool is_continue = false; - GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), "[%s] Failed to execute iteration 0.", + GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), + "[%s] Failed to execute iteration 0.", task_context.GetNodeName()); if (!is_continue) { for (int i = 0; i < task_context.NumInputs(); ++i) { @@ -250,8 +268,10 @@ Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::fun int iteration = 1; while (true) { GELOGD("[%s] Start to execute, iteration = %d", task_context.GetNodeName(), iteration); - GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), "[%s] Failed to execute iteration %d.", - task_context.GetNodeName(), iteration); + GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), + "[%s] Failed to execute iteration %d.", + task_context.GetNodeName(), + iteration); if (!is_continue) { GELOGD("[%s] Quit from loop. current iteration = %d", task_context.GetNodeName(), iteration); @@ -294,16 +314,21 @@ Status WhileOpNodeTask::ExecuteCond(TaskContext &task_context, bool &is_continue GELOGD("[%s] Start to execute cond-subgraph.", task_context.GetNodeName()); GE_CHK_STATUS_RET(executor->ExecuteAsync(inputs, input_desc), "Failed to execute partitioned call."); GELOGD("[%s] Done executing cond-subgraph successfully.", cond_->GetName().c_str()); - GE_CHK_STATUS_RET_NOLOG(task_context.RegisterCallback([executor]() mutable { executor.reset(); })); + GE_CHK_STATUS_RET_NOLOG(task_context.RegisterCallback([executor]() mutable { + executor.reset(); + })); // get cond output GE_CHK_STATUS_RET(executor->Synchronize(), "[%s] Failed to sync cond-subgraph result.", cond_->GetName().c_str()); std::vector cond_outputs; std::vector cond_output_desc_list; - GE_CHK_STATUS_RET(executor->GetOutputs(cond_outputs, cond_output_desc_list), "[%s] Failed to get cond-output.", + GE_CHK_STATUS_RET(executor->GetOutputs(cond_outputs, cond_output_desc_list), + "[%s] Failed to get cond-output.", cond_->GetName().c_str()); if (cond_outputs.size() != kCondOutputSize || cond_output_desc_list.size() != kCondOutputSize) { - GELOGE(INTERNAL_ERROR, "[%s] Number of cond outputs is invalid. number = %zu", task_context.GetNodeName(), + GELOGE(INTERNAL_ERROR, + "[%s] Number of cond outputs is invalid. number = %zu", + task_context.GetNodeName(), cond_outputs.size()); return INTERNAL_ERROR; } @@ -312,12 +337,15 @@ Status WhileOpNodeTask::ExecuteCond(TaskContext &task_context, bool &is_continue const auto &shape = cond_tensor_desc->GetShape(); if (shape.IsScalar()) { auto data_type = cond_tensor_desc->GetDataType(); - GE_CHK_STATUS_RET(ToBool(cond_outputs[0], data_type, is_continue), "[%s] Failed to get cond value.", + GE_CHK_STATUS_RET(ToBool(cond_outputs[0], data_type, is_continue), + "[%s] Failed to get cond value.", task_context.GetNodeName()); } else { // true if num elements is non-zero is_continue = shape.GetShapeSize() > 0; - GELOGD("[%s] Cond tensor shape = [%s], is_continue = %d", task_context.GetNodeName(), shape.ToString().c_str(), + GELOGD("[%s] Cond tensor shape = [%s], is_continue = %d", + task_context.GetNodeName(), + shape.ToString().c_str(), is_continue); } @@ -336,7 +364,9 @@ Status WhileOpNodeTask::MoveOutputs2Inputs(TaskContext &task_context) { auto output_tensor_desc = task_context.MutableOutputDesc(i); GE_CHECK_NOTNULL(output_tensor_desc); - GELOGD("[%s] To update input shape[%d] by output shape. 
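The While task keeps running the cond-subgraph, then the body, then MoveOutputs2Inputs until cond evaluates to false; iteration 0 may quit immediately, in which case the inputs are simply forwarded to the outputs. A condensed, callable-based sketch of that driver, with the three steps passed in as stand-ins for ExecuteCond, ExecuteSubgraph(body_) and MoveOutputs2Inputs:

```cpp
#include <functional>

// Condensed driver for WhileOpNodeTask::DoExecuteAsync / ExecuteOneLoop.
bool RunWhileLoop(const std::function<bool(bool &)> &execute_cond,
                  const std::function<bool()> &execute_body,
                  const std::function<bool()> &move_outputs_to_inputs,
                  int &iterations) {
  iterations = 0;
  bool is_continue = false;
  while (true) {
    if (!execute_cond(is_continue)) return false;  // cond-subgraph failed
    if (!is_continue) return true;                 // loop done; iteration 0 may exit immediately
    if (!execute_body()) return false;             // body-subgraph failed
    if (!move_outputs_to_inputs()) return false;   // feed outputs back as the next iteration's inputs
    ++iterations;
  }
}
```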
from [%s] to [%s]", task_context.GetNodeName(), i, + GELOGD("[%s] To update input shape[%d] by output shape. from [%s] to [%s]", + task_context.GetNodeName(), + i, task_context.MutableInputDesc(i)->GetShape().ToString().c_str(), output_tensor_desc->GetShape().ToString().c_str()); *task_context.MutableInputDesc(i) = *output_tensor_desc; @@ -346,25 +376,28 @@ Status WhileOpNodeTask::MoveOutputs2Inputs(TaskContext &task_context) { } Status WhileOpNodeTask::ExecuteOneLoop(TaskContext &task_context, bool &is_continue) const { - GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), "[%s] Failed to execute cond-subgraph", + GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), + "[%s] Failed to execute cond-subgraph", task_context.GetNodeName()); if (!is_continue) { return SUCCESS; } GELOGD("[%s] Start to execute body-subgraph.", task_context.GetNodeName()); - GE_CHK_STATUS_RET(ExecuteSubgraph(body_, task_context, nullptr), "[%s] Failed to execute cond-subgraph", - task_context.GetNodeName()); + GE_CHK_STATUS_RET(ExecuteSubgraph(body_, task_context, nullptr), + "[%s] Failed to execute cond-subgraph", task_context.GetNodeName()); GELOGD("[%s] Done executing body-subgraph successfully.", task_context.GetNodeName()); // set outputs to inputs for next iteration - GE_CHK_STATUS_RET(MoveOutputs2Inputs(task_context), "[%s] Failed to move outputs to inputs", + GE_CHK_STATUS_RET(MoveOutputs2Inputs(task_context), + "[%s] Failed to move outputs to inputs", task_context.GetNodeName()); return SUCCESS; } -Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, +Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, + const NodePtr &node, shared_ptr &task) const { auto node_item = model.GetNodeItem(node); GE_CHECK_NOTNULL(node_item); @@ -372,11 +405,11 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, const NodePtr & unique_ptr node_task; auto node_type = node->GetType(); if (node_type == IF) { - node_task.reset(new (std::nothrow) IfOpNodeTask()); + node_task.reset(new(std::nothrow) IfOpNodeTask()); } else if (node_type == CASE) { - node_task.reset(new (std::nothrow) CaseOpNodeTask()); + node_task.reset(new(std::nothrow) CaseOpNodeTask()); } else if (node_type == WHILE) { - node_task.reset(new (std::nothrow) WhileOpNodeTask()); + node_task.reset(new(std::nothrow) WhileOpNodeTask()); } else { GELOGE(PARAM_INVALID, "[%s] Unsupported type: %s", node->GetName().c_str(), node_type.c_str()); return PARAM_INVALID; @@ -389,6 +422,8 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, const NodePtr & return SUCCESS; } -Status ControlOpNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { return SUCCESS; } +Status ControlOpNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { + return SUCCESS; +} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.h b/ge/hybrid/node_executor/controlop/control_op_executor.h index 68db7e91..7520afd1 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.h +++ b/ge/hybrid/node_executor/controlop/control_op_executor.h @@ -33,7 +33,8 @@ class ControlOpNodeTask : public NodeTask { protected: virtual Status DoExecuteAsync(TaskContext &task_context, const std::function &done_callback) const = 0; static Status ToBool(const TensorValue &tensor_value, DataType data_type, bool &value); - static Status ExecuteSubgraph(const GraphItem *subgraph, TaskContext &task_context, + static Status 
ExecuteSubgraph(const GraphItem *subgraph, + TaskContext &task_context, const std::function &done_callback); }; @@ -58,7 +59,7 @@ class CaseOpNodeTask : public ControlOpNodeTask { Status Init(const NodePtr &node, const HybridModel &model) override; protected: - const GraphItem *SelectBranch(int32_t branch_index) const; + const GraphItem* SelectBranch(int32_t branch_index) const; Status DoExecuteAsync(TaskContext &task_context, const std::function &done_callback) const override; private: @@ -97,4 +98,4 @@ class ControlOpNodeExecutor : public NodeExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_CONTROLOP_CONTROL_OP_EXECUTOR_H_ +#endif // GE_HYBRID_CONTROLOP_CONTROL_OP_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc old mode 100644 new mode 100755 index cc140b08..ee45964c --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -27,8 +27,14 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); -const std::unordered_map> RefInputTask::out_ref_input_index_ = { - {DATA, {}}, {AIPPDATA, {}}, {RESHAPE, {}}, {EXPANDDIMS, {}}, {SQUEEZE, {}}, {BROADCASTGRADIENTARGS, {}}}; +const std::unordered_map> + RefInputTask::out_ref_input_index_ = {{DATA, {}}, + {AIPPDATA, {}}, + {RESHAPE, {}}, + {EXPANDDIMS, {}}, + {SQUEEZE, {}}, + {BROADCASTGRADIENTARGS, {}} + }; const std::unordered_set DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; @@ -40,7 +46,8 @@ Status RefInputTask::UpdateArgs(TaskContext &) { Status RefInputTask::Execute(TaskContext &context) { auto iter = out_ref_input_index_.find(node_type_); if (iter == out_ref_input_index_.end()) { - GELOGE(UNSUPPORTED, "node %s type %s can not use RefInputTask.", node_name_.c_str(), node_type_.c_str()); + GELOGE(UNSUPPORTED, "node %s type %s can not use RefInputTask.", + node_name_.c_str(), node_type_.c_str()); return UNSUPPORTED; } @@ -65,8 +72,8 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { auto input = context.GetInput(out_index); GE_CHECK_NOTNULL(input); GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); - GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, - out_index, input->GetData()); + GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", + node_name_.c_str(), node_type_.c_str(), out_index, out_index, input->GetData()); } GELOGI("node %s type %s ref input one by one end.", node_name_.c_str(), node_type_.c_str()); return SUCCESS; @@ -76,8 +83,8 @@ Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskCont GELOGI("node %s type %s ref input by order begin.", node_name_.c_str(), node_type_.c_str()); int32_t output_num = context.NumOutputs(); if (ref_order.size() != static_cast(output_num)) { - GELOGE(INTERNAL_ERROR, "node %s type %s has %d outputs but only has %zu out ref index.", node_name_.c_str(), - node_type_.c_str(), output_num, ref_order.size()); + GELOGE(INTERNAL_ERROR, "node %s type %s has %d outputs but only has %zu out ref index.", + node_name_.c_str(), node_type_.c_str(), output_num, ref_order.size()); return INTERNAL_ERROR; } for (auto out_index = 0; out_index < output_num; ++out_index) { @@ -85,8 +92,8 @@ Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskCont auto input = context.GetInput(ref_input_index); GE_CHECK_NOTNULL(input); 
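RefInputTask makes outputs alias their inputs for zero-copy ops (Data, Reshape, ExpandDims, Squeeze and friends): when the registered ref list for the node type is empty, output i refs input i; otherwise the list gives, for each output, the input index to ref. A sketch of that selection with plain index vectors standing in for the TaskContext:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Returns, for each output, the input index it should reference.
// An empty ref_order means "ref one by one" (output i <- input i), as in RefInputTask::Execute.
bool BuildRefMapping(const std::vector<uint32_t> &ref_order, int num_outputs,
                     std::vector<uint32_t> &out_to_in) {
  out_to_in.clear();
  if (ref_order.empty()) {
    for (int i = 0; i < num_outputs; ++i) {
      out_to_in.push_back(static_cast<uint32_t>(i));  // RefOneByOne
    }
    return true;
  }
  if (ref_order.size() != static_cast<size_t>(num_outputs)) {
    return false;  // INTERNAL_ERROR: the ref list must cover every output (RefByOrder)
  }
  out_to_in = ref_order;
  return true;
}
```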
GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); - GELOGD("node %s type %s output[%d] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, - ref_input_index, input->GetData()); + GELOGD("node %s type %s output[%d] ref input[%u] addr=%p.", + node_name_.c_str(), node_type_.c_str(), out_index, ref_input_index, input->GetData()); } GELOGI("node %s type %s ref input by order end.", node_name_.c_str(), node_type_.c_str()); return SUCCESS; @@ -94,8 +101,8 @@ Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskCont Status RefInputTask::ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[RefInputTaskExecuteAsync] Start"); - GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s ref input task execute failed", node_name_.c_str(), - node_type_.c_str()); + GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s ref input task execute failed", + node_name_.c_str(), node_type_.c_str()); if (done_callback != nullptr) { // host cpu no need register callback, call it directly. GE_CHK_STATUS_RET(context.TryExecuteCallback(done_callback)); @@ -104,7 +111,9 @@ Status RefInputTask::ExecuteAsync(TaskContext &context, std::function do return SUCCESS; } -bool RefInputTask::IsBelong(const std::string &op_type) { return out_ref_input_index_.count(op_type) > 0; } +bool RefInputTask::IsBelong(const std::string &op_type) { + return out_ref_input_index_.count(op_type) > 0; +} Status DependInputShapeTask::UpdateArgs(TaskContext &) { // no need update args @@ -116,14 +125,15 @@ Status DependInputShapeTask::Execute(TaskContext &context) { std::string node_type = node_->GetType(); auto kernel = factory.Create(node_type); if (kernel == nullptr) { - GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", node_->GetName().c_str(), - node_type.c_str()); + GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", + node_->GetName().c_str(), node_type.c_str()); return UNSUPPORTED; } std::vector outputs; Status compute_ret = kernel->Compute(node_, outputs); if (compute_ret != SUCCESS) { - GELOGE(compute_ret, "node %s type %s compute failed or not imply.", node_->GetName().c_str(), node_type.c_str()); + GELOGE(compute_ret, "node %s type %s compute failed or not imply.", + node_->GetName().c_str(), node_type.c_str()); return compute_ret; } int32_t output_num = context.NumOutputs(); @@ -149,15 +159,19 @@ Status DependInputShapeTask::Execute(TaskContext &context) { return INTERNAL_ERROR; } - GELOGI("node:%s type:%s [%d]th output data=%p, out size=%zu, data size=%zu.", node_->GetName().c_str(), - node_type.c_str(), i, tensor_value->GetData(), tensor_value->GetSize(), tensor_data.GetSize()); + GELOGI("node:%s type:%s [%d]th output data=%p, out size=%zu, data size=%zu.", + node_->GetName().c_str(), node_type.c_str(), i, + tensor_value->GetData(), tensor_value->GetSize(), tensor_data.GetSize()); if (tensor_data.GetSize() > 0) { - GE_CHK_RT_RET(rtMemcpy(tensor_value->MutableData(), tensor_value->GetSize(), tensor_data.GetData(), - tensor_data.GetSize(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(tensor_value->MutableData(), + tensor_value->GetSize(), + tensor_data.GetData(), + tensor_data.GetSize(), + RT_MEMCPY_HOST_TO_DEVICE)); } - GELOGI("node:%s type:%s [%d]th set data success, data size=%zu.", node_->GetName().c_str(), node_type.c_str(), i, - tensor_data.GetSize()); + GELOGI("node:%s type:%s [%d]th set data success, data size=%zu.", + node_->GetName().c_str(), 
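DependInputShapeTask runs the matching host kernel (Shape, ShapeN, Rank, Size) and then copies every produced tensor into the pre-allocated device output, failing if the data does not fit and skipping empty tensors. A sketch of that copy-back step with host/device stand-ins in place of GeTensor, TensorValue and rtMemcpy:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

struct HostTensorStub { std::vector<uint8_t> data; };
struct DeviceOutputStub { std::vector<uint8_t> storage; };  // stands in for the output TensorValue

// Mirrors the tail of DependInputShapeTask::Execute: each tensor the host kernel produced must
// fit the pre-allocated output, and zero-sized tensors skip the copy (real code uses rtMemcpy).
bool CopyHostOutputsToDevice(const std::vector<HostTensorStub> &computed,
                             std::vector<DeviceOutputStub> &outputs) {
  if (computed.size() != outputs.size()) {
    return false;  // output count produced by the kernel must match the node's outputs
  }
  for (size_t i = 0; i < computed.size(); ++i) {
    if (computed[i].data.size() > outputs[i].storage.size()) {
      return false;  // INTERNAL_ERROR: data larger than the allocated output
    }
    if (!computed[i].data.empty()) {
      std::memcpy(outputs[i].storage.data(), computed[i].data.data(), computed[i].data.size());
    }
  }
  return true;
}
```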
node_type.c_str(), i, tensor_data.GetSize()); } return SUCCESS; } @@ -176,7 +190,9 @@ Status DependInputShapeTask::ExecuteAsync(TaskContext &context, std::function 0; } +bool DependInputShapeTask::IsBelong(const std::string &op_type) { + return depend_input_shape_ops_.count(op_type) > 0; +} Status GeLocalNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), @@ -186,24 +202,26 @@ Status GeLocalNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) co return status; } -Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, +Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, + const NodePtr &node, std::shared_ptr &task) const { GE_CHECK_NOTNULL(node); std::string node_type = node->GetType(); if (RefInputTask::IsBelong(node_type)) { - GELOGI("node %s type %s is ref input task, use RefInputTask.", node->GetName().c_str(), node_type.c_str()); + GELOGI("node %s type %s is ref input task, use RefInputTask.", + node->GetName().c_str(), node_type.c_str()); task = MakeShared(node); if (task == nullptr) { GELOGE(MEMALLOC_FAILED, "create RefInputTask for node %s failed.", node->GetName().c_str()); return MEMALLOC_FAILED; } } else if (DependInputShapeTask::IsBelong(node_type)) { - GELOGI("node %s type %s is depend input shape task, use DependInputShapeTask.", node->GetName().c_str(), - node_type.c_str()); + GELOGI("node %s type %s is depend input shape task, use DependInputShapeTask.", + node->GetName().c_str(), node_type.c_str()); task = MakeShared(node); if (task == nullptr) { - GELOGE(MEMALLOC_FAILED, "create DependInputShapeTask for node %s type %s failed.", node->GetName().c_str(), - node_type.c_str()); + GELOGE(MEMALLOC_FAILED, "create DependInputShapeTask for node %s type %s failed.", + node->GetName().c_str(), node_type.c_str()); return MEMALLOC_FAILED; } } else if (node_type == CONSTANTOP || node_type == VARIABLE) { @@ -217,8 +235,8 @@ Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &no task = MakeShared(tensor); GE_CHECK_NOTNULL(task); } else { - GELOGE(UNSUPPORTED, "node %s type %s is not support in GeLocalNodeExecutor now.", node->GetName().c_str(), - node_type.c_str()); + GELOGE(UNSUPPORTED, "node %s type %s is not support in GeLocalNodeExecutor now.", + node->GetName().c_str(), node_type.c_str()); return UNSUPPORTED; } return SUCCESS; @@ -226,7 +244,9 @@ Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &no ConstantNodeTask::ConstantNodeTask(const TensorValue *tensor) : tensor_(tensor) {} -Status ConstantNodeTask::UpdateArgs(TaskContext &context) { return SUCCESS; } +Status ConstantNodeTask::UpdateArgs(TaskContext &context) { + return SUCCESS; +} Status ConstantNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGD("[%s] Start execute.", context.GetNodeName()); diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h index 0195e76c..9de8d0f9 100644 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h @@ -25,14 +25,16 @@ namespace ge { namespace hybrid { class RefInputTask : public NodeTask { public: - explicit RefInputTask(const NodePtr &node) : node_name_(node->GetName()), node_type_(node->GetType()) {} + explicit RefInputTask(const NodePtr &node) + : node_name_(node->GetName()), + node_type_(node->GetType()) { + } 
~RefInputTask() = default; virtual Status UpdateArgs(TaskContext &context) override; virtual Status ExecuteAsync(TaskContext &context, std::function done_callback) override; static bool IsBelong(const std::string &op_type); - private: Status Execute(TaskContext &context); Status RefOneByOne(TaskContext &context); @@ -49,17 +51,16 @@ class RefInputTask : public NodeTask { class DependInputShapeTask : public NodeTask { public: - explicit DependInputShapeTask(const NodePtr &node) : node_(node) {} + explicit DependInputShapeTask(const NodePtr &node) : node_(node) { + } ~DependInputShapeTask() = default; virtual Status UpdateArgs(TaskContext &context) override; virtual Status ExecuteAsync(TaskContext &context, std::function done_callback) override; static bool IsBelong(const std::string &op_type); - private: Status Execute(TaskContext &context); - private: const NodePtr node_; @@ -81,11 +82,13 @@ class ConstantNodeTask : public NodeTask { class GeLocalNodeExecutor : public NodeExecutor { public: + Status PrepareTask(NodeTask &task, TaskContext &context) const override; - virtual Status LoadTask(const HybridModel &model, const NodePtr &node, + virtual Status LoadTask(const HybridModel &model, + const NodePtr &node, std::shared_ptr &task) const override; }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_GE_LOCAL_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_KERNEL_GE_LOCAL_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index f2cd1888..0d6f52e8 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include "hybrid/node_executor/hccl/hccl_node_executor.h" #include "common/ge/ge_util.h" #include "common/ge/plugin_manager.h" +#include "common/math/math_util.h" #include "framework/common/debug/ge_log.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" @@ -41,8 +42,8 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do GELOGE(FAILED, "hccl handle is nullptr! 
"); return FAILED; } - auto EnqueueHcomOpertion = - (HcclResult(*)(HcomOpertion, std::function))dlsym(context.handle_, "EnqueueHcomOpertion"); + auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function))dlsym( + context.handle_, "EnqueueHcomOpertion"); if (EnqueueHcomOpertion == nullptr) { GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function."); if (dlclose(context.handle_) != 0) { @@ -162,12 +163,13 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(reinterpret_cast(tv->MutableData())); + addr_infos.resize(dims.front()); for (auto idx = 0; idx < dims.front(); ++idx) { - addr_infos.push_back({static_cast(data[idx * kVarTableRowCnt]), - data[idx * kVarTableRowCnt + kVarTableIdxAddr], local_addr, - data[idx * kVarTableRowCnt + kVarTableIdxLen]}); - local_addr += data[idx * kVarTableRowCnt + kVarTableIdxLen]; + FMK_INT64_MULCHECK(idx, kVarTableRowCnt); + auto line_idx = idx * kVarTableRowCnt; + addr_infos[idx] = {static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, + data[line_idx + kVarTableIdxLen]}; + local_addr += data[line_idx + kVarTableIdxLen]; } return SUCCESS; @@ -200,8 +204,8 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector done_callback) { GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); auto EnqueueRemoteAccess = - (HcclResult(*)(const string &, const vector &, - std::function))dlsym(context.handle_, "EnqueueRemoteAccess"); + (HcclResult(*)(const string &, const vector &, + std::function))dlsym(context.handle_, "EnqueueRemoteAccess"); if (EnqueueRemoteAccess == nullptr) { GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function."); if (dlclose(context.handle_) != 0) { diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index ddf6eb3a..8aecc3ad 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc old mode 100644 new mode 100755 index 1c98abee..49fc3de4 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -31,8 +31,8 @@ Status HostNodeTaskBase::UpdateArgs(TaskContext &) { Status HostNodeTaskBase::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGD("[%s] Start execute.", context.GetNodeName()); - GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s, task execute failed.", node_->GetName().c_str(), - node_->GetType().c_str()) + GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s, task execute failed.", + node_->GetName().c_str(), node_->GetType().c_str()) if (done_callback) { GELOGD("[%s] Start invoke callback.", context.GetNodeName()); done_callback(); @@ -49,7 +49,8 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { for (int32_t i = 0; i < context.NumInputs(); ++i) { const auto &input_desc = op_desc->GetInputDesc(i); GE_CHECK_NOTNULL(context.GetInput(i)); - auto in_tensor = MakeShared(input_desc, reinterpret_cast(context.GetInput(i)->GetData()), + auto in_tensor = MakeShared(input_desc, + reinterpret_cast(context.GetInput(i)->GetData()), context.GetInput(i)->GetSize()); GE_CHECK_NOTNULL(in_tensor); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); @@ -70,8 +71,9 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { } auto tensor = context.GetOutput(i); GE_CHECK_NOTNULL(tensor); - auto out_tensor = - MakeShared(output_desc, reinterpret_cast(tensor->GetData()), tensor->GetSize()); + auto out_tensor = MakeShared(output_desc, + reinterpret_cast(tensor->GetData()), + tensor->GetSize()); GE_CHECK_NOTNULL(out_tensor); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); @@ -87,22 +89,24 @@ Status HostCpuNodeTask::Execute(TaskContext &context) { RunContext run_context; auto host_kernel = hybrid::host_cpu::KernelFactory::Instance().CreateKernel(node_); if (host_kernel == nullptr) { - GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", node_->GetName().c_str(), - node_->GetType().c_str()); + GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", + node_->GetName().c_str(), node_->GetType().c_str()); return UNSUPPORTED; } Status compute_ret = host_kernel->Compute(context); if (compute_ret != SUCCESS) { - GELOGE(compute_ret, "node %s type %s compute failed or not imply.", node_->GetName().c_str(), - node_->GetType().c_str()); + GELOGE(compute_ret, "node %s type %s compute failed or not imply.", + node_->GetName().c_str(), node_->GetType().c_str()); return compute_ret; } return SUCCESS; } -Status HostCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { return task.UpdateArgs(context); } +Status HostCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { + return task.UpdateArgs(context); +} Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const { @@ -110,7 +114,9 @@ Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &no auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); auto mem_type = static_cast(HOST_DDR); - (void)AttrUtils::SetInt(op_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type); + for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) { + 
(void)AttrUtils::SetInt(op_desc->MutableOutputDesc(i), ATTR_OUTPUT_MEMORY_TYPE, mem_type); + } const std::string &name = node->GetName(); const std::string &type = node->GetType(); if (HostCpuEngine::GetInstance().CheckSupported(type)) { @@ -128,4 +134,4 @@ Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &no return SUCCESS; } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h index 036a0c60..10657379 100644 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h @@ -58,8 +58,10 @@ class HostCpuNodeExecutor : public NodeExecutor { public: Status PrepareTask(NodeTask &task, TaskContext &context) const override; - Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; + Status LoadTask(const HybridModel &model, + const NodePtr &node, + std::shared_ptr &task) const override; }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_HOST_CPU_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_KERNEL_HOST_CPU_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 3655fcdb..3bf71013 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,7 +24,7 @@ const size_t kAssignInputNum = 2; const size_t kAssignRefInputIndex = 0; const size_t kAssignValueInputIndex = 1; const size_t kAssignRefOutputIndex = 0; -} // namespace +} namespace ge { namespace hybrid { @@ -37,19 +37,19 @@ Status AssignKernel::Compute(TaskContext& context) { const auto value_tensor = context.GetInput(kAssignValueInputIndex); GE_CHECK_NOTNULL(value_tensor); if (value_tensor->GetSize() > ref_tensor->GetSize()) { - GELOGE(INTERNAL_ERROR, "[%s] value_input_size=%zu, but ref_input_size=%zu.", node_->GetName().c_str(), - value_tensor->GetSize(), ref_tensor->GetSize()); + GELOGE(INTERNAL_ERROR, "[%s] value_input_size=%zu, but ref_input_size=%zu.", + node_->GetName().c_str(), value_tensor->GetSize(), ref_tensor->GetSize()); return INTERNAL_ERROR; } - GELOGI("[%s] value_input_data=%p, ref_input_size=%zu, value_input_size=%zu.", node_->GetName().c_str(), - ref_tensor->GetData(), ref_tensor->GetSize(), value_tensor->GetSize()); + GELOGI("[%s] value_input_data=%p, ref_input_size=%zu, value_input_size=%zu.", + node_->GetName().c_str(), ref_tensor->GetData(), ref_tensor->GetSize(), value_tensor->GetSize()); if (value_tensor->GetSize() > 0) { GE_CHK_RT_RET(rtMemcpy(ref_tensor->MutableData(), ref_tensor->GetSize(), value_tensor->GetData(), value_tensor->GetSize(), RT_MEMCPY_HOST_TO_HOST)); } - GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), "[%s] Failed to set output.", - context.GetNodeName()); + GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), + "[%s] Failed to set output.", context.GetNodeName()); GELOGI("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h index c3b4862b..bfa24325 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ class AssignKernel : public Kernel { * @brief compute for node_task. * @return result */ - Status Compute(TaskContext &context) override; + Status Compute(TaskContext& context) override; }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel/kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/kernel.h index 4fe8f8a3..0a9f32b7 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,7 +31,7 @@ class Kernel { public: Kernel(const NodePtr &node) : node_(node) {} virtual ~Kernel() = default; - virtual Status Compute(TaskContext &context) = 0; + virtual Status Compute(TaskContext& context) = 0; protected: const NodePtr &node_; diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index 47e6e534..ff5a7c6d 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h index 302a7e16..6677ce4a 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ class NoOpKernel : public Kernel { * @brief compute for node_task. * @return result */ - Status Compute(TaskContext &context) override; + Status Compute(TaskContext& context) override; }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc old mode 100644 new mode 100755 index 7e87c114..37b07e37 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,7 @@ const char *const kAttrDtype = "dtype"; namespace ge { namespace hybrid { namespace host_cpu { -Status RandomUniformKernel::Compute(TaskContext &context) { +Status RandomUniformKernel::Compute(TaskContext& context) { GELOGI("[%s] compute begin.", node_->GetName().c_str()); int64_t seed = 0; @@ -72,7 +72,7 @@ Status RandomUniformKernel::Compute(TaskContext &context) { template Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, - TaskContext &context) { + TaskContext& context) { GE_CHECK_NOTNULL(op_desc_ptr); // RandomUniformOp has and only has one output int64_t data_num = op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize(); @@ -80,8 +80,10 @@ Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t s attr.SetMemType(HOST_DDR); auto tensor_size = data_num * sizeof(T); TensorValue tensor; - GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), "[%s] Failed to allocate output of size %zu", - context.GetNodeName(), tensor_size); + GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), + "[%s] Failed to allocate output of size %zu", + context.GetNodeName(), + tensor_size); auto *buf = reinterpret_cast(tensor.MutableData()); int64_t final_seed; @@ -106,7 +108,7 @@ Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t s } Status RandomUniformKernel::GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, - TaskContext &context) { + TaskContext& context) { GE_CHECK_NOTNULL(op_desc_ptr); // RandomUniformOp has and only has one output int64_t data_num = op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize(); @@ -114,8 +116,10 @@ Status RandomUniformKernel::GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64 attr.SetMemType(HOST_DDR); auto tensor_size = data_num * sizeof(fp16_t); TensorValue tensor; - GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), "[%s] Failed to allocate output of size %zu", - context.GetNodeName(), tensor_size); + GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), + "[%s] Failed to allocate output of size %zu", + context.GetNodeName(), + tensor_size); auto *buf = reinterpret_cast(tensor.MutableData()); int64_t final_seed; diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h old mode 100644 new mode 100755 index 7024b103..30557064 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,13 +33,13 @@ class RandomUniformKernel : public Kernel { * @brief compute for node_task. 
* @return result */ - Status Compute(TaskContext &context) override; + Status Compute(TaskContext& context) override; private: template - Status Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext &context); + Status Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context); - static Status GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext &context); + static Status GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context); }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index db5c0f9c..2a836458 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h index 1625e49e..f20d6221 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ class VariableKernel : public Kernel { * @brief compute for node_task. * @return result */ - Status Compute(TaskContext &context) override; + Status Compute(TaskContext& context) override; }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel_factory.cc b/ge/hybrid/node_executor/host_cpu/kernel_factory.cc index 83899fa6..aabae999 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel_factory.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel_factory.h b/ge/hybrid/node_executor/host_cpu/kernel_factory.h index 4923756b..d03f12fc 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel_factory.h +++ b/ge/hybrid/node_executor/host_cpu/kernel_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -76,8 +76,10 @@ class KernelRegistrar { KernelRegistrar &operator=(KernelRegistrar &&) = delete; }; -#define REGISTER_KERNEL_CREATOR(type, clazz) \ - std::shared_ptr Creator_##type##Kernel(const NodePtr &node) { return MakeShared(node); } \ +#define REGISTER_KERNEL_CREATOR(type, clazz) \ + std::shared_ptr Creator_##type##Kernel(const NodePtr &node) { \ + return MakeShared(node); \ + } \ KernelRegistrar g_##type##Kernel_creator(#type, Creator_##type##Kernel) } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc old mode 100644 new mode 100755 index 8de15ea0..6fdfe8e1 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -19,27 +19,30 @@ #include "graph/utils/node_utils.h" #include "init/gelib.h" #include "hybrid/model/hybrid_model.h" +#include "graph/debug/ge_attr_define.h" namespace ge { namespace hybrid { namespace { const char *const kEngineNameAiCore = "AIcoreEngine"; const char *const kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE"; -const char *const kEngineNameAiCpu = "aicpu_kernel"; +const char *const kEngineNameAiCpu = "aicpu_ascend_kernel"; +const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; const char *const kEngineNameHccl = "ops_kernel_info_hccl"; const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; -} // namespace +} Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws + GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws GE_CHK_STATUS_RET_NOLOG(context.AllocateWorkspaces()); GE_CHK_STATUS_RET_NOLOG(task.UpdateArgs(context)); return SUCCESS; } Status NodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const { - GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), "Failed to execute task. node = %s", + GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), + "Failed to execute task. 
node = %s", context.GetNodeItem().NodeName().c_str()); return SUCCESS; } @@ -61,6 +64,7 @@ Status NodeExecutorManager::EnsureInitialized() { engine_mapping_.emplace(kEngineNameAiCore, NodeExecutorManager::ExecutorType::AICORE); engine_mapping_.emplace(kEngineNameGeLocal, NodeExecutorManager::ExecutorType::GE_LOCAL); + engine_mapping_.emplace(kEngineNameAiCpuTf, NodeExecutorManager::ExecutorType::AICPU_TF); engine_mapping_.emplace(kEngineNameAiCpu, NodeExecutorManager::ExecutorType::AICPU_TF); engine_mapping_.emplace(kEngineNameHccl, NodeExecutorManager::ExecutorType::HCCL); engine_mapping_.emplace(kEngineNameRts, NodeExecutorManager::ExecutorType::RTS); @@ -86,8 +90,13 @@ Status NodeExecutorManager::EnsureInitialized() { NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node &node) const { auto op_type = node.GetType(); if (op_type == PARTITIONEDCALL) { + const auto &subgraph = NodeUtils::GetSubgraph(node, 0); + if (subgraph != nullptr && subgraph->GetGraphUnknownFlag()) { + GELOGD("node %s was marked as unknown shape in node_executor.", node.GetName().c_str()); + return ExecutorType::DYNAMIC_SUBGRAPH; + } bool is_dynamic = false; - (void)NodeUtils::GetNodeUnknownShapeStatus(node, is_dynamic); + (void) NodeUtils::GetNodeUnknownShapeStatus(node, is_dynamic); if (is_dynamic) { return ExecutorType::DYNAMIC_SUBGRAPH; } @@ -103,7 +112,7 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node return ExecutorType::CONTROL_OP; } - auto op_desc = node.GetOpDesc(); // checked before + auto op_desc = node.GetOpDesc(); // checked before const auto &lib_name = op_desc->GetOpKernelLibName(); auto it = engine_mapping_.find(lib_name); if (it == engine_mapping_.end()) { @@ -146,8 +155,10 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { auto it = kernel_stores_.find(op_desc->GetOpKernelLibName()); if (it == kernel_stores_.end()) { - GELOGE(INTERNAL_ERROR, "Failed to get OpKernelStore. libName = %s, node = %s", - op_desc->GetOpKernelLibName().c_str(), op_desc->GetName().c_str()); + GELOGE(INTERNAL_ERROR, + "Failed to get OpKernelStore. 
libName = %s, node = %s", + op_desc->GetOpKernelLibName().c_str(), + op_desc->GetName().c_str()); return INTERNAL_ERROR; } @@ -163,8 +174,8 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { int64_t output_mem_size = 0; GE_CHK_STATUS_RET(TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size), "hccl calc tensor mem size failed."); - output_mem_size = - ((output_mem_size + MEMORY_ALIGN_RATIO * MEMORY_ALIGN_SIZE - 1) / MEMORY_ALIGN_SIZE) * MEMORY_ALIGN_SIZE; + output_mem_size = ((output_mem_size + + MEMORY_ALIGN_RATIO * MEMORY_ALIGN_SIZE - 1) / MEMORY_ALIGN_SIZE) * MEMORY_ALIGN_SIZE; TensorUtils::SetSize(output_tensor, output_mem_size); GE_CHK_STATUS_RET(op_desc->UpdateOutputDesc(static_cast(i), output_tensor), "hccl update output size failed."); diff --git a/ge/hybrid/node_executor/node_executor.h b/ge/hybrid/node_executor/node_executor.h index 79726b09..93b152e2 100644 --- a/ge/hybrid/node_executor/node_executor.h +++ b/ge/hybrid/node_executor/node_executor.h @@ -38,20 +38,26 @@ class NodeTask { * @param context instance of TaskContext * @return SUCCESS on success, error code otherwise */ - virtual Status UpdateTilingData(TaskContext &context) { return SUCCESS; } + virtual Status UpdateTilingData(TaskContext &context) { + return SUCCESS; + } /** * Init * @param context instance of TaskContext * @return SUCCESS on success, error code otherwise */ - virtual Status Init(TaskContext &context) { return SUCCESS; } + virtual Status Init(TaskContext &context) { + return SUCCESS; + } /** * Whether this task supports dynamic shape * @return true if this task supports dynamic shape, false otherwise */ - virtual bool IsSupportDynamicShape() { return true; } + virtual bool IsSupportDynamicShape() { + return true; + } /** * Update args for execution @@ -79,13 +85,17 @@ class NodeExecutor { * Initialize node executor * @return SUCCESS on success, error code otherwise */ - virtual Status Initialize() { return SUCCESS; } + virtual Status Initialize() { + return SUCCESS; + } /** * Finalize node executor * @return SUCCESS on success, error code otherwise */ - virtual Status Finalize() { return SUCCESS; } + virtual Status Finalize() { + return SUCCESS; + } /** * Load task in load stage @@ -94,7 +104,9 @@ class NodeExecutor { * @param task generated node task * @return SUCCESS on success, error code otherwise */ - virtual Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const; + virtual Status LoadTask(const HybridModel &model, + const NodePtr &node, + std::shared_ptr &task) const; /** * Compile task in run stage @@ -103,7 +115,9 @@ class NodeExecutor { * @param task generated node task * @return SUCCESS on success, error code otherwise */ - virtual Status CompileTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const; + virtual Status CompileTask(const HybridModel &model, + const NodePtr &node, + std::shared_ptr &task) const; /** * Preparation actions before execution @@ -196,21 +210,24 @@ class NodeExecutorManager { class NodeExecutorRegistrar { public: - NodeExecutorRegistrar(NodeExecutorManager::ExecutorType executor_type, NodeExecutor *(*builder)()); + NodeExecutorRegistrar(NodeExecutorManager::ExecutorType executor_type, + NodeExecutor *(*builder)()); ~NodeExecutorRegistrar() = default; }; } // namespace hybrid } // namespace ge #define REGISTER_NODE_EXECUTOR_BUILDER(engine_type, executor) \ - REGISTER_NODE_EXECUTOR_BUILDER_UNIQ_HELPER(__COUNTER__, engine_type, executor) + 
REGISTER_NODE_EXECUTOR_BUILDER_UNIQ_HELPER(__COUNTER__, engine_type, executor) #define REGISTER_NODE_EXECUTOR_BUILDER_UNIQ_HELPER(ctr, engine_type, executor) \ - REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) + REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) -#define REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) \ - static ::ge::hybrid::NodeExecutorRegistrar register_##ctr __attribute__((unused)) = \ - ::ge::hybrid::NodeExecutorRegistrar( \ - engine_type, []() -> ::ge::hybrid::NodeExecutor * { return new (std::nothrow) executor(); }) +#define REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) \ + static ::ge::hybrid::NodeExecutorRegistrar register_##ctr \ + __attribute__((unused)) = \ + ::ge::hybrid::NodeExecutorRegistrar(engine_type, []()->::ge::hybrid::NodeExecutor* { \ + return new (std::nothrow) executor(); \ + }) -#endif // GE_HYBRID_NODE_EXECUTOR_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_NODE_EXECUTOR_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc old mode 100644 new mode 100755 index 4c9cf7bf..f01cb21e --- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc +++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc @@ -21,7 +21,9 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::DYNAMIC_SUBGRAPH, PartitionedCallNodeExecutor); -PartitionedCallNodeTask::PartitionedCallNodeTask(const GraphItem *graph_item) : graph_item_(graph_item) {} +PartitionedCallNodeTask::PartitionedCallNodeTask(const GraphItem *graph_item) + : graph_item_(graph_item) { +} PartitionedCallNodeTask::~PartitionedCallNodeTask() { GELOGD("[%s] PartitionedCallNodeTask destroyed.", graph_item_->GetName().c_str()); @@ -29,18 +31,21 @@ PartitionedCallNodeTask::~PartitionedCallNodeTask() { Status PartitionedCallNodeTask::Init(TaskContext &context) { auto execution_context = const_cast(context.GetExecutionContext()); - subgraph_executor_.reset(new (std::nothrow) SubgraphExecutor(graph_item_, execution_context)); + subgraph_executor_.reset(new(std::nothrow)SubgraphExecutor(graph_item_, execution_context)); GE_CHECK_NOTNULL(subgraph_executor_); return SUCCESS; } Status PartitionedCallNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { - GE_CHK_STATUS_RET(subgraph_executor_->ExecuteAsync(context), "[%s] Failed to set inputs", - graph_item_->GetName().c_str()); + GE_CHK_STATUS_RET(subgraph_executor_->ExecuteAsync(context), + "[%s] Failed to set inputs", graph_item_->GetName().c_str()); - auto callback = [=]() { Callback(done_callback); }; + auto callback = [=]() { + Callback(done_callback); + }; - GE_CHK_STATUS_RET(context.RegisterCallback(callback), "[%s] Failed to register callback", + GE_CHK_STATUS_RET(context.RegisterCallback(callback), + "[%s] Failed to register callback", graph_item_->GetName().c_str()); GELOGD("[%s] Done executing subgraph successfully.", graph_item_->GetName().c_str()); return SUCCESS; @@ -58,16 +63,19 @@ Status PartitionedCallNodeTask::Callback(const std::function &done_callb return SUCCESS; } -Status PartitionedCallNodeTask::UpdateArgs(TaskContext &context) { return SUCCESS; } +Status PartitionedCallNodeTask::UpdateArgs(TaskContext &context) { + return SUCCESS; +} -Status PartitionedCallNodeExecutor::LoadTask(const ge::hybrid::HybridModel &model, const ge::NodePtr &node, +Status 
PartitionedCallNodeExecutor::LoadTask(const ge::hybrid::HybridModel &model, + const ge::NodePtr &node, std::shared_ptr &task) const { GELOGD("Load dynamic partitioned call: [%s]", node->GetName().c_str()); auto subgraph = NodeUtils::GetSubgraph(*node, 0); GE_CHECK_NOTNULL(subgraph); auto partitioned_call = model.GetSubgraphItem(subgraph); GE_CHECK_NOTNULL(partitioned_call); - task.reset(new (std::nothrow) PartitionedCallNodeTask(partitioned_call)); + task.reset(new(std::nothrow) PartitionedCallNodeTask(partitioned_call)); GE_CHECK_NOTNULL(task); GELOGD("Done loading dynamic partitioned call: [%s]", node->GetName().c_str()); return SUCCESS; diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h index fd87d6c1..9ea544a1 100644 --- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h +++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h @@ -51,4 +51,4 @@ class PartitionedCallNodeExecutor : public NodeExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_NODE_EXECUTOR_SUBGRAPH_SUBGRAPH_EXECUTOR_H_ +#endif // GE_HYBRID_NODE_EXECUTOR_SUBGRAPH_SUBGRAPH_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.cc b/ge/hybrid/node_executor/rts/rts_node_executor.cc index 51241e55..18b875fd 100644 --- a/ge/hybrid/node_executor/rts/rts_node_executor.cc +++ b/ge/hybrid/node_executor/rts/rts_node_executor.cc @@ -36,8 +36,12 @@ Status IdentityNodeTask::DoCopyTensor(TaskContext &context, int index) { auto output = context.MutableOutput(index); GE_CHECK_NOTNULL(input); GE_CHECK_NOTNULL(output); - GE_CHK_RT_RET(rtMemcpyAsync(output->MutableData(), output->GetSize(), input->GetData(), copy_size, - RT_MEMCPY_DEVICE_TO_DEVICE, context.GetStream())); + GE_CHK_RT_RET(rtMemcpyAsync(output->MutableData(), + output->GetSize(), + input->GetData(), + copy_size, + RT_MEMCPY_DEVICE_TO_DEVICE, + context.GetStream())); } else { GELOGW("[%s] index = %d, copy size = 0", context.GetNodeName(), index); } @@ -57,7 +61,9 @@ Status IdentityNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGD("[%s] Start to execute.", context.GetNodeName()); diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.h b/ge/hybrid/node_executor/rts/rts_node_executor.h index 9da28966..2576b73b 100644 --- a/ge/hybrid/node_executor/rts/rts_node_executor.h +++ b/ge/hybrid/node_executor/rts/rts_node_executor.h @@ -42,4 +42,4 @@ class RtsNodeExecutor : public NodeExecutor { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index e49a2b43..29fc777b 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -24,9 +24,11 @@ namespace ge { namespace hybrid { -TaskContext::TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, +TaskContext::TaskContext(GraphExecutionContext *execution_context, + const NodeItem *node_item, SubgraphContext *subgraph_context) - : node_item_(node_item), execution_context_(execution_context), subgraph_context_(subgraph_context) {} + : node_item_(node_item), execution_context_(execution_context), subgraph_context_(subgraph_context) { +} TaskContext::~TaskContext() { GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); @@ 
-43,19 +45,25 @@ TaskContext::~TaskContext() { } } -std::unique_ptr TaskContext::Create(const NodeItem &node_item, GraphExecutionContext *execution_context, +std::unique_ptr TaskContext::Create(const NodeItem &node_item, + GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) { GELOGI("[%s] To create task context, input start = %d, num_inputs = %d, output start = %d, num_outputs = %d.", - node_item.NodeName().c_str(), node_item.input_start, node_item.num_inputs, node_item.output_start, + node_item.NodeName().c_str(), + node_item.input_start, + node_item.num_inputs, + node_item.output_start, node_item.num_outputs); if (node_item.input_start < 0 || node_item.output_start < 0) { - GELOGE(INTERNAL_ERROR, "NodeItem not property initialized. input_start = %d, output_start = %d", - node_item.input_start, node_item.output_start); + GELOGE(INTERNAL_ERROR, + "NodeItem not property initialized. input_start = %d, output_start = %d", + node_item.input_start, + node_item.output_start); return nullptr; } - auto task_context = - std::unique_ptr(new (std::nothrow) TaskContext(execution_context, &node_item, subgraph_context)); + auto task_context = std::unique_ptr( + new(std::nothrow)TaskContext(execution_context, &node_item, subgraph_context)); if (task_context == nullptr) { GELOGE(MEMALLOC_FAILED, "[%s] Failed to create instance of TaskContext.", node_item.NodeName().c_str()); return nullptr; @@ -68,9 +76,13 @@ std::unique_ptr TaskContext::Create(const NodeItem &node_item, Grap return task_context; } -int TaskContext::NumInputs() const { return node_item_->num_inputs; } +int TaskContext::NumInputs() const { + return node_item_->num_inputs; +} -int TaskContext::NumOutputs() const { return node_item_->num_outputs; } +int TaskContext::NumOutputs() const { + return node_item_->num_outputs; +} TensorValue *TaskContext::MutableInput(int index) { if (index < 0 || index >= node_item_->num_inputs) { @@ -99,7 +111,9 @@ TensorValue *TaskContext::MutableOutput(int index) { return outputs_start_ + index; } -std::size_t TaskContext::NumWorkspaces() const { return workspaces_.size(); } +std::size_t TaskContext::NumWorkspaces() const { + return workspaces_.size(); +} void *TaskContext::MutableWorkspace(int index) { if (index < 0 || static_cast(index) >= workspaces_.size()) { @@ -175,9 +189,13 @@ Status TaskContext::AllocateTensor(const GeTensorDesc &tensor_desc, TensorValue return SUCCESS; } -Status TaskContext::AllocateOutput(int index, const GeTensorDesc &tensor_desc, TensorValue **tensor, +Status TaskContext::AllocateOutput(int index, + const GeTensorDesc &tensor_desc, + TensorValue **tensor, AllocationAttr *attr) { - GELOGI("To allocate output for node: %s. index = %d, tensor desc = %s", node_item_->NodeName().c_str(), index, + GELOGI("To allocate output for node: %s. 
index = %d, tensor desc = %s", + node_item_->NodeName().c_str(), + index, TensorDesc2String(tensor_desc).c_str()); if (index < 0 || index >= node_item_->num_outputs) { @@ -193,8 +211,11 @@ Status TaskContext::AllocateOutput(int index, const GeTensorDesc &tensor_desc, T auto it = node_item_->ref_outputs.find(index); if (it != node_item_->ref_outputs.end()) { auto &ref_node = it->second; - GELOGD("source node of %s:%d = %s, op_type = %s", node_item_->NodeName().c_str(), index, - ref_node->GetName().c_str(), ref_node->GetType().c_str()); + GELOGD("source node of %s:%d = %s, op_type = %s", + node_item_->NodeName().c_str(), + index, + ref_node->GetName().c_str(), + ref_node->GetType().c_str()); TensorValue *ref_tensor = execution_context_->model->GetVariable(ref_node->GetName()); GE_CHECK_NOTNULL(ref_tensor); @@ -206,7 +227,9 @@ Status TaskContext::AllocateOutput(int index, const GeTensorDesc &tensor_desc, T outputs_start_[index] = inputs_start_[reuse_input->second]; } else { GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr)); - GELOGD("Allocating output successfully. node: %s. index = %d, size = %zu", node_item_->NodeName().c_str(), index, + GELOGD("Allocating output successfully. node: %s. index = %d, size = %zu", + node_item_->NodeName().c_str(), + index, outputs_start_[index].GetSize()); } } @@ -227,7 +250,7 @@ Status TaskContext::AllocateOutputs(AllocationAttr *attr) { const auto &output_desc = node_item_->op_desc->MutableOutputDesc(i); GE_CHECK_NOTNULL(output_desc); uint32_t mem_type = 0; - (void)AttrUtils::GetInt(node_item_->op_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type); + (void)AttrUtils::GetInt(output_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type); if (attr == nullptr) { auto tmp_attr = AllocationAttr(0, nullptr, static_cast(mem_type)); GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, &tmp_attr)); @@ -251,7 +274,9 @@ Status TaskContext::AllocateTensor(size_t size, TensorValue &tensor, AllocationA return SUCCESS; } -const NodeItem &TaskContext::GetNodeItem() const { return *node_item_; } +const NodeItem &TaskContext::GetNodeItem() const { + return *node_item_; +} Status TaskContext::SetOutput(int index, const TensorValue &tensor) { if (index < 0 || index >= node_item_->num_outputs) { @@ -259,16 +284,25 @@ Status TaskContext::SetOutput(int index, const TensorValue &tensor) { return PARAM_INVALID; } - GELOGD("Set %s:%d with tensor: %s", node_item_->NodeName().c_str(), index, tensor.DebugString().c_str()); + GELOGD("Set %s:%d with tensor: %s", + node_item_->NodeName().c_str(), + index, + tensor.DebugString().c_str()); outputs_start_[index] = tensor; return SUCCESS; } -rtStream_t TaskContext::GetStream() { return execution_context_->stream; } +rtStream_t TaskContext::GetStream() { + return execution_context_->stream; +} -int64_t TaskContext::GetSessionId() const { return execution_context_->session_id; } +int64_t TaskContext::GetSessionId() const { + return execution_context_->session_id; +} -Status TaskContext::GetStatus() const { return status_; } +Status TaskContext::GetStatus() const { + return status_; +} void TaskContext::SetStatus(Status status) { status_ = status; @@ -310,20 +344,27 @@ Status TaskContext::PropagateOutputs() { auto dst_node_item = dst_input_index_and_node.second; auto input_offset = dst_node_item->input_start + dst_input_idx; GELOGI( - "Propagate output of node %s, output index = %d, dst node = %s, " - "dst_input_index = %d, dst_input_offset = %d.", - node_item_->NodeName().c_str(), i, dst_node_item->NodeName().c_str(), 
dst_input_idx, input_offset); + "Propagate output of node %s, output index = %d, dst node = %s, " + "dst_input_index = %d, dst_input_offset = %d.", + node_item_->NodeName().c_str(), + i, + dst_node_item->NodeName().c_str(), + dst_input_idx, + input_offset); if (subgraph_context_->all_inputs_.size() <= static_cast(input_offset)) { - GELOGE(INTERNAL_ERROR, "[%s] input index out of range. index = %d, total input num = %zu", GetNodeName(), - input_offset, subgraph_context_->all_inputs_.size()); + GELOGE(INTERNAL_ERROR, + "[%s] input index out of range. index = %d, total input num = %zu", + GetNodeName(), + input_offset, + subgraph_context_->all_inputs_.size()); return INTERNAL_ERROR; } subgraph_context_->all_inputs_[input_offset] = *tensor; if (execution_context_->trace_enabled) { - subgraph_context_->all_inputs_[input_offset].SetName(node_item_->NodeName() + "_in_" + - std::to_string(dst_input_idx)); + subgraph_context_->all_inputs_[input_offset].SetName( + node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); } } } @@ -331,9 +372,13 @@ Status TaskContext::PropagateOutputs() { return SUCCESS; } -const void *TaskContext::GetVarBaseAddr() { return execution_context_->model->GetVarMemBase(); } +const void *TaskContext::GetVarBaseAddr() { + return execution_context_->model->GetVarMemBase(); +} -const char *TaskContext::GetNodeName() const { return node_item_->NodeName().c_str(); } +const char *TaskContext::GetNodeName() const { + return node_item_->NodeName().c_str(); +} void TaskContext::ReleaseInput(int index) { auto input_tensor = MutableInput(index); @@ -359,24 +404,38 @@ GeTensorDescPtr TaskContext::MutableOutputDesc(int index) { return node_item_->op_desc->MutableOutputDesc(static_cast(index)); } -bool TaskContext::IsForceInferShape() const { return force_infer_shape_; } +bool TaskContext::IsForceInferShape() const { + return force_infer_shape_; +} -void TaskContext::SetForceInferShape(bool force_infer_shape) { force_infer_shape_ = force_infer_shape; } +void TaskContext::SetForceInferShape(bool force_infer_shape) { + force_infer_shape_ = force_infer_shape; +} -void TaskContext::NodeDone() { subgraph_context_->NodeDone(node_item_->node); } +void TaskContext::NodeDone() { + subgraph_context_->NodeDone(node_item_->node); +} void TaskContext::OnError(Status error) { subgraph_context_->OnError(error); execution_context_->SetErrorCode(error); } -bool TaskContext::IsTraceEnabled() const { return execution_context_->trace_enabled; } +bool TaskContext::IsTraceEnabled() const { + return execution_context_->trace_enabled; +} -TensorValue *TaskContext::GetVariable(const std::string &name) { return execution_context_->model->GetVariable(name); } +TensorValue *TaskContext::GetVariable(const std::string &name) { + return execution_context_->model->GetVariable(name); +} -uint64_t TaskContext::GetIterationNumber() const { return iteration_; } +uint64_t TaskContext::GetIterationNumber() const { + return iteration_; +} -bool TaskContext::IsDumpEnabled() const { return execution_context_->dump_enabled; } +bool TaskContext::IsDumpEnabled() const { + return execution_context_->dump_enabled; +} Status TaskContext::TryExecuteCallback(const function &callback_fun) const { if (!callback_fun) { @@ -390,6 +449,8 @@ Status TaskContext::TryExecuteCallback(const function &callback_fun) con callback_fun(); return SUCCESS; } -const DumpProperties &TaskContext::GetDumpProperties() const { return execution_context_->dump_properties; } +const DumpProperties &TaskContext::GetDumpProperties() const { + return 
execution_context_->dump_properties; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index ed45116d..d52ab0be 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -34,7 +34,8 @@ class SubgraphContext; class TaskContext { public: - static std::unique_ptr Create(const NodeItem &node_item, GraphExecutionContext *execution_context, + static std::unique_ptr Create(const NodeItem &node_item, + GraphExecutionContext *execution_context, SubgraphContext *subgraph_context); ~TaskContext(); @@ -62,7 +63,9 @@ class TaskContext { void OnError(Status error); Status SetOutput(int index, const TensorValue &tensor); - Status AllocateOutput(int index, const GeTensorDesc &tensor_desc, TensorValue **tensor, + Status AllocateOutput(int index, + const GeTensorDesc &tensor_desc, + TensorValue **tensor, AllocationAttr *attr = nullptr); Status AllocateOutputs(AllocationAttr *attr = nullptr); Status AllocateWorkspaces(); @@ -72,9 +75,11 @@ class TaskContext { bool IsDumpEnabled() const; - const DumpProperties &GetDumpProperties() const; + const DumpProperties& GetDumpProperties() const; - const GraphExecutionContext *GetExecutionContext() { return execution_context_; } + const GraphExecutionContext *GetExecutionContext() { + return execution_context_; + } Status AllocateTensor(size_t size, TensorValue &tensor, AllocationAttr *attr = nullptr); void *MutableWorkspace(int index); @@ -94,7 +99,9 @@ class TaskContext { void *handle_ = nullptr; private: - TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, SubgraphContext *subgraph_context); + TaskContext(GraphExecutionContext *execution_context, + const NodeItem *node_item, + SubgraphContext *subgraph_context); static string TensorDesc2String(const GeTensorDesc &desc); Status AllocateTensor(const GeTensorDesc &tensor_desc, TensorValue &tensor, AllocationAttr *attr); @@ -111,4 +118,4 @@ class TaskContext { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_TASK_CONTEXT_H_ +#endif // GE_HYBRID_KERNEL_TASK_CONTEXT_H_ diff --git a/ge/inc/graph_pass.h b/ge/inc/graph_pass.h index d4abdd2f..a8732cb4 100644 --- a/ge/inc/graph_pass.h +++ b/ge/inc/graph_pass.h @@ -1,92 +1,93 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_INC_GRAPH_PASS_H_ -#define GE_INC_GRAPH_PASS_H_ - -#include -#include - -#include "common/op/attr_value_util.h" -#include "common/op/ge_op_utils.h" -#include "framework/common/debug/ge_log.h" -#include "graph/compute_graph.h" -#include "graph/utils/attr_utils.h" -#include "graph/utils/graph_utils.h" -#include "inc/pass.h" - -namespace ge { -/// -/// @ingroup domi_omg -/// @brief graph pass -/// @author -/// -class GraphPass : public Pass { - public: - /// - /// run graph pass - /// @param [in] graph graph to be optimized - /// @return SUCCESS optimize successfully - /// @return NOT_CHANGED not optimized - /// @return others optimized failed - /// @author - /// - virtual Status Run(ge::ComputeGraphPtr graph) = 0; - virtual Status ClearStatus() { return SUCCESS; }; - static void RecordOriginalNames(std::vector original_nodes, const ge::NodePtr &node) { - GE_CHECK_NOTNULL_JUST_RETURN(node); - std::vector original_names; - for (ge::NodePtr &node_tmp : original_nodes) { - std::vector names_tmp; - ge::OpDescPtr opdesc_tmp = node_tmp->GetOpDesc(); - GE_CHECK_NOTNULL_JUST_RETURN(opdesc_tmp); - Status ret = ge::AttrUtils::GetListStr(opdesc_tmp, "_datadump_original_op_names", names_tmp); - if (ret != domi::SUCCESS) { - GELOGW("get the original_op_names fail."); - } - if (names_tmp.size() != 0) { - original_names.insert(original_names.end(), names_tmp.begin(), names_tmp.end()); - } else { - original_names.push_back(opdesc_tmp->GetName()); - } - } - - if (original_names.size() == 0) { - std::string tmp; - original_names.push_back(tmp); - } - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(node->GetOpDesc(), "_datadump_original_op_names", original_names), - return, "Set original_op_names fail."); - } - - static bool IsConstNode(const ge::NodePtr &node) { - GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, GELOGE(FAILED, "Node GetOpDesc is nullptr"); return false); - if (node->GetOpDesc()->GetType() == CONSTANTOP) { - return true; - } else if (node->GetOpDesc()->GetType() == FRAMEWORKOP) { - string type; - GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type), return false, - "Get original_type for op %s fail!", node->GetName().c_str()); - GE_IF_BOOL_EXEC(type == CONSTANT, GELOGI("Is const op"); return true); - return false; - } else { - return false; - } - } -}; -} // namespace ge - -#endif // GE_INC_GRAPH_PASS_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_INC_GRAPH_PASS_H_ +#define GE_INC_GRAPH_PASS_H_ + +#include +#include + +#include "common/op/attr_value_util.h" +#include "common/op/ge_op_utils.h" +#include "common/types.h" +#include "framework/common/debug/ge_log.h" +#include "graph/compute_graph.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/graph_utils.h" +#include "inc/pass.h" + +namespace ge { +/// +/// @ingroup domi_omg +/// @brief graph pass +/// @author +/// +class GraphPass : public Pass { + public: + /// + /// run graph pass + /// @param [in] graph graph to be optimized + /// @return SUCCESS optimize successfully + /// @return NOT_CHANGED not optimized + /// @return others optimized failed + /// @author + /// + virtual Status Run(ge::ComputeGraphPtr graph) = 0; + virtual Status ClearStatus() { return SUCCESS; }; + static void RecordOriginalNames(std::vector original_nodes, const ge::NodePtr &node) { + GE_CHECK_NOTNULL_JUST_RETURN(node); + std::vector original_names; + for (ge::NodePtr &node_tmp : original_nodes) { + std::vector names_tmp; + ge::OpDescPtr opdesc_tmp = node_tmp->GetOpDesc(); + GE_CHECK_NOTNULL_JUST_RETURN(opdesc_tmp); + Status ret = ge::AttrUtils::GetListStr(opdesc_tmp, "_datadump_original_op_names", names_tmp); + if (ret != domi::SUCCESS) { + GELOGW("get the original_op_names fail."); + } + if (names_tmp.size() != 0) { + original_names.insert(original_names.end(), names_tmp.begin(), names_tmp.end()); + } else { + original_names.push_back(opdesc_tmp->GetName()); + } + } + + if (original_names.size() == 0) { + std::string tmp; + original_names.push_back(tmp); + } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(node->GetOpDesc(), "_datadump_original_op_names", original_names), + return, "Set original_op_names fail."); + } + + static bool IsConstNode(const ge::NodePtr &node) { + GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, GELOGE(FAILED, "Node GetOpDesc is nullptr"); return false); + if (node->GetOpDesc()->GetType() == CONSTANTOP) { + return true; + } else if (node->GetOpDesc()->GetType() == FRAMEWORKOP) { + string type; + GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type), + return false, "Get original_type for op %s fail!", node->GetName().c_str()); + GE_IF_BOOL_EXEC(type == CONSTANT, GELOGI("Is const op"); return true); + return false; + } else { + return false; + } + } +}; +} // namespace ge + +#endif // GE_INC_GRAPH_PASS_H_ diff --git a/ge/inc/kernel.h b/ge/inc/kernel.h index 9f7e1308..84af5234 100644 --- a/ge/inc/kernel.h +++ b/ge/inc/kernel.h @@ -24,9 +24,9 @@ #include "graph/graph.h" #include "graph/op_desc.h" -using std::shared_ptr; -using std::unique_ptr; using std::vector; +using std::unique_ptr; +using std::shared_ptr; namespace ge { /// diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc old mode 100644 new mode 100755 index ec56cc0a..fda21f63 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,6 +39,7 @@ #include "graph/ge_global_options.h" #include "graph/load/new_model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" #include "runtime/kernel.h" @@ -287,6 +288,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt return initMmStatus; } + GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); // Update CSA file CsaInteract::GetInstance().Init(options.device_id, GetContext().TraceId()); Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT); @@ -339,6 +341,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithout GELOGE(initMmStatus, "[Initialize] MemoryAllocatorManager initialize failed."); return initMmStatus; } + GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); static bool is_inited = false; if (is_inited) { @@ -392,6 +395,9 @@ Status GELib::Finalize() { GELOGI("MemManager finalization."); MemManager::Instance().Finalize(); + GELOGI("HostMemManager finalization."); + HostMemManager::Instance().Finalize(); + GELOGI("HostCpuEngine finalization."); HostCpuEngine::GetInstance().Finalize(); @@ -453,6 +459,7 @@ void GELib::RollbackInit() { (void)sessionManager_.Finalize(); } MemManager::Instance().Finalize(); + HostMemManager::Instance().Finalize(); VarManagerPool::Instance().Destory(); } } // namespace ge diff --git a/ge/init/gelib.h b/ge/init/gelib.h index c8b3ff8a..cefbaa50 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,8 @@ #include "common/ge_inner_error_codes.h" #include "common/ge_types.h" -using std::map; using std::string; +using std::map; using std::vector; namespace ge { diff --git a/ge/ir_build/atc_ir_common.cc b/ge/ir_build/atc_ir_common.cc old mode 100644 new mode 100755 index 82ed40bd..e4bfe978 --- a/ge/ir_build/atc_ir_common.cc +++ b/ge/ir_build/atc_ir_common.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "atc_ir_common.h" #include "common/util/error_manager/error_manager.h" #include "external/ge/ge_api_types.h" @@ -34,7 +33,7 @@ const size_t kMaxNDDimNum = 4; const size_t kMinNDDimNum = 1; // datatype/formats from user to GE, Unified to util interface file later const std::map kOutputTypeSupportDatatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; const char *const kOutputTypeSupport = "only support FP32, FP16, UINT8"; const std::set kBufferOptimizeSupportOption = {"l1_optimize", "l2_optimize", "off_optimize", "l1_and_l2_optimize"}; @@ -68,21 +67,18 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> int32_t size = 0; for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { vector shape = iter->second; - if (shape.size() < 1) { + if (shape.empty()) { ErrorManager::GetInstance().ATCReportErrMessage("E10012"); GELOGE(ge::PARAM_INVALID, "--input_shape's shape size can not be less than 1 when set --dynamic_batch_size."); return false; } - if (shape[0] == kDynamicInputDim) { - for (size_t i = 1; i < shape.size(); ++i) { - if (shape[i] < 1) { - ErrorManager::GetInstance().ATCReportErrMessage("E10018", {"index", "shape"}, - {std::to_string(i), std::to_string(shape[i])}); - GELOGE(ge::PARAM_INVALID, "Only batch N can be -1 when set --dynamic_batch_size, current shape[%zu] is %ld", - i, shape[i]); - return false; - } - } + + if (std::count(shape.begin(), shape.end(), kDynamicInputDim) == 0) { + continue; + } + + bool ret = multibatch::CheckDynamicBatchShape(shape, iter->first); + if (ret) { size++; } } @@ -95,8 +91,8 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> for (char c : dynamic_batch_size) { if (!isdigit(c) && (c != ',') && (c != ' ')) { - ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"value", "reason"}, - {dynamic_batch_size, kDynamicBatchSizeError}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E10033", {"value", "reason"}, {dynamic_batch_size, kDynamicBatchSizeError}); GELOGE(ge::PARAM_INVALID, "Input parameter[--dynamic_batch_size]'s value[%s] is invalid. 
reason: %s", dynamic_batch_size.c_str(), kDynamicBatchSizeError); return false; @@ -111,7 +107,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> bool CheckDynamicImagesizeInputShapeValid(unordered_map> shape_map, const std::string input_format, std::string &dynamic_image_size) { int32_t size = 0; - for (unordered_map>::iterator iter = shape_map.begin(); iter != shape_map.end(); ++iter) { + for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { vector shape = iter->second; // only support four dim if (shape.size() != DIM_DEFAULT_SIZE) { @@ -124,28 +120,14 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> continue; } - int64_t height = 0; - int64_t width = 0; - if (input_format == "NCHW") { - height = shape[NCHW_DIM_H]; - width = shape[NCHW_DIM_W]; - } - - if (input_format == "NHWC") { - height = shape[NHWC_DIM_H]; - width = shape[NHWC_DIM_W]; + if (std::count(shape.begin(), shape.end(), kDynamicInputDim) == 0) { + continue; } - - if (height == kDynamicInputDim && width == kDynamicInputDim && - std::count(shape.begin(), shape.end(), kDynamicInputDim) == kDynamicImageSizeNum) { + auto ret = multibatch::CheckDynamicImageSizeShape(shape, iter->first, input_format); + if (ret) { size++; - } else if (std::count(shape.begin(), shape.end(), kDynamicInputDim) == 0) { - continue; } else { - ErrorManager::GetInstance().ATCReportErrMessage("E10019"); - GELOGE(ge::PARAM_INVALID, - "--input_shape's shape is invalid, only height and width can be -1 when set --dynamic_image_size."); - return false; + return ret; } } if (size == 0) { @@ -176,12 +158,12 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> return true; } -bool CheckDynamicDimsInputShapeValid(const unordered_map> &shape_map, string input_format, - string &dynamic_dims) { +bool CheckDynamicDimsInputShapeValid(const unordered_map> &shape_map, + string input_format, string &dynamic_dims) { if (input_format != "ND") { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--input_format", input_format.c_str(), "input_format must be ND when set dynamic_dims"}); + "E10001", {"parameter", "value", "reason"}, + {"--input_format", input_format.c_str(), "input_format must be ND when set dynamic_dims"}); GELOGE(ge::PARAM_INVALID, "input_format must be ND when set dynamic_dims."); return false; } @@ -191,8 +173,8 @@ bool CheckDynamicDimsInputShapeValid(const unordered_map auto &shapes = info_shapes.second; if (shapes.size() > kMaxNDDimNum || shapes.size() < kMinNDDimNum) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--input_shape's dim", std::to_string(shapes.size()), "Dim num must within [1, 4] when set dynamic_dims"}); + "E10001", {"parameter", "value", "reason"}, + {"--input_shape's dim", std::to_string(shapes.size()), "Dim num must within [1, 4] when set dynamic_dims"}); GELOGE(ge::PARAM_INVALID, "Dim num must within [%zu, %zu] when set dynamic_dims.", kMinNDDimNum, kMaxNDDimNum); return false; } @@ -200,8 +182,8 @@ bool CheckDynamicDimsInputShapeValid(const unordered_map } if (dynamic_dim == 0) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--input_shape's dynamic dim num", "0", "at least one dim should be -1 when set dynamic_dims"}); + "E10001", {"parameter", "value", "reason"}, + {"--input_shape's dynamic dim num", "0", "at least one dim should be -1 when set dynamic_dims"}); GELOGE(ge::PARAM_INVALID, "input_shape's shape is invalid, at least one dim 
should be -1 when set dynamic_dims."); return false; } @@ -218,8 +200,8 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims EraseEndSemicolon(dynamic_dims); if (dynamic_dims.empty()) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--dynamic_dims", dynamic_dims.c_str(), "dynamic_dims can not be empty"}); + "E10001", {"parameter", "value", "reason"}, + {"--dynamic_dims", dynamic_dims.c_str(), "dynamic_dims can not be empty"}); GELOGE(ge::PARAM_INVALID, "dynamic_dims can not be empty."); return false; } @@ -227,7 +209,7 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims vector split_set = StringUtils::Split(dynamic_dims, ';'); if (split_set.size() > kMaxDynamicDimNum) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10042", {"parameter", "reason"}, {"dynamic_dims", "dynamic_dims's num of parameter set can not exceed 100"}); + "E10042", {"parameter", "reason"}, {"dynamic_dims", "dynamic_dims's num of parameter set can not exceed 100"}); GELOGE(ge::PARAM_INVALID, "dynamic_dims's num of parameter set can not exceed %zu.", kMaxDynamicDimNum); return false; } @@ -235,19 +217,18 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims vector one_set = StringUtils::Split(split_dim, ','); if (one_set.size() != static_cast(dynamic_dim_num)) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10042", {"parameter", "reason"}, - {"dynamic_dims", "Each gear setting needs to be consistent with the number of -1 in the inputshape"}); - GELOGE(ge::PARAM_INVALID, - "Input parameter --dynamic_dims parse failed, " - "reason: Each gear setting needs to be consistent with the number of -1 in the inputshape."); + "E10042", {"parameter", "reason"}, + {"dynamic_dims", "Each gear setting needs to be consistent with the number of -1 in the inputshape"}); + GELOGE(ge::PARAM_INVALID, "Input parameter --dynamic_dims parse failed, " + "reason: Each gear setting needs to be consistent with the number of -1 in the inputshape."); return false; } for (auto dim : one_set) { for (auto c : dim) { if (!isdigit(c)) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--dynamic_dims's parameter", dim.c_str(), "must be positive integer"}); + "E10001", {"parameter", "value", "reason"}, + {"--dynamic_dims's parameter", dim.c_str(), "must be positive integer"}); GELOGE(ge::PARAM_INVALID, "dynamic_dims's parameter must be positive integer."); return false; } @@ -379,9 +360,9 @@ bool ParseInputShape(const string &input_shape, unordered_map caffe_support_input_format = {"NCHW", "ND"}; @@ -37,9 +37,15 @@ static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; static std::map input_format_str_to_geformat = { - {"ND", domi::DOMI_TENSOR_ND}, {"NCHW", domi::DOMI_TENSOR_NCHW}, {"NHWC", domi::DOMI_TENSOR_NHWC}, - {"CHWN", domi::DOMI_TENSOR_CHWN}, {"NC1HWC0", domi::DOMI_TENSOR_NC1HWC0}, {"NHWC1C0", domi::DOMI_TENSOR_NHWC1C0}, - {"NCDHW", domi::DOMI_TENSOR_NCDHW}, {"NDHWC", domi::DOMI_TENSOR_NDHWC}}; + {"ND", domi::DOMI_TENSOR_ND}, + {"NCHW", domi::DOMI_TENSOR_NCHW}, + {"NHWC", domi::DOMI_TENSOR_NHWC}, + {"CHWN", domi::DOMI_TENSOR_CHWN}, + {"NC1HWC0", domi::DOMI_TENSOR_NC1HWC0}, + {"NHWC1C0", domi::DOMI_TENSOR_NHWC1C0}, + {"NCDHW", domi::DOMI_TENSOR_NCDHW}, + {"NDHWC", domi::DOMI_TENSOR_NDHWC} +}; static const std::string 
kEnableCompressWeightTrue = "1"; static const std::string kEnableCompressWeightFalse = "0"; @@ -71,5 +77,5 @@ Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream) Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); void PrintOptionMap(std::map &options, std::string tips); void EraseEndSemicolon(std::string ¶m); -} // namespace ge +} #endif // FRAMEWORK_DOMI_ATC_IR_COMMON_H_ diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 90f7a8ca..544bcc21 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "external/ge/ge_ir_build.h" #include @@ -35,6 +34,7 @@ #include "init/gelib.h" #include "ir_build/atc_ir_common.h" #include "model/ge_model.h" +#include "graph/shape_refiner.h" using std::string; using namespace std; @@ -52,48 +52,51 @@ const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; static graphStatus CheckGlobalOptions(std::map &global_options) { // check param disable_reuse_memory - std::string disable_reuse_memory = - global_options.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY) == global_options.end() - ? IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT - : global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY]; + std::string disable_reuse_memory = global_options.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY) == + global_options.end() + ? IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT + : global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY]; GE_CHK_BOOL_EXEC(ge::CheckDisableReuseMemoryParamValid(disable_reuse_memory) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check disable_reuse_memory failed!"); + return ge::GRAPH_PARAM_INVALID, "check disable_reuse_memory failed!"); global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY] = disable_reuse_memory; // check buffer_optimize std::string buffer_optimize = global_options.find(ge::ir_option::BUFFER_OPTIMIZE) == global_options.end() - ? IR_OPTION_BUFFER_OPTIMIZE_DEFAULT - : global_options[ge::ir_option::BUFFER_OPTIMIZE]; - GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(buffer_optimize) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID, - "check buffer optimize failed!"); + ? IR_OPTION_BUFFER_OPTIMIZE_DEFAULT + : global_options[ge::ir_option::BUFFER_OPTIMIZE]; + GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(buffer_optimize) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check buffer optimize failed!"); global_options[ge::ir_option::BUFFER_OPTIMIZE] = buffer_optimize; // check enable_single_stream std::string enable_single_stream = global_options.find(ge::ir_option::ENABLE_SINGLE_STREAM) == global_options.end() - ? "" - : global_options[ge::ir_option::ENABLE_SINGLE_STREAM]; + ? 
"" + : global_options[ge::ir_option::ENABLE_SINGLE_STREAM]; GE_CHK_BOOL_EXEC(ge::CheckEnableSingleStreamParamValid(enable_single_stream) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check enable single stream failed!"); + return ge::GRAPH_PARAM_INVALID, "check enable single stream failed!"); // check compress_weight - std::string enable_compress_weight = - global_options.find(ge::ir_option::ENABLE_COMPRESS_WEIGHT) == global_options.end() - ? IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT - : global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT]; + std::string enable_compress_weight = global_options.find(ge::ir_option::ENABLE_COMPRESS_WEIGHT) == + global_options.end() + ? IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT + : global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT]; std::string compress_weight_conf = global_options.find(ge::ir_option::COMPRESS_WEIGHT_CONF) == global_options.end() - ? "" - : global_options[ge::ir_option::COMPRESS_WEIGHT_CONF]; + ? "" + : global_options[ge::ir_option::COMPRESS_WEIGHT_CONF]; GE_CHK_BOOL_EXEC(ge::CheckCompressWeightParamValid(enable_compress_weight, compress_weight_conf) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check compress weight failed!"); - global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT] = - (enable_compress_weight == "true") ? ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse; + return ge::GRAPH_PARAM_INVALID, "check compress weight failed!"); + global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT] = (enable_compress_weight == "true") ? + ge::kEnableCompressWeightTrue : + ge::kEnableCompressWeightFalse; // check optypelist_for_implmode and op_select_implmode - std::string optypelist_for_implmode = - global_options.find(ge::ir_option::OPTYPELIST_FOR_IMPLMODE) == global_options.end() - ? "" - : global_options[ge::ir_option::OPTYPELIST_FOR_IMPLMODE]; - std::string op_select_implmode = global_options.find(ge::ir_option::OP_SELECT_IMPL_MODE) == global_options.end() - ? "" - : global_options[ge::ir_option::OP_SELECT_IMPL_MODE]; - GE_CHK_BOOL_EXEC(ge::CheckImplmodeParamValid(optypelist_for_implmode, op_select_implmode) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); + std::string optypelist_for_implmode = global_options.find(ge::ir_option::OPTYPELIST_FOR_IMPLMODE) == + global_options.end() + ? "" + : global_options[ge::ir_option::OPTYPELIST_FOR_IMPLMODE]; + std::string op_select_implmode = global_options.find(ge::ir_option::OP_SELECT_IMPL_MODE) == + global_options.end() + ? 
"" + : global_options[ge::ir_option::OP_SELECT_IMPL_MODE]; + GE_CHK_BOOL_EXEC( + ge::CheckImplmodeParamValid(optypelist_for_implmode, op_select_implmode) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; return GRAPH_SUCCESS; @@ -175,7 +178,8 @@ graphStatus Impl::CheckOptions(const std::map &options if (it == ge::ir_option::ir_builder_suppported_options.end()) { auto it_lx_fusion = ir_builder_supported_options_for_lx_fusion.find(ele.first); if (it_lx_fusion == ir_builder_supported_options_for_lx_fusion.end()) { - GELOGE(GRAPH_PARAM_INVALID, "input options include unsupported option(%s).Please check!", ele.first.c_str()); + GELOGE(GRAPH_PARAM_INVALID, "input options include unsupported option(%s).Please check!", + ele.first.c_str()); return GRAPH_PARAM_INVALID; } } @@ -217,12 +221,12 @@ graphStatus Impl::Init(const std::map &options) { GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); GetThreadLocalContext().SetGraphOption(options_); std::string build_mode = (options_.find(BUILD_MODE) == options_.end() || options_[BUILD_MODE] == BUILD_MODE_NORMAL) - ? "" - : options_[BUILD_MODE]; + ? "" : options_[BUILD_MODE]; options_[BUILD_MODE] = build_mode; // set log level - std::string log = options_.find(ge::ir_option::LOG_LEVEL) == options_.end() ? IR_OPTION_LOG_LEVEL_DEFAULT - : options_[ge::ir_option::LOG_LEVEL]; + std::string log = options_.find(ge::ir_option::LOG_LEVEL) == options_.end() + ? IR_OPTION_LOG_LEVEL_DEFAULT + : options_[ge::ir_option::LOG_LEVEL]; GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); options_[ge::ir_option::LOG_LEVEL] = log; @@ -230,13 +234,13 @@ graphStatus Impl::Init(const std::map &options) { string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() - ? "" - : options_[ge::ir_option::DYNAMIC_BATCH_SIZE]; + ? "" + : options_[ge::ir_option::DYNAMIC_BATCH_SIZE]; string dynamic_image_size = options_.find(ge::ir_option::DYNAMIC_IMAGE_SIZE) == options_.end() - ? "" - : options_[ge::ir_option::DYNAMIC_IMAGE_SIZE]; + ? "" + : options_[ge::ir_option::DYNAMIC_IMAGE_SIZE]; string dynamic_dims = - options_.find(ge::ir_option::DYNAMIC_DIMS) == options_.end() ? "" : options_[ge::ir_option::DYNAMIC_DIMS]; + options_.find(ge::ir_option::DYNAMIC_DIMS) == options_.end() ? "" : options_[ge::ir_option::DYNAMIC_DIMS]; auto status = CheckDynamicInputParamValid(dynamic_batch_size, dynamic_image_size, dynamic_dims, input_shape, input_format, is_dynamic_input_); @@ -250,15 +254,20 @@ graphStatus Impl::Init(const std::map &options) { omg_context_.dynamic_image_size = dynamic_image_size; omg_context_.dynamic_dims = dynamic_dims; // check output_type - std::string output_type = - options_.find(ge::ir_option::OUTPUT_TYPE) == options_.end() ? "" : options_[ge::ir_option::OUTPUT_TYPE]; - GE_CHK_BOOL_EXEC(ge::CheckOutputTypeParamValid(output_type) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID, - "check output type failed!"); + std::string output_type = options_.find(ge::ir_option::OUTPUT_TYPE) == options_.end() + ? 
"" + : options_[ge::ir_option::OUTPUT_TYPE]; + GE_CHK_BOOL_EXEC(ge::CheckOutputTypeParamValid(output_type) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check output type failed!"); // check insert_op_conf - std::string insert_op_conf = - options_.find(ge::ir_option::INSERT_OP_FILE) == options_.end() ? "" : options_[ge::ir_option::INSERT_OP_FILE]; + std::string insert_op_conf = options_.find(ge::ir_option::INSERT_OP_FILE) == options_.end() + ? "" + : options_[ge::ir_option::INSERT_OP_FILE]; GE_CHK_BOOL_EXEC(ge::CheckInsertOpConfParamValid(std::string(insert_op_conf)) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check insert op conf failed!"); + return ge::GRAPH_PARAM_INVALID, "check insert op conf failed!"); + + GE_CHK_BOOL_EXEC(insert_op_conf.empty() || dynamic_dims.empty(), + return ge::GRAPH_PARAM_INVALID, "dynamic dims function does not support aipp"); // for IR builder.Only support om mode, so here fixed; options_.insert(std::pair(string(IR_OPTION_MODE), to_string(0))); @@ -402,7 +411,7 @@ graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &m GELOGE(GRAPH_PARAM_INVALID, "input model is illegal"); return GRAPH_PARAM_INVALID; } - return FileSaver::SaveToFile((output_file + ".om"), reinterpret_cast(model.data.get()), + return FileSaver::SaveToFile((output_file + ".om"), reinterpret_cast(model.data.get()), static_cast(model.length)); } @@ -416,4 +425,77 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat *patch_version = IR_PATCH_VERSION; return GRAPH_SUCCESS; } + +graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { + auto compute_graph = GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + + for (auto &node: compute_graph->GetAllNodes()) { + graphStatus ret = ShapeRefiner::InferShapeAndType(node); + if (ret == GRAPH_PARAM_INVALID) { + GELOGW("Can not find infershape func."); + continue; + } else if (ret != GRAPH_SUCCESS) { + GELOGE(ret, "Acl infershape failed."); + return ret; + } + } + + return GRAPH_SUCCESS; +} + +graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len) { + GE_CHECK_NOTNULL(file); + + if (len > PATH_MAX || len != strlen(file) || strlen(file) == 0) { + GELOGE(GRAPH_PARAM_INVALID, "File path invalid."); + return GRAPH_PARAM_INVALID; + } + + auto compute_graph = GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + + string full_path(file, len); + for (size_t i = 0; i < len; i++) { + if (full_path[i] == '\\') { + full_path.replace(i, 1, "/"); + } + } + + string suffix; + string file_path; + int pos = full_path.rfind("/"); + if (pos != -1) { + suffix = full_path.substr(pos + 1, -1); + file_path = full_path.substr(0, pos); + } else { + suffix = full_path; + file_path = "./"; + } + + if (suffix.empty()) { + suffix = compute_graph->GetName(); + if (suffix.empty()) { + suffix = "graph"; + } + } + + char path[PATH_MAX] = {0}; + if (realpath(file_path.c_str(), path) == nullptr) { + GELOGE(GRAPH_PARAM_INVALID, "Dump file path:%s is invalid.", file); + return GRAPH_PARAM_INVALID; + } + + GraphUtils::DumpGEGrph(compute_graph, string(path), suffix); + GraphUtils::DumpGrphToOnnx(*compute_graph, string(path), suffix); + uint64_t i = 0; + for (const auto &sub_graph_func : compute_graph->GetAllSubgraphs()) { + auto sub_graph_func_name = suffix + std::string("_sub_graph_") + std::to_string(i++); + GraphUtils::DumpGEGrph(sub_graph_func, string(path), sub_graph_func_name); + GraphUtils::DumpGrphToOnnx(*sub_graph_func, string(path), 
sub_graph_func_name); + } + + return GRAPH_SUCCESS; +} + } // namespace ge diff --git a/ge/model/ge_model.cc b/ge/model/ge_model.cc old mode 100644 new mode 100755 index 70251876..eb6ca158 --- a/ge/model/ge_model.cc +++ b/ge/model/ge_model.cc @@ -59,7 +59,9 @@ void GeModel::SetGraph(const Graph &graph) { this->graph_ = graph; } void GeModel::SetModelTaskDef(const std::shared_ptr &task) { this->task_ = task; } -void GeModel::SetTBEKernelStore(const TBEKernelStore &tbe_kernal_store) { this->tbe_kernal_store_ = tbe_kernal_store; } +void GeModel::SetTBEKernelStore(const TBEKernelStore &tbe_kernal_store) { + this->tbe_kernal_store_ = tbe_kernal_store; +} void GeModel::SetCustAICPUKernelStore(const CustAICPUKernelStore &cust_aicpu_kernal_store) { this->cust_aicpu_kernal_store_ = cust_aicpu_kernal_store; diff --git a/ge/model/ge_model.h b/ge/model/ge_model.h old mode 100644 new mode 100755 index 288b834f..5676c3b6 --- a/ge/model/ge_model.h +++ b/ge/model/ge_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,9 +64,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeModel : public AttrHolder ProtoAttrMapHelper MutableAttrMap() override; - using AttrHolder::GetAllAttrNames; - using AttrHolder::GetAllAttrs; using AttrHolder::SetAttr; + using AttrHolder::GetAllAttrs; + using AttrHolder::GetAllAttrNames; void SetModelId(uint32_t model_id) { model_id_ = model_id; } uint32_t GetModelId() const { return model_id_; } diff --git a/ge/model/ge_root_model.cc b/ge/model/ge_root_model.cc index aee119fa..68f868dd 100644 --- a/ge/model/ge_root_model.cc +++ b/ge/model/ge_root_model.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/model/ge_root_model.h b/ge/model/ge_root_model.h old mode 100644 new mode 100755 index 2b73c868..53174064 --- a/ge/model/ge_root_model.h +++ b/ge/model/ge_root_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
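// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): a condensed restatement of the
// shape constraints enforced by the refactored helpers in atc_ir_common.cc
// above. This is not the multibatch::CheckDynamicBatchShape /
// CheckDynamicImageSizeShape implementation, only the rules as they appear
// from the call sites: for --dynamic_batch_size only the batch dim may be -1;
// for --dynamic_image_size exactly H and W must be -1 in a 4-D NCHW/NHWC shape.
#include <algorithm>
#include <string>
#include <vector>

static bool BatchShapeLooksDynamic(const std::vector<int64_t> &shape) {
  if (shape.empty() || shape[0] != -1) {
    return false;  // the batch dim (N) must be the dynamic one
  }
  // every other dim must be a concrete positive size
  return std::all_of(shape.begin() + 1, shape.end(), [](int64_t d) { return d >= 1; });
}

static bool ImageShapeLooksDynamic(const std::vector<int64_t> &shape, const std::string &format) {
  if (shape.size() != 4) {
    return false;  // only 4-D shapes participate in dynamic image size
  }
  const size_t h = (format == "NCHW") ? 2 : 1;  // NHWC otherwise
  const size_t w = (format == "NCHW") ? 3 : 2;
  return shape[h] == -1 && shape[w] == -1 &&
         std::count(shape.begin(), shape.end(), static_cast<int64_t>(-1)) == 2;
}
// ---------------------------------------------------------------------------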
*/ - #include #include "graph/compute_graph.h" #include "model/ge_model.h" @@ -24,7 +23,7 @@ namespace ge { class GeRootModel { public: - explicit GeRootModel(ComputeGraphPtr &root_graph) : root_graph_(root_graph), model_id_(INVALID_MODEL_ID){}; + explicit GeRootModel(ComputeGraphPtr &root_graph) : root_graph_(root_graph), model_id_(INVALID_MODEL_ID) {}; ~GeRootModel() = default; void SetSubgraphInstanceNameToModel(string instance_name, GeModelPtr ge_model); diff --git a/ge/module.mk b/ge/module.mk old mode 100644 new mode 100755 diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt new file mode 100644 index 00000000..85f6715f --- /dev/null +++ b/ge/offline/CMakeLists.txt @@ -0,0 +1,69 @@ +set(PROTO_LIST + "${METADEF_DIR}/proto/om.proto" + "${METADEF_DIR}/proto/ge_ir.proto" + "${METADEF_DIR}/proto/insert_op.proto" + "${METADEF_DIR}/proto/task.proto" +) + +protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) + +set(SRC_LIST + "main.cc" + "single_op_parser.cc" + "../session/omg.cc" + "../ir_build/atc_ir_common.cc" +) + +############ atc ############ +add_executable(atc ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(atc PRIVATE + -Werror + -O2 +) + +target_compile_definitions(atc PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + COMPILE_OMG_PACKAGE +) + +target_include_directories(atc PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/graphengine + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/common/inc/external + ${GE_CODE_DIR}/common/inc/external/graph + ${GE_CODE_DIR}/inc/framework + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/common +) + +target_link_libraries(atc PRIVATE + $ + protobuf + ge_common + register + c_sec + graph + error_manager + ge_compiler + parser_common + gflags + json + runtime_compile + slog + mmpa + -lrt + -ldl +) + +############ install ############ +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(TARGETS atc OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) diff --git a/ge/offline/main.cc b/ge/offline/main.cc new file mode 100755 index 00000000..854e5092 --- /dev/null +++ b/ge/offline/main.cc @@ -0,0 +1,1332 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
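// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of this patch) for the two entry points
// added to ge_ir_build.cc above: run shape/type inference over a built
// ge::Graph, then dump it (and its subgraphs) next to a caller-chosen prefix.
// Constructing the graph itself is assumed to have happened already, and the
// output prefix below is a placeholder whose directory must exist.
ge::graphStatus InferAndDumpSketch(ge::Graph &graph) {
  ge::graphStatus ret = ge::aclgrphInferShapeAndType(graph);
  if (ret != ge::GRAPH_SUCCESS) {
    // Nodes without a registered infershape func are skipped inside the call;
    // any other failure is propagated here.
    return ret;
  }
  const std::string prefix = "./dump/my_graph";  // hypothetical path prefix
  return ge::aclgrphDumpGraph(graph, prefix.c_str(), prefix.size());
}
// ---------------------------------------------------------------------------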
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/gflags_util.h" +#include "common/util.h" +#include "common/util/error_manager/error_manager.h" +#include "framework/common/debug/ge_log.h" +#include "ge/ge_api.h" +#include "generator/ge_generator.h" +#include "graph/anchor.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/graph.h" +#include "graph/op_desc.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/type_utils.h" +#include "init/gelib.h" +#include "ir_build/atc_ir_common.h" +#include "omg/omg.h" +#include "omg/parser/parser_factory.h" +#include "omg/parser/parser_inner_ctx.h" +#include "parser/common/register_tbe.h" +#include "register/op_registry.h" +#include "single_op_parser.h" + +using domi::BuildMode; +using domi::OpRegistrationData; +using domi::OpRegistry; +using domi::Status; +using domi::SUCCESS; +using ge::GEN_OM_MODEL; +using ge::GflagsUtils; +using ge::MODEL_TO_JSON; +using ge::ONLY_PRE_CHECK; +using ge::ParseInputShape; +using ge::PBTXT_TO_JSON; +using std::map; +using std::pair; +using std::shared_ptr; +using std::string; +using std::vector; + +static bool is_dynamic_input = false; + +// 310 limited 8G size +const char *const kGraphMemoryManagerMallocMaxSize = "8*1024*1024*1024"; +const char *const kModeSupport = "only support 0(model to framework model), " + "1(framework model to json), 3(only pre-check), 5(pbtxt to json)"; +const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow)"; + +// limit available mem size 2G +const long kMinAvailableMem = 2 * 1024 * 1024; + +DEFINE_string(model, "", "The model file."); +DEFINE_string(output, "", "The output file path&name."); +DEFINE_int32(framework, -1, "Framework type(0:Caffe; 1:MindSpore; 3:Tensorflow)."); +DEFINE_string(weight, "", "Optional; weight file. Required when framework is Caffe."); + +DEFINE_string(input_shape, "", + "Optional; shape of input data. Required when framework is caffe " + "or TensorFLow or MindSpore." + "Format: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\""); +DEFINE_bool(h, false, "show this help message"); +DEFINE_string(cal_conf, "", "Optional; the calibration config file."); + +DEFINE_string(insert_op_conf, "", "Optional; the config file to insert new op, for example AIPP op."); +DEFINE_string(op_name_map, "", "Optional; custom op name mapping file."); + +DEFINE_string(target, "", "Optional; mini."); + +DEFINE_string(om, "", "The model file to be converted to json."); +DEFINE_string(json, "", "The output json file path&name which is converted from a model."); +DEFINE_int32(mode, 0, + "Optional; run mode, 0(default): model => framework model; 1: " + "framework model => json; 3: only pre-check; 5: pbtxt => json."); + +#if !defined(__ANDROID__) && !defined(ANDROID) +DEFINE_int32(encrypt_mode, -1, "Optional; the encrypt flag. 0: encrypt; -1(default): not encrypt"); +DEFINE_string(encrypt_key, "", "Optional; the encrypt_key file."); +DEFINE_string(certificate, "", "Optional; the certificate file."); +DEFINE_string(hardware_key, "", "Optional; the ISV key file."); +DEFINE_string(private_key, "", "Optional; the private key file."); +#endif + +DEFINE_string(out_nodes, "", + "Optional; output nodes designated by users." + "Format: \"node_name1:0;node_name1:1;node_name2:0\""); + +DEFINE_string(precision_mode, "", + "Optional; precision mode." 
+ "Support force_fp16, allow_mix_precision, allow_fp32_to_fp16, must_keep_origin_dtype."); + +DEFINE_string(input_format, "", + "Optional; input_format, format of input data, NCHW;NHWC." + "Format:\"NHWC\""); + +DEFINE_string(check_report, "check_result.json", "Optional; the pre-checking report file."); + +DEFINE_string(input_fp16_nodes, "", + "Optional; input node datatype is fp16 and format is NC1HWC0." + "Format:\"node_name1;node_name2\""); + +DEFINE_string(is_output_adjust_hw_layout, "", + "Optional; Net output node's datatype is fp16 and format is " + "NC1HWC0, or not." + "Format:\"false,true,false,true\""); + +DEFINE_string(is_input_adjust_hw_layout, "", + "Optional; Intput node's datatype is fp16 and format is " + "NC1HWC0, or not." + "Format:\"false,true,false,true\""); + +DEFINE_string(output_type, "", + "Optional; output type! " + "Support FP32,FP16,INT8,INT16,UINT16,UINT8,INT32,INT64,UINT32,UINT64,DOUBLE."); + +DEFINE_string(op_select_implmode, "", + "Optional; op select implmode! " + "Support high_precision, high_performance."); + +DEFINE_string(optypelist_for_implmode, "", + "Optional; Nodes need use implmode selected in op_select_implmode " + "Format:\"node_name1,node_name2\""); + +DEFINE_string(singleop, "", "Optional; If set, generate single op model with the given json file."); + +DEFINE_int32(disable_reuse_memory, 0, "Optional; If set to 1, disable reuse memory when generating if."); + +DEFINE_string(auto_tune_mode, "", "Optional; Set tune mode."); + +DEFINE_string(soc_version, "", "The soc version."); + +DEFINE_string(core_type, "AiCore", "Optional; If set to VectorCore, only use vector core."); + +DEFINE_string(aicore_num, "", "Optional; Set aicore num"); + +DEFINE_string(buffer_optimize, "l2_optimize", "Optional; buffer optimize"); + +DEFINE_string(fusion_switch_file, "", "Optional; Set fusion switch file path"); + +DEFINE_string(save_original_model, "", "Optional; enable output original offline model. false(default)"); + +DEFINE_string(dynamic_batch_size, "", + "Optional; If set, generate dynamic multi batch model. " + "Different batch sizes are split by ','." + "dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one."); + +DEFINE_string(dynamic_image_size, "", + "Optional; If set, generate dynamic multi image size model." + "Different groups of image size are split by ';'," + "while different dimensions of each group are split by ','." + "dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one."); + +DEFINE_string(dynamic_dims, "", + "Optional; If set, generate dynamic input size model. " + "Different groups of size are split by ';', while different dimensions of each group are split by ','." + "dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one."); + +DEFINE_string(enable_small_channel, "0", "Optional; If set to 1, small channel is enabled."); + +DEFINE_string(enable_compress_weight, "false", + "Optional; enable compress weight. true: enable; false(default): disable"); + +DEFINE_string(compress_weight_conf, "", "Optional; the config file to compress weight"); + +DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable"); + +DEFINE_string(log, "null", "Optional; generate atc log. Support debug, info, warning, error, null"); + +DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0."); + +DEFINE_int32(op_debug_level, 0, "Optional; configure debug level of compiler. 
0(default): close debug;" + "1: open TBE compiler, export ccec file and TBE instruction mapping file; 2: open ccec compiler"); +DEFINE_string(enable_scope_fusion_passes, "", "Optional; validate the non-general scope fusion pass," + "multiple names can be set and separated by ','."); + +class GFlagUtils { + public: + /** + * @name InitGFlag + * @brief initialize gflag + * @return void + */ + static void InitGFlag(int argc, char *argv[]) { + // -help + gflags::SetUsageMessage( + "usage: ./atc \n" + "generate offline model example:\n" + "./atc --model=./alexnet.prototxt --weight=./alexnet.caffemodel \n" + "--framework=0 --output=./domi \n" + "generate offline model for single op example:\n" + "./atc --singleop=./op_list.json --output=./op_model \n" + "===== Basic Functionality =====\n" + "[General]\n" + " --h/help Show this help message\n" + " --mode Run mode. 0(default): generate offline model; 1: convert model to JSON format " + "3: only pre-check; 5: convert pbtxt file to JSON format\n" + "\n[Input]\n" + " --model Model file\n" + " --weight Weight file. Required when framework is Caffe\n" + " --om The model file to be converted to json\n" + " --framework Framework type. 0:Caffe; 1:MindSpore; 3:Tensorflow\n" + " --input_format Format of input data. E.g.: \"NCHW\"\n" + " --input_shape Shape of input data. Separate multiple nodes with semicolons (;)." + "Use double quotation marks (\") to enclose each argument.\n" + " E.g.: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\"\n" + " --dynamic_batch_size Set dynamic batch size. E.g: \"batchsize1,batchsize2,batchsize3\"\n" + " --dynamic_image_size Set dynamic image size. Separate multiple nodes with semicolons (;)." + "Use double quotation marks (\") to enclose each argument.\n" + " E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n" + " --dynamic_dims Set dynamic dims. Separate multiple nodes with semicolons (;)." + "Use double quotation marks (\") to enclose each argument. E.g: \"dims1_n1,dims1_n2;dims2_n1,dims2_n2\"\n" + " --singleop Single op definition file. atc will generate offline " + "model(s) for single op if --singleop is set.\n" + "\n[Output]\n" + " --output Output file path&name(needn't suffix, will add " + ".om automatically). \n" + " If --singleop is set, this arg specifies the directory to " + "which the single op offline model will be generated\n" + " --output_type Set net output type. Support FP32, FP16, UINT8." + "E.g.: FP16, indicates that all out nodes are set to FP16.\n" + " \"node1:0:FP16;node2:1:FP32\", indicates setting the datatype of multiple out nodes.\n" + " --check_report The pre-checking report file. Default value is: " + "\"check_result.json\"\n" + " --json The output json file path&name which is " + "converted from a model\n" + "\n[Target]\n" + " --soc_version The soc version.\n" + " --core_type Set core type AiCore or VectorCore. VectorCore: use vector core. " + "Default value is: AiCore\n" + " --aicore_num Set aicore num\n" + "===== Advanced Functionality =====\n" + "[Feature]\n" + " --out_nodes Output nodes designated by users. Separate multiple nodes with semicolons (;)." + "Use double quotation marks (\") to enclose each argument.\n" + " E.g.: \"node_name1:0;node_name1:1;node_name2:0\"\n" + " --input_fp16_nodes Input node datatype is fp16. Separate multiple nodes with semicolons " + "(;)." + "Use double quotation marks (\") to enclose each argument." 
+ "E.g.: \"node_name1;node_name2\"\n" + " --insert_op_conf Config file to insert new op\n" + " --op_name_map Custom op name mapping file\n" + " Note: A semicolon(;) cannot be included in each " + "path, otherwise the resolved path will not match the expected one.\n" + " --is_input_adjust_hw_layout Intput node datatype is fp16 and format is " + "NC1HWC0, used with input_fp16_nodes E.g.: \"true,true,false,true\"\n" + " --is_output_adjust_hw_layout Net output node datatype is fp16 and format is " + "NC1HWC0, used with out_nodes. E.g.: \"true,true,false,true\"\n" + "\n[Model Tuning]\n" + " --disable_reuse_memory The switch of reuse memory. Default value is : 0." + "0 means reuse memory, 1 means do not reuse memory.\n" + " --fusion_switch_file Set fusion switch file path\n" + " --enable_scope_fusion_passes validate the non-general scope fusion passes," + "multiple names can be set and separated by ','. E.g.: ScopePass1,ScopePass2,...\n" + " --enable_single_stream Enable single stream. true: enable; false(default): disable\n" + " --enable_small_channel Set enable small channel. 0(default): disable; 1: enable\n" + " --enable_compress_weight Enable compress weight. true: enable; false(default): disable\n" + " --compress_weight_conf Config file to compress weight\n" + " --buffer_optimize Set buffer optimize. \"l2_optimize\" (default). Set \"off_optimize\" to close\n" + "\n[Operator Tuning]\n" + " --precision_mode precision mode, support force_fp16, allow_mix_precision, " + "allow_fp32_to_fp16, must_keep_origin_dtype.\n" + " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" + " --op_select_implmode Set op select implmode. Support high_precision, high_performance." + "default: high_performance\n" + " --optypelist_for_implmode Appoint which op to select implmode, cooperated with op_select_implmode.\n" + " Separate multiple nodes with commas (,). Use double quotation marks (\") " + " to enclose each argument. E.g.: \"node_name1,node_name2\"\n" + " --op_debug_level Debug enable for TBE operator building.\n" + " 0 (default): Disable debug; 1: Enable TBE pipe_all, " + "and generate the operator CCE file and Python-CCE mapping file (.json);\n" + " 2: Enable TBE pipe_all, generate the operator CCE file and Python-CCE mapping file " + "(.json), and enable the CCE compiler -O0-g.\n" + "\n[Debug]\n" + " --save_original_model Control whether to output original model. E.g.: true: output original model\n" + " --log Generate log with level. Support debug, info, warning, error, null\n" + " --dump_mode The switch of dump json with shape, to be used with mode 1." 
+ "0(default): disable; 1: enable."); + + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + // Using gflags to analyze input parameters + GflagsUtils::ChangeHelpFlags(FLAGS_h); + gflags::HandleCommandLineHelpFlags(); + } + + static Status CheckDumpInfershapeJsonFlags() { + Status ret = CheckFrameWorkValid(FLAGS_framework, FLAGS_weight); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, + "check custom aicpu run so failed!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_weight != "" && !ge::CheckInputPathValid(FLAGS_weight, "--weight"), + return domi::FAILED, "Input parameter[--weight]'s value[%s] is invalid!", + FLAGS_weight.c_str()); + return domi::SUCCESS; + } + + static Status CheckFlags() { + Status ret = ge::SUCCESS; + // No model file information passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_model == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"model"}); + ret = ge::FAILED, "Input parameter[--model]'s value is empty!"); + + // check param disable_reuse_memory + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + ge::CheckDisableReuseMemoryParamValid(to_string(FLAGS_disable_reuse_memory)) != ge::SUCCESS, + ret = ge::FAILED, "check disable_reuse_memory failed!"); + + // check optypelist_for_implmode and op_select_implmode + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + ge::CheckImplmodeParamValid(FLAGS_optypelist_for_implmode, + FLAGS_op_select_implmode) != ge::SUCCESS, + ret = ge::FAILED, "check optypelist_for_implmode and op_select_implmode failed!"); + // No output file information passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_mode == GEN_OM_MODEL && FLAGS_output == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"output"}); + ret = ge::FAILED, "Input parameter[--output]'s value is empty!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + CheckFrameWorkValid(FLAGS_framework, FLAGS_weight) != ge::SUCCESS, + ret = ge::FAILED, + "CheckFrameWorkValid failed"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + ge::CheckDynamicInputParamValid(FLAGS_dynamic_batch_size, FLAGS_dynamic_image_size, + FLAGS_dynamic_dims, FLAGS_input_shape, + FLAGS_input_format, is_dynamic_input) != ge::SUCCESS, + ret = ge::FAILED, "check dynamic size(batch size, image size or dims) failed!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + !FLAGS_insert_op_conf.empty() && !FLAGS_dynamic_dims.empty(), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", + {"parameter", "value", "reason"}, + {"--insert_op_conf", FLAGS_insert_op_conf, + "dynamic dims function does not support aipp"}); + ret = ge::FAILED, "dynamic dims function does not support aipp"); + +#if !defined(__ANDROID__) && !defined(ANDROID) + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!CheckEncryptModeValid(FLAGS_encrypt_mode), ret = ge::FAILED, + "encrypt_mode %d not valid!!", FLAGS_encrypt_mode); + + if (FLAGS_encrypt_mode == 0) { // Encryption mode + GELOGI("ge will run with encrypt!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_encrypt_key), ret = ge::FAILED, + "encrypt_key file not found!!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_certificate), ret = ge::FAILED, + "certificate file not found!!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_hardware_key), ret = ge::FAILED, + "hardware_key file not found!!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_private_key), ret = ge::FAILED, + "private_key file not found!!"); + } else { // No encryption + GELOGI("ge will run without encrypt!"); + } +#endif + + /** + * Check the 
validity of the I / O file path + */ + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_model != "" && !ge::CheckInputPathValid(FLAGS_model, "--model"), ret = ge::FAILED, + "model file %s not found!!", FLAGS_model.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_weight != "" && !ge::CheckInputPathValid(FLAGS_weight, "--weight"), + ret = ge::FAILED, "weight file %s not found!!", + FLAGS_weight.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_cal_conf != "" && !ge::CheckInputPathValid(FLAGS_cal_conf, "--cal_conf"), + ret = ge::FAILED, "calibration config file %s not found!!", + FLAGS_cal_conf.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_op_name_map != "" && !ge::CheckInputPathValid(FLAGS_op_name_map, "--op_name_map"), + ret = ge::FAILED, "op config file %s not found!!", + FLAGS_op_name_map.c_str()); + + GE_CHK_BOOL_EXEC(ge::CheckInsertOpConfParamValid(std::string(FLAGS_insert_op_conf)) == ge::SUCCESS, + ret = ge::FAILED, "check insert op conf failed!"); + + GE_CHK_BOOL_EXEC(ge::CheckCompressWeightParamValid( + FLAGS_enable_compress_weight, FLAGS_compress_weight_conf) == ge::SUCCESS, + ret = ge::FAILED, "check compress weight failed!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + !ge::CheckOutputPathValid(FLAGS_check_report, "--check_report"), ret = ge::FAILED, + "check_report file %s not found!!", FLAGS_check_report.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_mode == GEN_OM_MODEL && FLAGS_output != "" && + (!ge::CheckOutputPathValid(FLAGS_output, "--output") || !CheckPathWithName(FLAGS_output)), + ret = ge::FAILED, "output path %s is not valid!!", FLAGS_output.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_save_original_model != "" && + FLAGS_save_original_model != "true" && + FLAGS_save_original_model != "false", + ErrorManager::GetInstance().ATCReportErrMessage( + "E10005", {"parameter", "value"}, {"save_original_model", FLAGS_save_original_model}); + ret = ge::FAILED, + "Input parameter[--save_original_model]'s value[%s] must be true or false.", + FLAGS_save_original_model.c_str()); + GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(FLAGS_buffer_optimize) == ge::SUCCESS, + ret = ge::FAILED, "check output type failed!"); + + GE_CHK_BOOL_EXEC( + ge::CheckEnableSingleStreamParamValid(std::string(FLAGS_enable_single_stream)) == ge::SUCCESS, + ret = ge::FAILED, "check enable single stream failed!"); + + return ret; + } + + /** + * Verifying the parameters of converting model to JSON + * 1. Fmk_model + * 2. 
out_json + **/ + static Status CheckConverJsonParamFlags() { + Status ret = ge::SUCCESS; + + // No model path passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_om == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"om"}); + ret = ge::FAILED, + "Input parameter[--om]'s value is empty!!"); + + // JSON path not passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_json == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"json"}); + ret = ge::FAILED, + "Input parameter[--json]'s value is empty!!"); + + // Check if the model path is valid + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_om != "" && !ge::CheckInputPathValid(FLAGS_om, "--om"), + ret = ge::FAILED, + "model file path is invalid: %s.", FLAGS_om.c_str()); + + // Check whether the JSON path is valid + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_json != "" && !ge::CheckOutputPathValid(FLAGS_json, "--json"), + ret = ge::FAILED, + "json file path is invalid: %s.", FLAGS_json.c_str()); + + return ret; + } + + /** + * Check command line parameters for explicit settings + * true: Explicit setup + * false: Not set up + * */ + static bool CheckFlagSet(string flag) { + gflags::CommandLineFlagInfo info; + return !(gflags::GetCommandLineFlagInfo(flag.c_str(), &info) && info.is_default); + } + + private: + static bool CheckEncryptModeValid(const int encrypt_mode) { +#if !defined(__ANDROID__) && !defined(ANDROID) + if (encrypt_mode != 0 && encrypt_mode != -1) { + DOMI_LOGE("encrypt mode must be 0 or -1"); + return false; + } +#else + if (encrypt_mode != -1) { + DOMI_LOGE("encrypt mode must be -1"); + return false; + } +#endif + + return true; + } + + static Status CheckFrameWorkValid(int framework, const std::string weight_file) { + if (framework != (int32_t)domi::CAFFE && framework != (int32_t)domi::TENSORFLOW && + framework != (int32_t)domi::MINDSPORE && framework != (int32_t)domi::ONNX) { + // No framework information was passed in or the entered framework is illegal + ErrorManager::GetInstance().ATCReportErrMessage( + "E10007", {"parameter", "support"}, + {"framework", "0(Caffe) or 1(MindSpore) or 3(TensorFlow)"}); + DOMI_LOGE("Input parameter[--framework] is mandatory and it's value must be: " + "0(Caffe) or 1(MindSpore) or 3(TensorFlow)."); + return domi::PARAM_INVALID; + } + + if ((framework == (int32_t)domi::CAFFE) && (weight_file == "")) { + ErrorManager::GetInstance().ATCReportErrMessage("E10008", {"parameter"}, {"weight"}); + DOMI_LOGE("Input parameter[--weight]'s value is empty when framework is 0(CAFFE)!"); + return domi::PARAM_INVALID; + } + + if ((framework == (int32_t)domi::TENSORFLOW) && (weight_file != "")) { + GELOGW("Parameter weight is ignored for TensorFlow."); + } + + if ((framework == (int32_t)domi::ONNX) && (weight_file != "")) { + GELOGW("Parameter weight is ignored for Onnx."); + } + return domi::SUCCESS; + } + + static bool CheckPathWithName(const std::string &fileName) { + // Determine file path length + if (fileName.size() > static_cast(PATH_MAX)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10021", {"parameter", "size"}, {"output", std::to_string(PATH_MAX)}); + GELOGE(ge::FAILED, "Input parameter[--output]'s path is too long, it must be less than %d", PATH_MAX); + return false; + } + + // Find the last separator + int slashPosition = fileName.size() - 1; + for (; slashPosition >= 0; slashPosition--) { + if (fileName[slashPosition] == '\\' || fileName[slashPosition] == '/') { + break; + } + } + + // Failure if no filename follows the path + if (slashPosition == 
static_cast(fileName.size() - 1)) { + ErrorManager::GetInstance().ATCReportErrMessage("E10022", {"parameter", "filename"}, {"output", fileName}); + DOMI_LOGE("Input parameter[--output]'s path[%s] not include file name", fileName.c_str()); + return false; + } + + return true; + } +}; + +void SetDynamicInputSizeOptions() { + if (!FLAGS_dynamic_batch_size.empty()) { + domi::GetContext().dynamic_batch_size = FLAGS_dynamic_batch_size; + } + if (!FLAGS_dynamic_image_size.empty()) { + domi::GetContext().dynamic_image_size = FLAGS_dynamic_image_size; + } + if (!FLAGS_dynamic_dims.empty()) { + domi::GetContext().dynamic_dims = FLAGS_dynamic_dims; + } +} + +/// Validate the non-general scope fusion pass. +/// The parameter is set to the name of the fusion rule. +/// Multiple names can be set and separated by ",". +void SetEnableScopeFusionPasses(const std::string pass_names) { + ge::GetParserContext().enable_scope_fusion_passes = pass_names; +} + +static bool CheckInputFormat() { + if (FLAGS_input_format.empty()) { + // Set default format + if (FLAGS_framework == static_cast(domi::TENSORFLOW)) { + FLAGS_input_format = "NHWC"; + } else { + FLAGS_input_format = "NCHW"; + } + return true; + } else if ((FLAGS_framework == static_cast(domi::CAFFE))) { // caffe + if (ge::caffe_support_input_format.find(FLAGS_input_format) != ge::caffe_support_input_format.end()) { + return true; + } + // only support NCHW ND + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, {"--input_format", FLAGS_input_format, ge::kCaffeFormatSupport}); + GELOGE(ge::FAILED, + "Invalid value for --input_format[%s], %s.", FLAGS_input_format.c_str(), ge::kCaffeFormatSupport); + return false; + } else if ((FLAGS_framework == static_cast(domi::TENSORFLOW))) { // tf + if (ge::tf_support_input_format.find(FLAGS_input_format) != ge::tf_support_input_format.end()) { + return true; + } + // only support NCHW NHWC ND NCDHW NDHWC + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, {"--input_format", FLAGS_input_format, ge::kTFFormatSupport}); + GELOGE(ge::FAILED, + "Invalid value for --input_format[%s], %s.", FLAGS_input_format.c_str(), ge::kTFFormatSupport); + return false; + } else if (FLAGS_framework == static_cast(domi::ONNX)) { + if (ge::onnx_support_input_format.find(FLAGS_input_format) != ge::onnx_support_input_format.end()) { + return true; + } + // only support NCHW ND + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, {"--input_format", FLAGS_input_format, ge::kONNXFormatSupport}); + GELOGE(ge::FAILED, + "Invalid value for --input_format[%s], %s.", FLAGS_input_format.c_str(), ge::kONNXFormatSupport); + return false; + } + return true; +} + +#if !defined(__ANDROID__) && !defined(ANDROID) +static void GetCustomOpPath(std::string &customop_path) { + GELOGI("Enter get custom op path schedule"); + std::string fmk_type = ge::TypeUtils::FmkTypeToSerialString(static_cast(FLAGS_framework)); + GELOGI("Framework type is %s.", fmk_type.c_str()); + + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + std::string path = path_env; + customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type); + GELOGI("Get custom so path from env : %s", path_env); + return; + } + std::string path_base = ge::GELib::GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, 
path_base.rfind('/') + 1); + customop_path = (path_base + "ops/framework/custom" + "/:") + (path_base + "ops/framework/built-in/" + fmk_type); + return; +} + +void GetPluginSoFileList(const string &path, vector &fileList, string &caffe_parser_path) { + // Support to split multiple so directories by ":" + GELOGI("path is %s", path.c_str()); + vector v_path = ge::StringUtils::Split(path, ':'); + for (size_t i = 0; i < v_path.size(); ++i) { + ge::FindParserSo(v_path[i], fileList, caffe_parser_path); + GELOGI("CustomOpLib full name = %s", v_path[i].c_str()); + } +} + +void LoadModelParserLib(std::string caffe_parser_path) { + if (FLAGS_framework == static_cast(domi::TENSORFLOW)) { + void *tf_handle = dlopen("libfmk_parser.so", RTLD_NOW | RTLD_GLOBAL); + if (tf_handle == nullptr) { + GELOGW("dlopen fmk library [libfmk_parser.so] failed."); + return; + } + GELOGI("plugin load libfmk_parser.so success."); + } else if (FLAGS_framework == static_cast(domi::CAFFE)) { + // What we are dealing with here is that the user modifies the caffe.proto scenario. + // If no lib_Caffe_Parser.so is found under the plugin path, use the default lib_Caffe_Parser.so path. + caffe_parser_path = caffe_parser_path.empty() ? "lib_caffe_parser.so" : caffe_parser_path; + + void *handle = dlopen(caffe_parser_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + if (handle == nullptr) { + GELOGW("dlopen failed, plugin name:%s. Message(%s).", caffe_parser_path.c_str(), dlerror()); + return; + } + GELOGI("plugin load %s success.", caffe_parser_path.c_str()); + // According to the dependency, the Caffe parsing module of the framework is loaded here( libfmk_parser.so). + // (depend on the lib_caffe_parser.so) + void *fmk_handle = dlopen("libfmk_parser.so", RTLD_NOW | RTLD_GLOBAL); + if (fmk_handle == nullptr) { + GELOGW("dlopen fmk library [libfmk_parser.so] failed."); + if (dlclose(handle) != 0) { + GELOGW("dlclose lib_caffe_parser.so failed."); + } + return; + } + GELOGI("plugin load libfmk_parser.so success."); + } else if (FLAGS_framework == static_cast(domi::ONNX)) { + void *handle = dlopen("libfmk_onnx_parser.so", RTLD_NOW | RTLD_GLOBAL); + if (handle == nullptr) { + GELOGW("dlopen fmk library [libfmk_onnx_parser.so] failed."); + return; + } + GELOGI("plugin load libfmk_onnx_parser.so success."); + } else { + GELOGW("Framework:%s is not support.", + ge::TypeUtils::FmkTypeToSerialString(static_cast(FLAGS_framework)).c_str()); + return; + } + return; +} + +void LoadCustomOpLib(bool need_load_ops_plugin) { + std::string plugin_path; + GetCustomOpPath(plugin_path); + + vector fileList; + string caffe_parser_path = ""; + + // whether there are files in the plugin so path + GetPluginSoFileList(plugin_path, fileList, caffe_parser_path); + + // no file + if (fileList.empty() && caffe_parser_path.empty()) { + GELOGW("can not find any plugin file in plugin_path: %s", plugin_path.c_str()); + } + + LoadModelParserLib(caffe_parser_path); + if (!need_load_ops_plugin) { + GELOGI("No need to load ops plugin so."); + return; + } + OpRegistry::Instance()->registrationDatas.clear(); + // load other so files except lib_caffe_parser.so in the plugin so path + for (auto elem : fileList) { + ge::StringUtils::Trim(elem); + + void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL); + if (handle == nullptr) { + GELOGW("dlopen failed, plugin name:%s. 
Message(%s).", elem.c_str(), dlerror()); + } else { + GELOGI("plugin load %s success.", elem.c_str()); + } + } + + std::vector registrationDatas = OpRegistry::Instance()->registrationDatas; + for (OpRegistrationData reg_data : registrationDatas) { + if (reg_data.GetFrameworkType() == static_cast(FLAGS_framework)) { + (void)ge::OpRegistrationTbe::Instance()->Finalize(reg_data); + (void)OpRegistry::Instance()->Register(reg_data); + } + } +} + +void SaveCustomCaffeProtoPath() { + GELOGI("Enter save custom caffe proto path."); + + std::string path_base = ge::GELib::GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, path_base.rfind('/') + 1); + ge::GetParserContext().caffe_proto_path = path_base + "include/proto/"; + + string customop_path; + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + std::string path = path_env; + customop_path = path + "/framework/custom/caffe/"; + GELOGI("Get custom proto path from env : %s", path_env); + ge::GetParserContext().custom_proto_path = customop_path; + return; + } + customop_path = path_base + "ops/framework/custom/caffe/"; + ge::GetParserContext().custom_proto_path = customop_path; + return; +} + +#endif + +Status CreateInputsForInference(const ge::Graph &graph, vector &inputs) { + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetAllNodes()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == ge::DATA) { + GELOGI("Data op inputDesc size is: %zu", op->GetAllInputsDesc().size()); + ge::GeTensorDesc tensor = op->GetInputDesc(0); + string data_op_name = op->GetName(); + GELOGI("Data op name is: %s", data_op_name.c_str()); + ge::GeShape data_shape; + auto iter = domi::GetContext().input_dims.find(data_op_name); + if (iter != domi::GetContext().input_dims.end()) { + data_shape = ge::GeShape(iter->second); + GELOGI("Data op get shape from Context."); + } else { + data_shape = tensor.GetShape(); + GELOGI("Data op get shape from InputDesc in geir graph."); + } + + ge::DataType data_type = tensor.GetDataType(); + string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); + GELOGI("Data op get data type:%s from InputDesc in geir graph.", data_type_str.c_str()); + + ge::GeTensor input_tensor; + ge::GeTensorDesc desc(data_shape, ge::Format(domi::GetContext().format), data_type); + input_tensor.SetTensorDesc(desc); + inputs.push_back(input_tensor); + } + } + GELOGI("Build ME model, inputs size is: %zu", inputs.size()); + return ge::SUCCESS; +} + +domi::Status GenerateInfershapeJson() { + if (!CheckInputFormat()) { + GELOGE(ge::FAILED, "Check input_format failed"); + return domi::FAILED; + } + Status ret = GFlagUtils::CheckDumpInfershapeJsonFlags(); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "Check flags failed!"); + + ge::GeGenerator ge_generator; + std::map options; + ge::Status geRet = ge_generator.Initialize(options, domi::GetContext()); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GeGenerator initialize failed!"); + return domi::FAILED; + } + + ge::Graph graph; + std::map atc_params; + atc_params.insert(std::pair("input_format", FLAGS_input_format)); + ret = ParseGraph(graph, atc_params, FLAGS_om.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType) FLAGS_framework, + "", FLAGS_target.c_str(), (ge::RunMode) FLAGS_mode, false); + if 
(ret != ge::SUCCESS) { + DOMI_LOGE("ATC Parse graph domi::FAILED"); + (void)ge_generator.Finalize(); + return domi::FAILED; + } + + geRet = ge_generator.GenerateInfershapeGraph(graph); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("ATC GenerateInfershapeJson failed"); + (void)ge_generator.Finalize(); + return domi::FAILED; + } + if (DumpInfershapeJson(graph, FLAGS_json.c_str()) != SUCCESS) { + DOMI_LOGE("ATC DumpInfershapeJson failed"); + (void)ge_generator.Finalize(); + return domi::FAILED; + } + (void)ge_generator.Finalize(); + return ge::SUCCESS; +} + +static Status ConvertModelToJson(int fwk_type, const string &model_file, const string &json_file) { + Status ret = ge::SUCCESS; + if (fwk_type == -1) { + ret = ge::ConvertOmModelToJson(model_file.c_str(), json_file.c_str()); + return ret; + } + + if ((fwk_type != domi::TENSORFLOW) && (fwk_type != domi::CAFFE) && (fwk_type != domi::ONNX)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {"--framework", std::to_string(fwk_type), kModelToJsonSupport}); + GELOGE(ge::FAILED, "Invalid value for --framework[%d], %s.", fwk_type, kModelToJsonSupport); + ret = ge::FAILED; + } + + if (FLAGS_dump_mode != "0" && FLAGS_dump_mode != "1") { + ErrorManager::GetInstance().ATCReportErrMessage("E10006", {"parameter"}, {"dump_mode"}); + GELOGE(ge::FAILED, "Input parameter[--dump_mode]'s value must be 1 or 0."); + ret = ge::FAILED; + } + + if (ret != ge::SUCCESS) return ret; + + // Need to save caffe.proto path + SaveCustomCaffeProtoPath(); + + if (FLAGS_dump_mode == "0") { + // Caffe or tf model to json depend on lib_caffe_parser.so or libfmk_parser.so. + LoadCustomOpLib(false); + ret = ge::ConvertFwkModelToJson((domi::FrameworkType)fwk_type, model_file.c_str(), json_file.c_str()); + } else if (FLAGS_dump_mode == "1") { + // Caffe or tf model to json depend on lib_caffe_parser.so or libfmk_parser.so and ops plugin so. 
+ LoadCustomOpLib(true); + ret = GenerateInfershapeJson(); + } + + return ret; +} + +domi::Status GenerateModel(std::map &options, std::string output) { + ge::GeGenerator ge_generator; + ge::Status geRet = ge::SUCCESS; + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + geRet = ge::GELib::Initialize(options); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GE initialize failed!"); + return domi::FAILED; + } + } + geRet = ge_generator.Initialize(options, domi::GetContext()); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GeGenerator initialize failed!"); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + ge::Graph graph; + std::vector inputs; + if (FLAGS_framework == domi::MINDSPORE) { + // load model from file + ge::Model load_model = ge::Model("loadmodel", "version2"); + auto ret1 = load_model.LoadFromFile(FLAGS_model); + if (ret1 != ge::GRAPH_SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage("E10041", {"parameter"}, {FLAGS_model}); + DOMI_LOGE("Load model from %s failed, please check model file or " + "input parameter[--framework] is correct", FLAGS_model.c_str()); + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + graph = load_model.GetGraph(); + + GE_CHK_STATUS_EXEC(ge::InitDomiOmgContext(FLAGS_input_shape, FLAGS_input_format, "", is_dynamic_input), + GELOGE(ge::FAILED, "ATC Generate call InitDomiOmgContext ret fail"); + (void)ge_generator.Finalize(); (void)ge::GELib::GetInstance()->Finalize(); return domi::FAILED); + + Status ret = CreateInputsForInference(graph, inputs); + if (ret != ge::SUCCESS) { + GELOGE(ge::FAILED, "create inputs for inference failed."); + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + } else { + std::map atc_params; + atc_params.insert(std::pair("input_shape", FLAGS_input_shape)); + atc_params.insert(std::pair("out_nodes", FLAGS_out_nodes)); + atc_params.insert(std::pair("input_format", FLAGS_input_format)); + atc_params.insert(std::pair("check_report", FLAGS_check_report)); + atc_params.insert(std::pair("input_fp16_nodes", FLAGS_input_fp16_nodes)); + atc_params.insert(std::pair("is_input_adjust_hw_layout", FLAGS_is_input_adjust_hw_layout)); + atc_params.insert(std::pair("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout)); + atc_params.insert(std::pair("compress_weight_conf", FLAGS_compress_weight_conf)); + atc_params.insert(std::pair(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); + atc_params.insert(std::pair("output", output)); + + Status ret = + ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework, + FLAGS_op_name_map.c_str(), FLAGS_target.c_str(), (ge::RunMode)FLAGS_mode, is_dynamic_input); + + // in ONLY_PRE_CHECK mode, pre-checking report has already saved in ParseGraph + if (FLAGS_mode == ge::ONLY_PRE_CHECK) { + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + if (ret != ge::SUCCESS) { + DOMI_LOGE("ATC precheck fail."); + return domi::FAILED; + } + return domi::SUCCESS; + } + + if (ret != ge::SUCCESS) { + DOMI_LOGE("ATC Parse graph domi::FAILED"); + DOMI_LOGE("ATC Generate execute failed"); // Duplicate log. 
(for test case + // checking error log) + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + if (ge::SetOutputNodeInfo(graph, FLAGS_output_type, "") != domi::SUCCESS) { + DOMI_LOGE("Set output node info fail."); + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + } + + geRet = ge_generator.GenerateOfflineModel(graph, output, inputs); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GE GenerateOfflineModel execute failed"); + DOMI_LOGE("ATC Generate execute failed"); // Duplicate log. (for test case + // checking error log) + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return ge::SUCCESS; +} + +static void SetEnvForSingleOp(std::map<string, string> &options) { + string flag_on = "1"; + string flag_off = "0"; + options.emplace(ge::GE_FE_FLAG, flag_on); + options.emplace(ge::STREAM_NUM, "1"); // single op only uses one stream + options.emplace(ge::RUN_FLAG, flag_off); + options.emplace(ge::OPTION_GRAPH_RUN_MODE, flag_off); + options.emplace(ge::SINGLE_OP_FLAG, flag_on); + options.emplace(ge::PRECISION_MODE, FLAGS_precision_mode); + options.emplace(ge::SOC_VERSION, FLAGS_soc_version); + options.emplace(ge::CORE_TYPE, FLAGS_core_type); + options.emplace(ge::AICORE_NUM, FLAGS_aicore_num); + options.emplace(ge::OP_SELECT_IMPL_MODE, FLAGS_op_select_implmode); + options.emplace(ge::OPTYPELIST_FOR_IMPLMODE, FLAGS_optypelist_for_implmode); + options.emplace(ge::AUTO_TUNE_MODE, FLAGS_auto_tune_mode); + options.emplace(ge::GRAPH_MEMORY_MAX_SIZE, kGraphMemoryManagerMallocMaxSize); + options.emplace(ge::OP_DEBUG_LEVEL, to_string(FLAGS_op_debug_level)); +} + +domi::Status GenerateSingleOp(const std::string& json_file_path) { + if (!FLAGS_output.empty() && !ge::CheckOutputPathValid(FLAGS_output, "--output")) { + DOMI_LOGE("output path %s is not valid!", FLAGS_output.c_str()); + return domi::FAILED; + } + // check optypelist_for_implmode and op_select_implmode + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + ge::CheckImplmodeParamValid(FLAGS_optypelist_for_implmode, FLAGS_op_select_implmode) != ge::SUCCESS, + return ge::FAILED, "check optypelist_for_implmode and op_select_implmode failed!"); + + std::map<string, string> options; + // needs to be changed when the ge.ini plan is done + SetEnvForSingleOp(options); + + auto ret = ge::GELib::Initialize(options); + if (ret != ge::SUCCESS) { + DOMI_LOGE("GE initialize failed!"); + return domi::FAILED; + } + + ge::GeGenerator generator; + ret = generator.Initialize(options, domi::GetContext()); + if (ret != SUCCESS) { + DOMI_LOGE("GeGenerator initialize failed!"); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + vector<ge::SingleOpBuildParam> build_params; + if (ge::SingleOpParser::ParseSingleOpList(json_file_path, build_params) != ge::SUCCESS) { + DOMI_LOGE("parse single op json file failed"); + (void)generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + int index = 0; + for (auto &param : build_params) { + string output_path; + if (!FLAGS_output.empty()) { + output_path = FLAGS_output + "/"; + } + output_path += param.file_name; + ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path); + if (ret != SUCCESS) { + DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); + ret = domi::FAILED; + break; + } + GELOGI("Compile op success. 
op index = %d, output = %s", index, output_path.c_str()); + index += 1; + } + + (void)generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return ret; +} + +domi::Status GenerateOmModel() { + if (!CheckInputFormat()) { + GELOGE(ge::FAILED, "Check input_format failed"); + return domi::FAILED; + } + Status ret = GFlagUtils::CheckFlags(); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, + "Check flags failed! Please check whether some atc params that include semicolons[;] use double " + "quotation marks (\") to enclose each argument such as out_nodes, input_shape, dynamic_image_size"); +#if !defined(__ANDROID__) && !defined(ANDROID) + // Load custom operator Library + LoadCustomOpLib(true); + + SaveCustomCaffeProtoPath(); + + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); +#endif + + const int f_stream_num = 1; + std::map options; + options.insert(std::pair(string(ge::FRAMEWORK_TYPE), to_string(FLAGS_framework))); + options.insert(std::pair(string(ge::STREAM_NUM), to_string(f_stream_num))); + options.insert(std::pair(string(ge::CALIBRATION_CONF_FILE), FLAGS_cal_conf)); + options.insert(std::pair(string(ge::ENCRYPT_MODE), to_string(FLAGS_encrypt_mode))); + options.insert(std::pair(string(ge::EK_FILE), FLAGS_encrypt_key)); + options.insert(std::pair(string(ge::CERT_FILE), FLAGS_certificate)); + options.insert(std::pair(string(ge::HW_KEY_FILE), FLAGS_hardware_key)); + options.insert(std::pair(string(ge::PRIVATE_KEY_FILE), FLAGS_private_key)); + options.insert(std::pair(string(ge::OUTPUT_NODE_NAME), FLAGS_out_nodes)); + options.insert(std::pair(string(ge::INSERT_OP_FILE), FLAGS_insert_op_conf)); + options.insert(std::pair(string(ge::PRECISION_MODE), FLAGS_precision_mode)); + + options.insert(std::pair(string(ge::RUN_FLAG), to_string(0))); + options.insert(std::pair(string(ge::TRAIN_FLAG), to_string(0))); + + if (!FLAGS_output_type.empty()) { + options.insert(std::pair(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); + } + + options.insert(std::pair(string(ge::OP_SELECT_IMPL_MODE), FLAGS_op_select_implmode)); + options.insert(std::pair(string(ge::OPTYPELIST_FOR_IMPLMODE), FLAGS_optypelist_for_implmode)); + + if (!FLAGS_input_fp16_nodes.empty()) { + GELOGI("FLAGS_input_fp16_nodes : %s .", FLAGS_input_fp16_nodes.c_str()); + options.insert(std::pair(ge::INPUT_FP16_NODES, FLAGS_input_fp16_nodes)); + } + + options.insert(std::pair(string(ge::AUTO_TUNE_MODE), FLAGS_auto_tune_mode)); + + options.insert( + std::pair(string(ge::OPTION_EXEC_DISABLE_REUSED_MEMORY), to_string(FLAGS_disable_reuse_memory))); + + options.insert(std::pair(string(ge::SOC_VERSION), FLAGS_soc_version)); + + options.insert(std::pair(string(ge::CORE_TYPE), FLAGS_core_type)); + + options.insert(std::pair(string(ge::AICORE_NUM), FLAGS_aicore_num)); + + options.insert(std::pair(string(ge::BUFFER_OPTIMIZE), FLAGS_buffer_optimize)); + + options.insert(std::pair(string(ge::ENABLE_SMALL_CHANNEL), FLAGS_enable_small_channel)); + + options.insert(std::pair(string(ge::FUSION_SWITCH_FILE), FLAGS_fusion_switch_file)); + + options.insert(std::pair(string(ge::ENABLE_COMPRESS_WEIGHT), + (FLAGS_enable_compress_weight == "true") ? 
+ ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse)); + + options.insert(std::pair(string(ge::GRAPH_MEMORY_MAX_SIZE), kGraphMemoryManagerMallocMaxSize)); + + options.insert(std::pair(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); + + SetDynamicInputSizeOptions(); + + if (!FLAGS_save_original_model.empty()) { + options.insert(std::pair(string(ge::SAVE_ORIGINAL_MODEL), FLAGS_save_original_model)); + options.insert(std::pair(string(ge::ORIGINAL_MODEL_FILE), FLAGS_output + "_original.om")); + } + + options.insert(std::pair(string(ge::OP_DEBUG_LEVEL), to_string(FLAGS_op_debug_level))); + // set enable scope fusion passes + SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); + // print atc option map + ge::PrintOptionMap(options, "atc option"); + + // When the ATC module is transferred to a model, the suffix ".om" is automatically added to the model name + FLAGS_output = FLAGS_output + ".om"; + ret = GenerateModel(options, FLAGS_output); + if (ret != domi::SUCCESS) { + return domi::FAILED; + } + + return domi::SUCCESS; +} + +domi::Status ConvertModelToJson() { + Status ret = GFlagUtils::CheckConverJsonParamFlags(); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "Check convert json params flags failed!"); + + ret = ConvertModelToJson(FLAGS_framework, FLAGS_om, FLAGS_json); + + GE_IF_BOOL_EXEC(ret != domi::SUCCESS, return domi::FAILED); + return domi::SUCCESS; +} + +bool CheckRet(domi::Status ret) { + if (ret != domi::SUCCESS) { + if (FLAGS_mode == ONLY_PRE_CHECK) { + GELOGW("ATC precheck failed."); + } else if (FLAGS_mode == GEN_OM_MODEL) { + GELOGW("ATC generate offline model failed."); + } else if (FLAGS_mode == MODEL_TO_JSON) { + GELOGW("ATC convert model to json file failed."); + } else if (FLAGS_mode == PBTXT_TO_JSON) { + GELOGW("ATC convert pbtxt to json file failed."); + } else { + return false; + } + return false; + } + + if (FLAGS_mode == ONLY_PRE_CHECK) { + GELOGI("ATC precheck success."); + } else if (FLAGS_mode == GEN_OM_MODEL) { + GELOGI("ATC generate offline model success."); + } else if (FLAGS_mode == MODEL_TO_JSON) { + GELOGI("ATC convert model to json file success."); + } else if (FLAGS_mode == PBTXT_TO_JSON) { + GELOGI("ATC convert pbtxt to json file success."); + } + return true; +} + +domi::Status ConvertPbtxtToJson() { + Status ret = GFlagUtils::CheckConverJsonParamFlags(); + if (ret != domi::SUCCESS) { + GELOGE(ge::FAILED, "Check convert json params flags failed!"); + return domi::FAILED; + } + + ret = ge::ConvertPbtxtToJson(FLAGS_om.c_str(), FLAGS_json.c_str()); + if (ret != domi::SUCCESS) { + GELOGE(ge::FAILED, "ConvertPbtxtToJson fail."); + return domi::FAILED; + } + + return domi::SUCCESS; +} + +int init(int argc, char* argv[]) { + GFlagUtils::InitGFlag(argc, argv); + // set log level + int ret = -1; + const std::set log_level = {"null", "debug", "info", "warning", "error"}; + if (log_level.count(FLAGS_log) == 0) { + std::cout << "E10010: invalid value for --log:" << FLAGS_log + <<", only support debug, info, warning, error, null"<< std::endl; + return ret; + } + + ret = ge::CheckLogParamValidAndSetLogLevel(FLAGS_log); + if (ret != 0) { + return ret; + } + + std::string path_base = ge::GELib::GetPath(); + ret = ErrorManager::GetInstance().Init(path_base); + if (ret != 0) { + DOMI_LOGE("ErrorManager init fail !"); + return ret; + } + + return 0; +} + +long GetMemInfo(const std::string &key) { + std::string file_path = "/proc/meminfo"; + std::ifstream fs(file_path, std::ifstream::in); + if (!fs.is_open()) { + 
GELOGW("Can not open %s .", file_path.c_str()); + return 0; + } + std::string line; + while (getline(fs, line)) { // line not with \n + if (line.find(key) != std::string::npos) { + GELOGI("Find mem [%s] info line [%s]", key.c_str(), line.c_str()); + fs.close(); + size_t pos = line.find(":"); + if (pos == std::string::npos) { + return 0; + } + std::string current_mem_info_str = line.substr(pos + 1); + ge::StringUtils::Trim(current_mem_info_str); + GELOGI("Find mem [%s] info [%s].", key.c_str(), current_mem_info_str.c_str()); + return stol(current_mem_info_str); + } + } + fs.close(); // close the file + return 0; +} + +bool CheckMemInfo() { + if (FLAGS_auto_tune_mode.empty()) { + return true; + } + // only check current available mem when auto_tune_mode is set. + long current_mem_available = GetMemInfo("MemAvailable"); + GELOGI("Get mem available [%lu].", current_mem_available); + std::cout << "Current available mem is " << current_mem_available << "kB." << std::endl; + if ((current_mem_available > 0) && (current_mem_available < kMinAvailableMem)) { + GELOGE(ge::PARAM_INVALID, "Current available mem [%lu] can not be smaller than [%lu] .", + current_mem_available, kMinAvailableMem); + ErrorManager::GetInstance().ATCReportErrMessage("E10044", {"value", "min_value"}, + {to_string(current_mem_available), to_string(kMinAvailableMem)}); + return false; + } + return true; +} + +int main(int argc, char* argv[]) { + Status ret = domi::SUCCESS; + std::cout << "ATC start working now, please wait for a moment." << std::endl; + + // Initialize + if (init(argc, argv) != 0) { + std::cout << "ATC run failed, Please check the detail log, Try \'atc --help\' for more information" << std::endl; + return -1; + } + do { + if (!CheckMemInfo()) { + GELOGE(ge::PARAM_INVALID, "Current available mem is too small"); + ret = domi::FAILED; + break; + } + if (!FLAGS_singleop.empty()) { + ret = GenerateSingleOp(FLAGS_singleop); + break; + } + + // default mode(mode:0), Open source model to model + if (GEN_OM_MODEL == FLAGS_mode || ONLY_PRE_CHECK == FLAGS_mode) { + GE_IF_BOOL_EXEC(GenerateOmModel() != domi::SUCCESS, ret = domi::FAILED; break); + } else if (MODEL_TO_JSON == FLAGS_mode) { // Mode 1, transfer model to JSON + GE_CHK_BOOL_EXEC(ConvertModelToJson() == domi::SUCCESS, ret = domi::FAILED; + break, "ATC ConvertJson execute failed!!"); + } else if (FLAGS_mode == ge::RunMode::PBTXT_TO_JSON) { + GE_CHK_BOOL_EXEC(ConvertPbtxtToJson() == domi::SUCCESS, ret = domi::FAILED; + break, "ATC convert pbtxt to json execute failed!!"); + } else { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, {"--mode", std::to_string(FLAGS_mode), kModeSupport}); + GELOGE(ge::PARAM_INVALID, "Invalid value for --mode[%d], %s.", FLAGS_mode, kModeSupport); + ret = domi::FAILED; + break; + } + } while (0); + + if (!CheckRet(ret)) { + std::cout << "ATC run failed, Please check the detail log, Try \'atc --help\' for more information" << std::endl; + int result = ErrorManager::GetInstance().OutputErrMessage(STDOUT_FILENO); + if (result != 0) { + DOMI_LOGE("ErrorManager outputErrMessage fail !"); + } + GELOGI("Current mem available mem is [%lu]", GetMemInfo("MemAvailable")); + return ret; + } else { + std::cout << "ATC run success, welcome to the next use." 
<< std::endl; + (void)ErrorManager::GetInstance().OutputMessage(STDOUT_FILENO); + return 0; + } +} /*lint +e530*/ diff --git a/ge/offline/module.mk b/ge/offline/module.mk new file mode 100755 index 00000000..12b70260 --- /dev/null +++ b/ge/offline/module.mk @@ -0,0 +1,52 @@ + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := atc + +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/framework/domi \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)inc/common/util \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libprotobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_compiler \ + libruntime_compile \ + libparser_common \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) + diff --git a/ge/offline/proto/ge_ir.proto b/ge/offline/proto/ge_ir.proto new file mode 120000 index 00000000..f60a0f89 --- /dev/null +++ b/ge/offline/proto/ge_ir.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/ge_ir.proto \ No newline at end of file diff --git a/ge/offline/proto/insert_op.proto b/ge/offline/proto/insert_op.proto new file mode 120000 index 00000000..27b233e5 --- /dev/null +++ b/ge/offline/proto/insert_op.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/insert_op.proto \ No newline at end of file diff --git a/ge/offline/proto/om.proto b/ge/offline/proto/om.proto new file mode 120000 index 00000000..91c581bb --- /dev/null +++ b/ge/offline/proto/om.proto @@ -0,0 +1 @@ +../../../../inc/common/proto/om.proto \ No newline at end of file diff --git a/ge/offline/proto/task.proto b/ge/offline/proto/task.proto new file mode 120000 index 00000000..36ae4847 --- /dev/null +++ b/ge/offline/proto/task.proto @@ -0,0 +1 @@ +../../proto/task.proto \ No newline at end of file diff --git a/ge/offline/single_op_parser.cc b/ge/offline/single_op_parser.cc new file mode 100755 index 00000000..77b353e8 --- /dev/null +++ b/ge/offline/single_op_parser.cc @@ -0,0 +1,448 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "single_op_parser.h" + +#include <vector> +#include <algorithm> +#include <fstream> +#include <sstream> + +#include <nlohmann/json.hpp> + +#include "framework/common/debug/ge_log.h" +#include "common/util/error_manager/error_manager.h" +#include "common/ge_inner_error_codes.h" +#include "framework/common/util.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/operator_factory_impl.h" + +using Json = nlohmann::json; +using std::string; +using std::vector; +using std::map; + +namespace ge { +namespace { +constexpr char const *kKeyOp = "op"; +constexpr char const *kKeyInputDesc = "input_desc"; +constexpr char const *kKeyOutputDesc = "output_desc"; +constexpr char const *kKeyAttr = "attr"; +constexpr char const *kKeyName = "name"; +constexpr char const *kKeyType = "type"; +constexpr char const *kKeyShape = "shape"; +constexpr char const *kKeyShapeRange = "shape_range"; +constexpr char const *kKeyValue = "value"; +constexpr char const *kKeyFormat = "format"; +constexpr char const *kFileSuffix = ".om"; +constexpr int kDumpJsonIndent = 2; +constexpr int kShapeRangePairSize = 2; +constexpr int kShapeRangeLow = 0; +constexpr int kShapeRangeHigh = 1; + +map<string, GeAttrValue::ValueType> kAttrTypeDict = { + {"bool", GeAttrValue::VT_BOOL}, + {"int", GeAttrValue::VT_INT}, + {"float", GeAttrValue::VT_FLOAT}, + {"string", GeAttrValue::VT_STRING}, + {"list_bool", GeAttrValue::VT_LIST_BOOL}, + {"list_int", GeAttrValue::VT_LIST_INT}, + {"list_float", GeAttrValue::VT_LIST_FLOAT}, + {"list_string", GeAttrValue::VT_LIST_STRING}, + {"list_list_int", GeAttrValue::VT_LIST_LIST_INT}, + {"data_type", GeAttrValue::VT_DATA_TYPE}, +}; + +map<string, DataType> kDataTypeDict = { + {"bool", DT_BOOL}, + {"int8", DT_INT8}, + {"uint8", DT_UINT8}, + {"int16", DT_INT16}, + {"uint16", DT_UINT16}, + {"int32", DT_INT32}, + {"uint32", DT_UINT32}, + {"int64", DT_INT64}, + {"uint64", DT_UINT64}, + {"float16", DT_FLOAT16}, + {"half", DT_FLOAT16}, + {"fp16", DT_FLOAT16}, + {"float", DT_FLOAT}, + {"float32", DT_FLOAT}, + {"double", DT_DOUBLE}, +}; + +map<string, Format> kFormatDict = { + {"nchw", FORMAT_NCHW}, + {"nhwc", FORMAT_NHWC}, + {"nd", FORMAT_ND}, + {"fractal_nz", FORMAT_FRACTAL_NZ}, + {"fractal_z", FORMAT_FRACTAL_Z}, + {"nc1hwc0", FORMAT_NC1HWC0}, +}; +} + +template<typename T> +void SetAttrValue(const Json &j, SingleOpAttr &attr) { + attr.value.SetValue<T>(j.at(kKeyValue).get<T>()); +} + +template<typename T> +T GetValue(const map<string, T> &dict, string &key, T default_val) { + transform(key.begin(), key.end(), key.begin(), ::tolower); + auto it = dict.find(key); + if (it == dict.end()) { + return default_val; + } + + return it->second; +} + +void from_json(const Json &j, SingleOpTensorDesc &desc) { + desc.dims = j.at(kKeyShape).get<vector<int64_t>>(); + auto it = j.find(kKeyShapeRange); + if (it != j.end()) { + desc.dim_ranges = j.at(kKeyShapeRange).get<vector<vector<int64_t>>>(); + } + string format_str = j.at(kKeyFormat).get<string>(); + string type_str = j.at(kKeyType).get<string>(); + desc.format = GetValue(kFormatDict, format_str, FORMAT_RESERVED); + desc.type = GetValue(kDataTypeDict, type_str, DT_UNDEFINED); + auto tensor_name = j.find(kKeyName); + if (tensor_name != j.end()) { + desc.name = tensor_name->get<string>(); + } +} + +void from_json(const Json &j, SingleOpAttr &attr) { + attr.name = j.at(kKeyName).get<string>(); + attr.type = j.at(kKeyType).get<string>(); + auto it = kAttrTypeDict.find(attr.type); + if (it == kAttrTypeDict.end()) { + GELOGE(UNSUPPORTED, "Parse attr[%s] failed. 
Unsupported type: %s", attr.name.c_str(), attr.type.c_str()); + return; + } + + switch (it->second) { + case GeAttrValue::VT_BOOL: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_INT: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_FLOAT: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_STRING: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_LIST_BOOL: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_INT: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_FLOAT: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_STRING: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_LIST_INT: + SetAttrValue>>(j, attr); + break; + case GeAttrValue::VT_DATA_TYPE: + SetAttrValue(j, attr); + break; + default: + GELOGE(UNSUPPORTED, "Parse attr[%s] failed. Unsupported type: %s", attr.name.c_str(), attr.type.c_str()); + break; + } +} + +void from_json(const Json &j, SingleOpDesc &desc) { + desc.op = j.at(kKeyOp).get(); + + auto input_desc = j.find(kKeyInputDesc); + if (input_desc != j.end()) { + desc.input_desc = input_desc->get>(); + } + + auto output_desc = j.find(kKeyOutputDesc); + if (output_desc != j.end()) { + desc.output_desc = output_desc->get>(); + } + + auto attr_field = j.find(kKeyAttr); + if (attr_field != j.end()) { + desc.attrs = attr_field->get>(); + } +} + +Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { + std::string real_path = RealPath(file.c_str()); + if (real_path.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10023", {"value"}, {file}); + GELOGE(FAILED, "Input parameter[--singleop]'s value[%s] is not a valid path.", file.c_str()); + return INTERNAL_ERROR; + } + + std::ifstream ifs(real_path); + if (!ifs.is_open()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10024", {"value"}, {file}); + GELOGE(FAILED, "Open file[%s] provided in input parameter[--singleop] failed.", file.c_str()); + return FAILED; + } + try { + ifs >> json_obj; + } catch (const std::exception &e) { + ErrorManager::GetInstance().ATCReportErrMessage("E10025", {"realpath", "errmsg"}, {real_path, e.what()}); + GELOGE(PARAM_INVALID, "Parse file[%s] provided in input parameter[--singleop] failed, exception = %s.", + real_path.c_str(), e.what()); + return PARAM_INVALID; + } + + ifs.close(); + return SUCCESS; +} + +bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { + if (op_desc.op.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10026"); + GELOGE(PARAM_INVALID, "Op name is empty"); + return false; + } + + int index = 0; + for (auto &tensor_desc : op_desc.input_desc) { + if (tensor_desc.type == DT_UNDEFINED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"input", std::to_string(index)}); + GELOGE(false, "Input's dataType is invalid when the index is %d", index); + return false; + } + + if (tensor_desc.format == FORMAT_RESERVED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"input", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Input's format is invalid when the index is %d", index); + return false; + } + ++index; + } + + index = 0; + for (auto &tensor_desc : op_desc.output_desc) { + if (tensor_desc.type == DT_UNDEFINED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"output", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index); + return false; + } + + if (tensor_desc.format == FORMAT_RESERVED) { + 
ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"output", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Output's format is invalid when the index is %d", index); + return false; + } + ++index; + } + + for (auto &attr : op_desc.attrs) { + if (attr.name.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10029"); + GELOGE(PARAM_INVALID, "attr name is empty"); + return false; + } + + if (attr.value.IsEmpty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10030", {"attrname"}, {attr.name}); + GELOGE(PARAM_INVALID, "Parse attr \"%s\" failed. ", attr.name.c_str()); + return false; + } + } + + return true; +} + +std::unique_ptr<OpDesc> SingleOpParser::CreateOpDesc(const string &op_type) { + return std::unique_ptr<OpDesc>(new(std::nothrow) OpDesc(op_type, op_type)); +} + +Status SingleOpParser::ConvertToBuildParam(int index, + const SingleOpDesc &single_op_desc, + SingleOpBuildParam &build_param) { + auto op_desc = CreateOpDesc(single_op_desc.op); + if (op_desc == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to create instance of opDesc"); + return MEMALLOC_FAILED; + } + + std::stringstream file_name; + file_name << index; + file_name << "_" << single_op_desc.op; + for (auto &desc : single_op_desc.input_desc) { + file_name << "_" << desc.type << "_" << desc.format; + for (auto dim : desc.dims) { + file_name << "_" << dim; + } + GeTensorDesc ge_tensor_desc(GeShape(desc.dims), + desc.format, + desc.type); + ge_tensor_desc.SetOriginFormat(desc.format); + GE_CHK_STATUS_RET_NOLOG(SetShapeRange(desc, ge_tensor_desc)); + TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size()); + TensorUtils::SetInputTensor(ge_tensor_desc, true); + TensorUtils::SetOutputTensor(ge_tensor_desc, false); + if (desc.name.empty()) { + op_desc->AddInputDesc(ge_tensor_desc); + } else { + op_desc->AddInputDesc(desc.name, ge_tensor_desc); + } + build_param.inputs.emplace_back(ge_tensor_desc); + } + + for (auto &desc : single_op_desc.output_desc) { + file_name << "_" << desc.type << "_" << desc.format; + for (auto dim : desc.dims) { + file_name << "_" << dim; + } + + GeTensorDesc ge_tensor_desc(GeShape(desc.dims), + desc.format, + desc.type); + ge_tensor_desc.SetOriginFormat(desc.format); + GE_CHK_STATUS_RET_NOLOG(SetShapeRange(desc, ge_tensor_desc)); + TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size()); + TensorUtils::SetInputTensor(ge_tensor_desc, false); + TensorUtils::SetOutputTensor(ge_tensor_desc, true); + op_desc->AddOutputDesc(ge_tensor_desc); + build_param.outputs.emplace_back(ge_tensor_desc); + } + + for (const auto &attr : single_op_desc.attrs) { + op_desc->SetAttr(attr.name, attr.value); + } + + if (VerifyOpInputOutputSizeByIr(*op_desc) != SUCCESS) { + GELOGE(PARAM_INVALID, "Verify op [%s] input or output size failed.", op_desc->GetType().c_str()); + return PARAM_INVALID; + } + + file_name << kFileSuffix; + build_param.file_name = file_name.str(); + build_param.op_desc.reset(op_desc.release()); + return SUCCESS; +} + +Status SingleOpParser::VerifyOpInputOutputSizeByIr(const OpDesc &current_op_desc) { + ge::Operator operator_ir = ge::OperatorFactory::CreateOperator("tmp_operator", current_op_desc.GetType()); + if (!operator_ir.IsEmpty()) { + auto opdesc_ir = ge::OpDescUtils::GetOpDescFromOperator(operator_ir); + GE_CHECK_NOTNULL(opdesc_ir); + size_t current_opdesc_inputs_num = current_op_desc.GetInputsSize(); + size_t ir_opdesc_inputs_num = opdesc_ir->GetInputsSize(); + if (current_opdesc_inputs_num < ir_opdesc_inputs_num) { + string reason = "is smaller than the ir needed 
input size " + std::to_string(ir_opdesc_inputs_num); + ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"}, + {current_op_desc.GetName(), "input size " + std::to_string(current_opdesc_inputs_num), reason}); + GELOGE(PARAM_INVALID, "This op [%s] input size %zu is smaller than the ir needed input size %zu", + current_op_desc.GetName().c_str(), current_opdesc_inputs_num, ir_opdesc_inputs_num); + return PARAM_INVALID; + } + size_t current_opdesc_outputs_num = current_op_desc.GetOutputsSize(); + size_t ir_opdesc_outputs_num = opdesc_ir->GetOutputsSize(); + if (current_opdesc_outputs_num < ir_opdesc_outputs_num) { + string reason = "is smaller than the ir needed output size " + std::to_string(ir_opdesc_outputs_num); + ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"}, + {current_op_desc.GetName(), "output size " + std::to_string(current_opdesc_outputs_num), reason}); + GELOGE(PARAM_INVALID, "This op [%s] output size %zu is smaller than the ir needed output size %zu", + current_op_desc.GetName().c_str(), current_opdesc_outputs_num, ir_opdesc_outputs_num); + return PARAM_INVALID; + } + } + return SUCCESS; +} + +Status SingleOpParser::SetShapeRange(const SingleOpTensorDesc &tensor_desc, GeTensorDesc &ge_tensor_desc) { + if (tensor_desc.dim_ranges.empty()) { + return SUCCESS; + } + + std::vector> shape_range; + size_t range_index = 0; + for (auto dim : tensor_desc.dims) { + if (dim >= 0) { + shape_range.emplace_back(dim, dim); + GELOGD("Adding shape range: [%ld, %ld]", dim, dim); + } else { + if (range_index >= tensor_desc.dim_ranges.size()) { + GELOGE(PARAM_INVALID, "The number of shape_range mismatches that of unknown dims."); + return PARAM_INVALID; + } + + auto &range = tensor_desc.dim_ranges[range_index]; + if (range.size() != kShapeRangePairSize) { + GELOGE(PARAM_INVALID, "Invalid shape range entry. 
index = %zu, size = %zu", range_index, range.size()); + return PARAM_INVALID; + } + + shape_range.emplace_back(range[kShapeRangeLow], range[kShapeRangeHigh]); + GELOGD("Adding shape range: [%ld, %ld]", range[kShapeRangeLow], range[kShapeRangeHigh]); + ++range_index; + } + } + + ge_tensor_desc.SetShapeRange(shape_range); + return SUCCESS; +} + +Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector &op_list) { + int index = 0; + try { + Json single_op_list_json; + auto ret = ReadJsonFile(file, single_op_list_json); + if (ret != SUCCESS) { + return ret; + } + + for (const Json &single_op_json : single_op_list_json) { + SingleOpDesc single_op_desc; + GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); + single_op_desc = single_op_json; + if (!Validate(single_op_desc)) { + GELOGE(PARAM_INVALID, "Validate the index[%d] of op failed when read json file[%s].", index, file.c_str()); + return PARAM_INVALID; + } + + SingleOpBuildParam param; + ret = ConvertToBuildParam(index, single_op_desc, param); + if (ret != SUCCESS) { + return ret; + } + + op_list.emplace_back(param); + GELOGI("Parse the index[%d] of op success", index); + index += 1; + } + } catch (const nlohmann::json::exception &e) { + ErrorManager::GetInstance().ATCReportErrMessage("E10032", {"index", "jsonfile", "exception"}, + {std::to_string(index), file, e.what()}); + GELOGE(PARAM_INVALID, "Parse the index[%d] of op failed when read json file[%s], exception %s", + index, file.c_str(), e.what()); + return PARAM_INVALID; + } + + return SUCCESS; +} +} // namespace ge + diff --git a/ge/offline/single_op_parser.h b/ge/offline/single_op_parser.h new file mode 100755 index 00000000..8e5082d6 --- /dev/null +++ b/ge/offline/single_op_parser.h @@ -0,0 +1,78 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ACL_TOOLS_COMPILE_PARSER_H +#define ACL_TOOLS_COMPILE_PARSER_H + +#include +#include + +#include + +#include "ge/ge_api_error_codes.h" +#include "graph/types.h" +#include "graph/ge_attr_value.h" +#include "graph/op_desc.h" + +namespace ge { +struct SingleOpTensorDesc { + std::string name; + std::vector dims; + std::vector> dim_ranges; + ge::Format format = ge::FORMAT_RESERVED; + ge::DataType type = ge::DT_UNDEFINED; +}; + +struct SingleOpAttr { + std::string name; + std::string type; + ge::GeAttrValue value; +}; + +struct SingleOpDesc { + std::string op; + std::vector input_desc; + std::vector output_desc; + std::vector attrs; +}; + +struct SingleOpBuildParam { + ge::OpDescPtr op_desc; + std::vector inputs; + std::vector outputs; + std::string file_name; +}; + +void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); + +void from_json(const nlohmann::json &json, SingleOpAttr &desc); + +void from_json(const nlohmann::json &json, SingleOpDesc &desc); + +class SingleOpParser { + public: + static Status ParseSingleOpList(const std::string &file, std::vector &op_list); + + private: + static Status ReadJsonFile(const std::string &file, nlohmann::json &json_obj); + static bool Validate(const SingleOpDesc &op_desc); + static std::unique_ptr CreateOpDesc(const std::string &op_type); + static Status ConvertToBuildParam(int index, const SingleOpDesc &single_op_desc, SingleOpBuildParam &build_param); + static Status VerifyOpInputOutputSizeByIr(const OpDesc ¤t_op_desc); + static Status SetShapeRange(const SingleOpTensorDesc &tensor_desc, GeTensorDesc &ge_tensor_desc); +}; +} // namespace ge + +#endif // ACL_TOOLS_COMPILE_PARSER_H diff --git a/ge/opskernel_manager/ops_kernel_manager.cc b/ge/opskernel_manager/ops_kernel_manager.cc index 51e8f438..e810b1de 100644 --- a/ge/opskernel_manager/ops_kernel_manager.cc +++ b/ge/opskernel_manager/ops_kernel_manager.cc @@ -89,12 +89,12 @@ Status OpsKernelManager::Initialize(const map &options_const) { return GE_OPS_GET_NO_VALID_SO; } Status rst1 = - plugin_manager_.InvokeAll &>(kGetOpsKernelInfoStores, ops_kernel_store_); + plugin_manager_.InvokeAll &>(kGetOpsKernelInfoStores, ops_kernel_store_); if (rst1 != SUCCESS) { GELOGW("Initialize OpsKernelInfo failed."); } Status rst2 = - plugin_manager_.InvokeAll &>(kGetGraphOptimizerObjs, graph_optimizers_); + plugin_manager_.InvokeAll &>(kGetGraphOptimizerObjs, graph_optimizers_); if (rst2 != SUCCESS) { GELOGW("Initialize GraphOptimizerObjs failed."); } @@ -125,7 +125,7 @@ Status OpsKernelManager::Initialize(const map &options_const) { } } -void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, const std::map &options) { +void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, const std::map& options) { GELOGI("Enter get external engine so path schedule"); const char *path_env = std::getenv("ASCEND_ENGINE_PATH"); if (path_env != nullptr) { @@ -137,8 +137,8 @@ void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, co std::string so_path = "plugin/opskernel/"; std::string path = path_base + so_path; extern_engine_path = (path + "libfe.so" + ":") + (path + "libge_local_engine.so" + ":") + - (path + "librts_engine.so" + ":") + (path + "libaicpu_engine.so" + ":") + - (path + "libhost_cpu_engine.so" + ":"); + (path + "librts_engine.so" + ":") + (path + "libaicpu_ascend_engine.so" + ":") + + (path + "libhost_cpu_engine.so" + ":") + (path + "libaicpu_tf_engine.so" + ":"); auto iter = options.find(OPTION_EXEC_HCCL_FLAG); if (iter == 
options.end() || iter->second != "0") { extern_engine_path += (path_base + "libhcom_graph_adaptor.so"); diff --git a/ge/opskernel_manager/ops_kernel_manager.h b/ge/opskernel_manager/ops_kernel_manager.h index a5d4d85c..b34c483e 100644 --- a/ge/opskernel_manager/ops_kernel_manager.h +++ b/ge/opskernel_manager/ops_kernel_manager.h @@ -34,8 +34,8 @@ #include "ge/ge_api_types.h" #include "runtime/base.h" -using std::map; using std::string; +using std::map; using std::vector; namespace ge { @@ -89,7 +89,7 @@ class OpsKernelManager { Status CheckPluginPtr() const; - void GetExternalEnginePath(std::string &path, const std::map &options); + void GetExternalEnginePath(std::string &path, const std::map& options); void InitOpsKernelInfo(); @@ -99,7 +99,7 @@ class OpsKernelManager { Status ParsePluginOptions(const map &options, const string &plugin_name, bool &enable_flag); - Status LoadGEGraphOptimizer(map &graphOptimizer); + Status LoadGEGraphOptimizer(map& graphOptimizer); Status InitGraphOptimizerPriority(); diff --git a/ge/opskernel_manager/optimizer_priority.pbtxt b/ge/opskernel_manager/optimizer_priority.pbtxt index 9f8a03fb..1c9522c9 100755 --- a/ge/opskernel_manager/optimizer_priority.pbtxt +++ b/ge/opskernel_manager/optimizer_priority.pbtxt @@ -1 +1 @@ -optimizer:["aicpu_original_optimizer","AIcoreEngine","VectorEngine","aicpu_optimizer","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE"] \ No newline at end of file +optimizer:["aicpu_tf_optimizer","AIcoreEngine","VectorEngine","aicpu_ascend_optimizer","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE"] diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt index f19d077c..751da08e 100644 --- a/ge/plugin/engine/CMakeLists.txt +++ b/ge/plugin/engine/CMakeLists.txt @@ -1,47 +1,47 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ +set(SRC_LIST + "dnnengines.cc" + "engine_manage.cc" +) -# libengine.so -file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "*.cc" - ) +############ libengine.so ############ +add_library(engine SHARED ${SRC_LIST}) -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}) -#include_directories(${GE_SOURCE_DIR}/src) -include_directories(${GE_SOURCE_DIR}/ge) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/inc/framework/common) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/metadef/inc) -include_directories(${GE_SOURCE_DIR}/metadef/inc/external) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) -include_directories(${GE_SOURCE_DIR}/build) +target_compile_options(engine PRIVATE + -Werror +) -######### libengine.so ############# -add_library(engine SHARED ${SRC_LIST}) target_compile_definitions(engine PRIVATE - REUSE_MEMORY=1 - PLATFORM_CLOUD - PROTOBUF_INLINE_NOT_IN_HEADERS=0 - Werror) -target_link_libraries(engine - ${slog} - rt - dl) + REUSE_MEMORY=1 + PROTOBUF_INLINE_NOT_IN_HEADERS=0 +) + +target_include_directories(engine PRIVATE + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc/ + ${GE_CODE_DIR}/inc/framework + ${GE_CODE_DIR}/inc/framework/common + ${GE_CODE_DIR}/inc/external + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc +) + +target_link_libraries(engine PRIVATE + $ + -Wl,--no-as-needed + slog + -Wl,--as-needed + -lrt + -ldl +) + +############ install ############ +set(INSTALL_BASE_DIR "") +set(INSTALL_LIBRARY_DIR lib) + +install(TARGETS engine OPTIONAL + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) diff --git a/ge/plugin/engine/dnnengines.cc b/ge/plugin/engine/dnnengines.cc old mode 100644 new mode 100755 index d85d1668..cf6b7517 --- a/ge/plugin/engine/dnnengines.cc +++ b/ge/plugin/engine/dnnengines.cc @@ -55,13 +55,13 @@ void VectorCoreDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs AICpuDNNEngine::AICpuDNNEngine(const std::string &engine_name) { engine_attribute_.engine_name = engine_name; - engine_attribute_.compute_cost = COST_2; + engine_attribute_.compute_cost = COST_3; engine_attribute_.runtime_type = DEVICE; engine_attribute_.engine_input_format = FORMAT_RESERVED; engine_attribute_.engine_output_format = FORMAT_RESERVED; } -AICpuDNNEngine::AICpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } +AICpuDNNEngine::AICpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } Status AICpuDNNEngine::Initialize(const std::map &options) { return SUCCESS; } @@ -69,6 +69,22 @@ Status AICpuDNNEngine::Finalize() { return SUCCESS; } void AICpuDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } +AICpuTFDNNEngine::AICpuTFDNNEngine(const std::string &engine_name) { + engine_attribute_.engine_name = engine_name; + engine_attribute_.compute_cost = COST_2; + engine_attribute_.runtime_type = DEVICE; + engine_attribute_.engine_input_format = FORMAT_RESERVED; + engine_attribute_.engine_output_format = FORMAT_RESERVED; +} + +AICpuTFDNNEngine::AICpuTFDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } + +Status 
AICpuTFDNNEngine::Initialize(const std::map &options) { return SUCCESS; } + +Status AICpuTFDNNEngine::Finalize() { return SUCCESS; } + +void AICpuTFDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } + GeLocalDNNEngine::GeLocalDNNEngine(const std::string &engine_name) { engine_attribute_.engine_name = engine_name; engine_attribute_.engine_input_format = FORMAT_RESERVED; diff --git a/ge/plugin/engine/dnnengines.h b/ge/plugin/engine/dnnengines.h index d776c2b9..4a2a9df5 100644 --- a/ge/plugin/engine/dnnengines.h +++ b/ge/plugin/engine/dnnengines.h @@ -55,6 +55,7 @@ class VectorCoreDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; + class AICpuDNNEngine : public DNNEngine { public: AICpuDNNEngine() = default; @@ -70,6 +71,21 @@ class AICpuDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; +class AICpuTFDNNEngine : public DNNEngine { + public: + AICpuTFDNNEngine() = default; + explicit AICpuTFDNNEngine(const std::string &engine_name); + explicit AICpuTFDNNEngine(const DNNEngineAttribute &attrs); + ~AICpuTFDNNEngine() = default; + + Status Initialize(const std::map &options); + Status Finalize(); + void GetAttributes(DNNEngineAttribute &attr) const; + + private: + DNNEngineAttribute engine_attribute_; +}; + class GeLocalDNNEngine : public DNNEngine { public: GeLocalDNNEngine() = default; @@ -86,7 +102,7 @@ class GeLocalDNNEngine : public DNNEngine { }; class HostCpuDNNEngine : public DNNEngine { - public: +public: HostCpuDNNEngine() = default; explicit HostCpuDNNEngine(const std::string &engine_name); explicit HostCpuDNNEngine(const DNNEngineAttribute &attrs); @@ -96,7 +112,7 @@ class HostCpuDNNEngine : public DNNEngine { Status Finalize(); void GetAttributes(DNNEngineAttribute &attr) const; - private: +private: DNNEngineAttribute engine_attribute_; }; diff --git a/ge/plugin/engine/engine_manage.cc b/ge/plugin/engine/engine_manage.cc index 82cd90ee..a14c92ea 100644 --- a/ge/plugin/engine/engine_manage.cc +++ b/ge/plugin/engine/engine_manage.cc @@ -89,10 +89,10 @@ void RegisterVectorEngine() { } void RegisterAiCpuEngine() { - const std::string vm_aicpu = "DNN_VM_AICPU"; + const std::string vm_aicpu = "DNN_VM_AICPU_ASCEND"; std::vector mem_type_aicpu; mem_type_aicpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); - DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_2, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_3, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu); if (vm_engine_ptr == nullptr) { GELOGE(ge::FAILED, "make vm_engine_ptr failed"); @@ -103,6 +103,21 @@ void RegisterAiCpuEngine() { } } +void RegisterAiCpuTFEngine() { + const std::string vm_aicpu_tf = "DNN_VM_AICPU"; + std::vector mem_type_aicpu_tf; + mem_type_aicpu_tf.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); + DNNEngineAttribute attr_aicpu_tf = {vm_aicpu_tf, mem_type_aicpu_tf, COST_2, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu_tf); + if (vm_engine_ptr == nullptr) { + GELOGE(ge::FAILED, "make vm_engine_ptr failed"); + return; + } + if (EngineManager::RegisterEngine(vm_aicpu_tf, vm_engine_ptr) != SUCCESS) { + GELOGW("register vmAicpuTFEngine failed"); + } +} + void RegisterGeLocalEngine() { const std::string vm_ge_local = "DNN_VM_GE_LOCAL"; std::vector mem_type_ge_local; @@ -168,6 +183,7 @@ void RegisterHcclEngine() { void GetDNNEngineObjs(std::map &engines) { RegisterAiCoreEngine(); 
RegisterVectorEngine(); + RegisterAiCpuTFEngine(); RegisterAiCpuEngine(); RegisterGeLocalEngine(); RegisterHostCpuEngine(); diff --git a/ge/plugin/engine/module.mk b/ge/plugin/engine/module.mk old mode 100644 new mode 100755 diff --git a/ge/proto/caffe/caffe.proto b/ge/proto/caffe/caffe.proto new file mode 120000 index 00000000..8630c65b --- /dev/null +++ b/ge/proto/caffe/caffe.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/caffe/caffe.proto \ No newline at end of file diff --git a/ge/proto/dump_task.proto b/ge/proto/dump_task.proto new file mode 120000 index 00000000..e98adb2f --- /dev/null +++ b/ge/proto/dump_task.proto @@ -0,0 +1 @@ +../../../inc/common/proto/dump_task.proto \ No newline at end of file diff --git a/ge/proto/fusion_model.proto b/ge/proto/fusion_model.proto new file mode 100755 index 00000000..c92c5581 --- /dev/null +++ b/ge/proto/fusion_model.proto @@ -0,0 +1,21 @@ +/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + */ +syntax = "proto3"; + +import "om.proto"; + +package domi; + +message FusionModelDef { + string version = 1; + repeated OpDef fusion_op = 2; +} \ No newline at end of file diff --git a/ge/proto/fwk_adapter.proto b/ge/proto/fwk_adapter.proto new file mode 120000 index 00000000..969b0acf --- /dev/null +++ b/ge/proto/fwk_adapter.proto @@ -0,0 +1 @@ +../../../inc/cce/fwk_adapter.proto \ No newline at end of file diff --git a/ge/proto/ge_api.proto b/ge/proto/ge_api.proto new file mode 100755 index 00000000..331c5aea --- /dev/null +++ b/ge/proto/ge_api.proto @@ -0,0 +1,88 @@ +syntax = "proto3"; +package ge.api_pb; + +import "ge_ir.proto"; + +// GE initialize +message GEInitialize { + map options = 1; +}; + +// initialize response +message GEInitializeResponse { + uint32 status = 1; + uint32 clientId = 2; +}; + +// GE finalize +message GEFinalize { + bool final = 1; + uint32 clientId = 2; +}; + +message GEFinalizeResponse { + uint32 status = 1; +}; + +// GE Session +message CreateSession{ + map options = 1; +}; + +message CreateSessionResponse { + uint32 status = 1; + uint64 sessionId = 2; +}; + +//GE AddGraph +//model serialize :: serializegraph +message SessionAddGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + ge.proto.GraphDef graph = 3; +}; + +message SessionAddGraphResponse { + uint32 status = 1; +}; + +//GE SessionRemoveGraph +message SessionRemoveGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; +}; + +message SessionRemoveGraphResponse { + uint32 status = 1; +}; + +message SessionRunGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + repeated ge.proto.TensorDef tensor = 3; +}; + +message SessionBuildGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + repeated ge.proto.TensorDef tensor = 3; + string savePath = 4; +}; + +message SessionRunGraphResponse { + uint32 status = 1; + repeated ge.proto.TensorDef tensor = 2; +}; + +message SessionBuildGraphResponse { + uint32 status = 1; +}; + +message DestroySession{ + bool final = 1; + uint64 sessionId = 2; +}; + +message DestroySessionResponse { + uint32 status = 1; +}; diff --git 
a/ge/proto/ge_ir.proto b/ge/proto/ge_ir.proto new file mode 120000 index 00000000..a1cbb368 --- /dev/null +++ b/ge/proto/ge_ir.proto @@ -0,0 +1 @@ +../../../inc/common/proto/ge_ir.proto \ No newline at end of file diff --git a/ge/proto/insert_op.proto b/ge/proto/insert_op.proto new file mode 120000 index 00000000..bcae07d0 --- /dev/null +++ b/ge/proto/insert_op.proto @@ -0,0 +1 @@ +../../../inc/common/proto/insert_op.proto \ No newline at end of file diff --git a/ge/proto/om.proto b/ge/proto/om.proto new file mode 120000 index 00000000..f8fc294e --- /dev/null +++ b/ge/proto/om.proto @@ -0,0 +1 @@ +../../../inc/common/proto/om.proto \ No newline at end of file diff --git a/ge/proto/op_mapping_info.proto b/ge/proto/op_mapping_info.proto new file mode 120000 index 00000000..858fd65b --- /dev/null +++ b/ge/proto/op_mapping_info.proto @@ -0,0 +1 @@ +../../../inc/common/proto/op_mapping_info.proto \ No newline at end of file diff --git a/ge/proto/optimizer_priority.proto b/ge/proto/optimizer_priority.proto new file mode 100644 index 00000000..769619cf --- /dev/null +++ b/ge/proto/optimizer_priority.proto @@ -0,0 +1,7 @@ +syntax = "proto3"; +package ge.optimizers; + +// Default: GE>FE>AICPU +message Priority{ + repeated string optimizer = 1; +} \ No newline at end of file diff --git a/ge/proto/task.proto b/ge/proto/task.proto new file mode 120000 index 00000000..9f009354 --- /dev/null +++ b/ge/proto/task.proto @@ -0,0 +1 @@ +../../../inc/common/proto/task.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/attr_value.proto b/ge/proto/tensorflow/attr_value.proto new file mode 120000 index 00000000..7f693fb0 --- /dev/null +++ b/ge/proto/tensorflow/attr_value.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/attr_value.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/function.proto b/ge/proto/tensorflow/function.proto new file mode 120000 index 00000000..119dbf5b --- /dev/null +++ b/ge/proto/tensorflow/function.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/function.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/graph.proto b/ge/proto/tensorflow/graph.proto new file mode 120000 index 00000000..dc7c7fd0 --- /dev/null +++ b/ge/proto/tensorflow/graph.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/graph.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/node_def.proto b/ge/proto/tensorflow/node_def.proto new file mode 120000 index 00000000..6557a8de --- /dev/null +++ b/ge/proto/tensorflow/node_def.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/node_def.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/op_def.proto b/ge/proto/tensorflow/op_def.proto new file mode 120000 index 00000000..cae13380 --- /dev/null +++ b/ge/proto/tensorflow/op_def.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/op_def.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/resource_handle.proto b/ge/proto/tensorflow/resource_handle.proto new file mode 120000 index 00000000..7a263cff --- /dev/null +++ b/ge/proto/tensorflow/resource_handle.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/resource_handle.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/tensor.proto b/ge/proto/tensorflow/tensor.proto new file mode 120000 index 00000000..d6c4a180 --- /dev/null +++ b/ge/proto/tensorflow/tensor.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/tensor.proto \ No newline at end of file diff --git 
a/ge/proto/tensorflow/tensor_shape.proto b/ge/proto/tensorflow/tensor_shape.proto new file mode 120000 index 00000000..1dcf6d8a --- /dev/null +++ b/ge/proto/tensorflow/tensor_shape.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/tensor_shape.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/types.proto b/ge/proto/tensorflow/types.proto new file mode 120000 index 00000000..47f2b951 --- /dev/null +++ b/ge/proto/tensorflow/types.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/types.proto \ No newline at end of file diff --git a/ge/proto/tensorflow/versions.proto b/ge/proto/tensorflow/versions.proto new file mode 120000 index 00000000..46967ef3 --- /dev/null +++ b/ge/proto/tensorflow/versions.proto @@ -0,0 +1 @@ +../../../../inc/register/proto/tensorflow/versions.proto \ No newline at end of file diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc old mode 100644 new mode 100755 index 3d3adfd8..3e765fa1 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,13 @@ */ #include "session/inner_session.h" + #include #include #include + +#include "analyzer/analyzer.h" +#include "adx_datadump_server.h" #include "common/dump/dump_properties.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" @@ -47,7 +51,7 @@ Status CheckReuseMemoryOption(const std::map &options) { } return SUCCESS; } -} // namespace +} static std::mutex mutex_; // BuildGraph and RunGraph use bool InnerSession::is_dump_server_inited_ = false; @@ -76,10 +80,12 @@ Status InnerSession::Initialize() { DumpProperties dump_properties; dump_properties.InitByOptions(); + GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed"); ret = graph_manager_.Initialize(options_); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); return ret; } @@ -87,6 +93,7 @@ Status InnerSession::Initialize() { if (ret != SUCCESS) { GELOGE(ret, "failed to set malloc size"); (void)graph_manager_.Finalize(); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; } @@ -97,6 +104,7 @@ Status InnerSession::Initialize() { ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); if (ret != SUCCESS) { GELOGE(ret, "failed to init session instance"); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); } init_flag_ = true; return SUCCESS; @@ -120,8 +128,11 @@ Status InnerSession::Finalize() { // release var memory GELOGI("VarManager free var memory."); (void)VarManager::Instance(session_id_)->FreeVarMemory(); + // release analyzer saved info(Session Level) + Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed"); return ret; } @@ -206,7 +217,8 @@ Status InnerSession::RemoveGraph(uint32_t graph_id) { } Status InnerSession::RegisterCallBackFunc( - const std::string &key, const std::function &)> &callback) { + const std::string &key, + const std::function &)> &callback) { 
std::lock_guard lock(resource_mutex_); if (!init_flag_) { GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); @@ -297,4 +309,27 @@ Status InnerSession::SaveVariables(const Graph &graph, const std::vector &)> &callback); + const std::string &key, + const std::function &)> &callback); const GraphManager &getGraphManagerObj() const; bool IsGraphNeedRebuild(uint32_t graph_id); + Status AddDumpProperties(const DumpProperties &dump_properties); + + Status RemoveDumpProperties(); + private: bool init_flag_; uint64_t session_id_; diff --git a/ge/session/omg.cc b/ge/session/omg.cc old mode 100644 new mode 100755 index bcf42032..e90b4635 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,6 @@ #include #include #include "common/auth/file_saver.h" -#include "common/convert/pb2json.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" #include "common/ge/ge_util.h" @@ -45,6 +44,7 @@ #include "omg/parser/parser_factory.h" #include "omg/parser/weights_parser.h" #include "parser/common/pre_checker.h" +#include "parser/common/convert/pb2json.h" #include "proto/ge_ir.pb.h" #include "register/op_registry.h" @@ -75,7 +75,7 @@ const std::set kOmBlackFields = {"output", "data_offset", "data", " "memory_size", "weight_size", "size", "bt", "quantize_factor"}; static std::map output_type_str_to_datatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) { if ((s == "true") || (s == "false")) { @@ -257,6 +257,11 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p if (real_path.empty()) { // plugin path does not exist return; } + struct stat stat_buf; + if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) { + GELOGI("The path %s is not a directory.", real_path.c_str()); + return; + } struct dirent *dent(nullptr); DIR *dir = opendir(real_path.c_str()); @@ -272,21 +277,11 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p string full_name = real_path + "/" + name; const string so_suff = ".so"; const string caffe_parser_so_suff = "lib_caffe_parser.so"; - const string aicpu_so_suff = "_aicpu.so"; - const string aicpu_host_so_suff = "_online.so"; if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { if (full_name.size() >= caffe_parser_so_suff.size() && full_name.compare(full_name.size() - caffe_parser_so_suff.size(), caffe_parser_so_suff.size(), caffe_parser_so_suff) == 0) { caffe_parser_path = full_name; - } else if ((full_name.size() >= aicpu_so_suff.size() && - full_name.compare(full_name.size() - aicpu_so_suff.size(), aicpu_so_suff.size(), aicpu_so_suff) == - 0) || - (full_name.size() >= aicpu_host_so_suff.size() && - full_name.compare(full_name.size() - aicpu_host_so_suff.size(), aicpu_host_so_suff.size(), - aicpu_host_so_suff) == 0)) { - // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage; - domi::GetContext().aicpu_op_run_paths.push_back(full_name); } else { // save parser so path into file_list vector file_list.push_back(full_name); } 
@@ -299,29 +294,6 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p return; } -Status CheckCustomAiCpuOpLib() { - std::vector vec_op_type; - domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); - for (uint32_t i = 0; i < vec_op_type.size(); i++) { - bool aicpu_so_exist = false; - std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; - for (uint32_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { - string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; - if (bin_file_path.size() >= ai_cpu_so_name.size() && - bin_file_path.compare(bin_file_path.size() - ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == - 0) { - aicpu_so_exist = true; - break; - } - } - if (!aicpu_so_exist) { - GELOGE(domi::FAILED, "cant find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); - return domi::FAILED; - } - } - return domi::SUCCESS; -} - Status SetOutFormatAndDataTypeAttr(ge::OpDescPtr op_desc, const ge::Format format, const ge::DataType data_type) { if (op_desc == nullptr) { GELOGE(domi::FAILED, "Input op desc invalid."); @@ -447,8 +419,8 @@ Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) { "out_node [%s] output index:%d must be smaller " "than node output size:%d and can not be negative!", op_desc->GetName().c_str(), index, out_size); - std::string fail_reason = "output index:" + to_string(index) + - " must be smaller than output size:" + to_string(out_size) + " and can not be negative!"; + std::string fail_reason = "output index:" + to_string(index) + " must be smaller than output size:" + + to_string(out_size) + " and can not be negative!"; ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, {"out_nodes", op_desc->GetName(), fail_reason}); return domi::FAILED; @@ -618,21 +590,31 @@ Status ParseOutNodes(const string &out_nodes) { if (!out_nodes.empty()) { domi::GetContext().out_nodes_map.clear(); domi::GetContext().user_out_nodes.clear(); + domi::GetContext().user_out_nodes_top_vec.clear(); vector nodes_v = StringUtils::Split(out_nodes, ';'); for (const string &node : nodes_v) { vector key_value_v = StringUtils::Split(node, ':'); if (key_value_v.size() != 2) { // The size must be 2. 
+ if (key_value_v.size() == 1 && domi::GetContext().type == domi::CAFFE) { + domi::GetContext().user_out_nodes_top_vec.push_back(node); + continue; + } ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); + "E10001", {"parameter", "value", "reason"}, + {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); GELOGE(PARAM_INVALID, "The input format of --out_nodes is invalid, the correct format is " "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.", node.c_str()); return PARAM_INVALID; } - auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); + if (!domi::GetContext().user_out_nodes_top_vec.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {"--out_nodes", out_nodes, "is not all index or top_name"}); + GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s", out_nodes.c_str()); + return PARAM_INVALID; + } // stoi: The method may throw an exception: invalid_argument/out_of_range if (!CheckDigitStr(key_value_v[1])) { ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, @@ -640,7 +622,10 @@ Status ParseOutNodes(const string &out_nodes) { GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); return PARAM_INVALID; } + + auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); int32_t index = stoi(StringUtils::Trim(key_value_v[1])); + GELOGD("Get output info: node[%s] and index[%ld]", key_value_v[0].c_str(), index); if (iter != domi::GetContext().out_nodes_map.end()) { iter->second.emplace_back(index); } else { @@ -681,16 +666,16 @@ static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op } std::map &propertiesMap = domi::GetContext().op_conf_map; if (propertiesMap.empty()) { - ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, - {"op_name_map", op_conf, "the file content is empty"}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E10003", {"parameter", "value", "reason"}, {"op_name_map", op_conf, "the file content is empty"}); GELOGE(PARAM_INVALID, "op_name_map file content is empty, please check file!"); return PARAM_INVALID; } for (auto iter = propertiesMap.begin(); iter != propertiesMap.end(); iter++) { GE_IF_BOOL_EXEC(graphNodeTypes.find(iter->second) == graphNodeTypes.end(), ErrorManager::GetInstance().ATCReportErrMessage( - "E10003", {"parameter", "value", "reason"}, - {"op_name_map", op_conf, "type[" + iter->second + "] is not found in model"}); + "E10003", {"parameter", "value", "reason"}, + {"op_name_map", op_conf, "type[" + iter->second + "] is not found in model"}); GELOGE(PARAM_INVALID, "Invalid parameter for op_name_map."); return PARAM_INVALID;); } return SUCCESS; @@ -820,35 +805,35 @@ void GetGroupName(ge::proto::ModelDef &model_def) { auto modelAttrMap = model_def.mutable_attr(); auto fusionModelOpListIter = modelAttrMap->find(MODEL_ATTR_FUSION_MODEL_DEF); GE_IF_BOOL_EXEC( - fusionModelOpListIter != modelAttrMap->end(), int fusionOpIndex = 0; - for (int i = 0; i < model_def.graph_size(); i++) { - auto graph = model_def.mutable_graph(i); - for (int j = 0; j < graph->op_size(); j++) { - int64_t scope_id = 0; - auto bt = fusionModelOpListIter->second.list().bt(fusionOpIndex++); - ge::proto::OpDef 
fusion_op_def; - GE_CHK_BOOL_EXEC(bt.size() != 0, GELOGW("Invalid bt size"); return;); - - (void)(fusion_op_def.ParseFromArray(bt.data(), bt.size())); - auto fusion_attr_map = fusion_op_def.mutable_attr(); - auto fusion_iter = fusion_attr_map->find(kScopeIdAttr); - GE_IF_BOOL_EXEC(fusion_iter == fusion_attr_map->end(), continue;); - - scope_id = fusion_iter->second.i(); - ge::proto::OpDef *opdef = graph->mutable_op(j); - auto attr_map = opdef->mutable_attr(); - - int64_t stream_id = opdef->stream_id(); - - uint16_t l1_id = (((uint64_t)scope_id & 0xFFFF0000)) >> 16; - GE_IF_BOOL_EXEC(l1_id != 0, ostringstream groupName; groupName << "group_op_l1_" << l1_id << "_" << stream_id; - (*attr_map)["group_op_name"].set_s(groupName.str()); continue;); - - uint16_t ub_id = ((uint64_t)scope_id & 0xFFFF); - GE_IF_BOOL_EXEC(ub_id != 0, ostringstream groupName; groupName << "group_op_ub_" << ub_id << "_" << stream_id; - (*attr_map)["group_op_name"].set_s(groupName.str());); - } - }); + fusionModelOpListIter != modelAttrMap->end(), int fusionOpIndex = 0; + for (int i = 0; i < model_def.graph_size(); i++) { + auto graph = model_def.mutable_graph(i); + for (int j = 0; j < graph->op_size(); j++) { + int64_t scope_id = 0; + auto bt = fusionModelOpListIter->second.list().bt(fusionOpIndex++); + ge::proto::OpDef fusion_op_def; + GE_CHK_BOOL_EXEC(bt.size() != 0, GELOGW("Invalid bt size"); return;); + + (void)(fusion_op_def.ParseFromArray(bt.data(), bt.size())); + auto fusion_attr_map = fusion_op_def.mutable_attr(); + auto fusion_iter = fusion_attr_map->find(kScopeIdAttr); + GE_IF_BOOL_EXEC(fusion_iter == fusion_attr_map->end(), continue;); + + scope_id = fusion_iter->second.i(); + ge::proto::OpDef *opdef = graph->mutable_op(j); + auto attr_map = opdef->mutable_attr(); + + int64_t stream_id = opdef->stream_id(); + + uint16_t l1_id = (((uint64_t)scope_id & 0xFFFF0000)) >> 16; + GE_IF_BOOL_EXEC(l1_id != 0, ostringstream groupName; groupName << "group_op_l1_" << l1_id << "_" << stream_id; + (*attr_map)["group_op_name"].set_s(groupName.str()); continue;); + + uint16_t ub_id = ((uint64_t)scope_id & 0xFFFF); + GE_IF_BOOL_EXEC(ub_id != 0, ostringstream groupName; groupName << "group_op_ub_" << ub_id << "_" << stream_id; + (*attr_map)["group_op_name"].set_s(groupName.str());); + } + }); } FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, const char *json_file) { @@ -981,8 +966,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertFwkModelToJson(const domi::FrameworkType } ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--framework", std::to_string(framework), "only support 0(Caffe) 3(TensorFlow)"}); + "E10001", {"parameter", "value", "reason"}, + {"--framework", std::to_string(framework), "only support 0(Caffe) 3(TensorFlow)"}); GELOGE(PARAM_INVALID, "Input parameter[--framework] is mandatory and it's value must be: 0(Caffe) 3(TensorFlow)."); return PARAM_INVALID; } @@ -1014,13 +999,32 @@ FMK_FUNC_HOST_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const void UpdateOmgCtxWithParserCtx() { domi::GetContext().format = GetParserContext().format; domi::GetContext().input_dims = GetParserContext().input_dims; - return; + domi::GetContext().user_input_dims = GetParserContext().user_input_dims; + domi::GetContext().is_dynamic_input = GetParserContext().is_dynamic_input; + domi::GetContext().type = GetParserContext().type; + domi::GetContext().user_out_nodes = GetParserContext().user_out_nodes; + domi::GetContext().train_flag = 
GetParserContext().train_flag; + domi::GetContext().run_mode = GetParserContext().run_mode; + domi::GetContext().op_conf_map = GetParserContext().op_conf_map; + domi::GetContext().out_nodes_map = GetParserContext().out_nodes_map; + domi::GetContext().input_nodes_format_map = GetParserContext().input_nodes_format_map; + domi::GetContext().out_top_names = GetParserContext().out_top_names; + domi::GetContext().user_out_nodes_top_vec = GetParserContext().user_out_nodes_top_vec; } void UpdateParserCtxWithOmgCtx() { GetParserContext().format = domi::GetContext().format; GetParserContext().input_dims = domi::GetContext().input_dims; + GetParserContext().user_input_dims = domi::GetContext().user_input_dims; + GetParserContext().is_dynamic_input = domi::GetContext().is_dynamic_input; + GetParserContext().type = domi::GetContext().type; + GetParserContext().user_out_nodes = domi::GetContext().user_out_nodes; + GetParserContext().train_flag = domi::GetContext().train_flag; GetParserContext().run_mode = domi::GetContext().run_mode; - return; + GetParserContext().op_conf_map = domi::GetContext().op_conf_map; + GetParserContext().out_nodes_map = domi::GetContext().out_nodes_map; + GetParserContext().input_nodes_format_map = domi::GetContext().input_nodes_format_map; + GetParserContext().out_top_names = domi::GetContext().out_top_names; + GetParserContext().user_out_nodes_top_vec = domi::GetContext().user_out_nodes_top_vec; } } // namespace ge diff --git a/ge/session/readme.txt b/ge/session/readme.txt new file mode 100644 index 00000000..d8d0f393 --- /dev/null +++ b/ge/session/readme.txt @@ -0,0 +1,3 @@ +GE +SessionManager +InnerSession diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc old mode 100644 new mode 100755 index 35d97c31..6f8c9432 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -227,8 +227,8 @@ Status SessionManager::GetNextSessionId(SessionId &next_session_id) { } Status SessionManager::RegisterCallBackFunc( - SessionId session_id, const std::string &key, - const std::function &)> &callback) { + SessionId session_id, const std::string &key, + const std::function &)> &callback) { if (!init_flag_) { GELOGE(GE_SESSION_MANAGER_NOT_INIT); return GE_SESSION_MANAGER_NOT_INIT; diff --git a/ge/session/session_manager.h b/ge/session/session_manager.h index 1efb47d8..88864f61 100644 --- a/ge/session/session_manager.h +++ b/ge/session/session_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -132,7 +132,8 @@ class SessionManager { /// @param [out] var_values: variable values /// @return Status result of function /// - Status GetVariables(SessionId session_id, const std::vector &var_names, std::vector &var_values); + Status GetVariables(SessionId session_id, const std::vector &var_names, + std::vector &var_values); /// /// @ingroup ge_graph @@ -143,8 +144,8 @@ class SessionManager { /// @return Status result of function /// Status RegisterCallBackFunc( - SessionId session_id, const std::string &key, - const std::function &)> &callback); + SessionId session_id, const std::string &key, + const std::function &)> &callback); bool IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id); diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc old mode 100644 new mode 100755 index a74be1f3..647f1618 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -30,13 +30,14 @@ namespace ge { namespace { const size_t kDataMemAlignSize = 32; -size_t GetAlignedSize(uint32_t size) { +size_t GetAlignedSize(size_t size) { size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; return aligned_size; } } // namespace -SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) {} +SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { +} FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() { for (auto task : tasks_) { @@ -58,13 +59,11 @@ Status SingleOp::ValidateArgs(const std::vector &inputs, const std:: for (size_t i = 0; i < num_inputs; ++i) { // preventing from read out of bound size_t aligned_size = GetAlignedSize(inputs[i].length); - GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu", i, aligned_size, inputs[i].length, - input_sizes_[i]); + GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu", + i, aligned_size, inputs[i].length, input_sizes_[i]); if (aligned_size < input_sizes_[i]) { - GELOGE(PARAM_INVALID, - "Input size mismatch. index = %zu, model expect %zu," - " but given %zu(after align)", - i, input_sizes_[i], aligned_size); + GELOGE(PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu," + " but given %zu(after align)", i, input_sizes_[i], aligned_size); return PARAM_INVALID; } } @@ -78,13 +77,11 @@ Status SingleOp::ValidateArgs(const std::vector &inputs, const std:: for (size_t i = 0; i < num_outputs; ++i) { // preventing from write out of bound size_t aligned_size = GetAlignedSize(outputs[i].length); - GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu", i, aligned_size, outputs[i].length, - output_sizes_[i]); + GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu", + i, aligned_size, outputs[i].length, output_sizes_[i]); if (aligned_size < output_sizes_[i]) { - GELOGE(PARAM_INVALID, - "Output size mismatch. index = %zu, model expect %zu," - "but given %zu(after align)", - i, output_sizes_[i], aligned_size); + GELOGE(PARAM_INVALID, "Output size mismatch. 
index = %zu, model expect %zu," + "but given %zu(after align)", i, output_sizes_[i], aligned_size); return PARAM_INVALID; } } @@ -129,8 +126,12 @@ Status SingleOp::UpdateArgs(const std::vector &inputs, const std::ve GELOGD("Update aicpu_TF task args"); auto *dst_io_addr = const_cast(reinterpret_cast(task->GetIOAddr())); GE_CHECK_NOTNULL(dst_io_addr); - auto rt_ret = rtMemcpyAsync(dst_io_addr, sizeof(uint64_t) * args_.size(), &args_[0], - sizeof(uint64_t) * args_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, stream_); + auto rt_ret = rtMemcpyAsync(dst_io_addr, + sizeof(uint64_t) * args_.size(), + &args_[0], + sizeof(uint64_t) * args_.size(), + RT_MEMCPY_HOST_TO_DEVICE_EX, + stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpyAsync addresses failed, ret = %d", rt_ret); return RT_FAILED; @@ -179,29 +180,40 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c return ret; } -void SingleOp::SetStream(rtStream_t stream) { stream_ = stream; } +void SingleOp::SetStream(rtStream_t stream) { + stream_ = stream; +} -void SingleOp::SetSessionID(uint64_t session_id) { aicpu_session_id_ = session_id; } +void SingleOp::SetSessionID(uint64_t session_id) { + aicpu_session_id_ = session_id; +} DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex, rtStream_t stream) - : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) {} + : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) { +} DynamicSingleOp::~DynamicSingleOp() { GELOGI("DynamicSingleOp destory sessionId = %lu", aicpu_session_id_); ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); } -Status DynamicSingleOp::ValidateParams(const vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs) const { +Status DynamicSingleOp::ValidateParams(const vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs) const { if (inputs.size() != input_desc.size()) { - GELOGE(PARAM_INVALID, "Input number mismatches input desc number. Input num = %zu, input desc num = %zu", - inputs.size(), input_desc.size()); + GELOGE(PARAM_INVALID, + "Input number mismatches input desc number. Input num = %zu, input desc num = %zu", + inputs.size(), + input_desc.size()); return PARAM_INVALID; } if (outputs.size() != output_desc.size()) { - GELOGE(PARAM_INVALID, "Output number mismatches output desc number. Output num = %zu, output desc num = %zu", - outputs.size(), output_desc.size()); + GELOGE(PARAM_INVALID, + "Output number mismatches output desc number. 
Output num = %zu, output desc num = %zu", + outputs.size(), + output_desc.size()); return PARAM_INVALID; } @@ -251,8 +263,10 @@ Status DynamicSingleOp::AllocateWorkspaces(const std::vector &workspace return SUCCESS; } -Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, const vector &inputs, - vector &output_desc, vector &outputs) { +Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, + const vector &inputs, + vector &output_desc, + vector &outputs) { GE_CHK_STATUS_RET_NOLOG(op_task_->UpdateRunInfo(input_desc, output_desc)); std::vector workspace_buffers; @@ -261,8 +275,10 @@ Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, c return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); } -Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, - vector &output_desc, vector &output_buffers) { +Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, + const vector &input_buffers, + vector &output_desc, + vector &output_buffers) { GE_CHECK_NOTNULL(op_task_); GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); std::lock_guard lk(*stream_mutex_); @@ -281,11 +297,14 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, con } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_); } else { - GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", + GELOGE(UNSUPPORTED, + "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", op_task_->GetOpTaskType()); return UNSUPPORTED; } } -void DynamicSingleOp::SetSessionID(uint64_t session_id) { aicpu_session_id_ = session_id; } +void DynamicSingleOp::SetSessionID(uint64_t session_id) { + aicpu_session_id_ = session_id; +} } // namespace ge diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h old mode 100644 new mode 100755 index 0ca4afef..bd671017 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -62,19 +62,26 @@ class DynamicSingleOp { public: DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex_, rtStream_t stream); ~DynamicSingleOp(); - Status ExecuteAsync(const vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs); + Status ExecuteAsync(const vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs); void SetSessionID(uint64_t session_id); private: friend class SingleOpModel; - Status ValidateParams(const vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs) const; + Status ValidateParams(const vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs) const; - Status AllocateWorkspaces(const std::vector &workspace_sizes, std::vector &workspaces); + Status AllocateWorkspaces(const std::vector &workspace_sizes, + std::vector &workspaces); - Status ExecuteTbeTask(const vector &input_desc, const vector &inputs, - vector &output_desc, vector &outputs); + Status ExecuteTbeTask(const vector &input_desc, + const vector &inputs, + vector &output_desc, + vector &outputs); std::unique_ptr op_task_; uintptr_t resource_id_ = 0; diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index 709b238f..56bbdef6 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ 
-96,7 +96,9 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) { return it->second; } -Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, const ModelData &model_data, void *stream, +Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, + const ModelData &model_data, + void *stream, DynamicSingleOp **single_op) { if (!tiling_func_registered_) { RegisterTilingFunc(); diff --git a/ge/single_op/single_op_manager.h b/ge/single_op/single_op_manager.h index 09ae0e4e..e6d10980 100644 --- a/ge/single_op/single_op_manager.h +++ b/ge/single_op/single_op_manager.h @@ -34,10 +34,14 @@ class SingleOpManager { return instance; } - Status GetOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, + Status GetOpFromModel(const std::string &model_name, + const ge::ModelData &model_data, + void *stream, SingleOp **single_op); - Status GetDynamicOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, + Status GetDynamicOpFromModel(const std::string &model_name, + const ge::ModelData &model_data, + void *stream, DynamicSingleOp **dynamic_single_op); StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc old mode 100644 new mode 100755 index fb676008..4892b7a1 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -108,8 +108,11 @@ Status SingleOpModel::InitModelMem(StreamResource &res) { auto weight_buffer = model_helper_.GetGeModel()->GetWeight(); GELOGI("To copy weight to device. weight size = %zu", weight_buffer.GetSize()); - GE_CHK_RT_RET(rtMemcpy(model_params_.weight_base, model_params_.weight_size, weight_buffer.GetData(), - weight_buffer.GetSize(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(model_params_.weight_base, + model_params_.weight_size, + weight_buffer.GetData(), + weight_buffer.GetSize(), + RT_MEMCPY_HOST_TO_DEVICE)); } return SUCCESS; @@ -323,8 +326,8 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa return SUCCESS; } -Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, - bool &depend_compute_flag, uint64_t session_id) { +Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, + bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id) { auto iter = op_list_.find(kernel_def.op_index()); if (iter == op_list_.end()) { GELOGE(INTERNAL_ERROR, "op desc not found. 
op index = %u", kernel_def.op_index()); @@ -426,8 +429,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { bool depend_compute_flag = false; uint64_t dynamic_singleop_sessionid = aicpu_sessionid++; GELOGI("Build dynamic singleOp, sessionId = %lu", dynamic_singleop_sessionid); - GE_CHK_STATUS_RET_NOLOG( - BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, depend_compute_flag, dynamic_singleop_sessionid)); + GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, + depend_compute_flag, dynamic_singleop_sessionid)); if (depend_compute_flag) { if (i >= tasks.size() - 1) { GELOGE(FAILED, "The copy task of the fourth operator was not found."); diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h old mode 100644 new mode 100755 index 09b90050..2e6b37dc --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -45,7 +45,9 @@ struct SingleOpModelParam { class SingleOpModel { public: - SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size); + SingleOpModel(const std::string &model_name, + const void *model_data, + uint32_t model_size); ~SingleOpModel() = default; Status Init(); @@ -66,8 +68,8 @@ class SingleOpModel { Status BuildTaskList(SingleOp &single_op); Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); - Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, - bool &depend_compute_flag, uint64_t session_id); + Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, + bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task); Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc old mode 100644 new mode 100755 index c2b93974..388f447e --- a/ge/single_op/stream_resource.cc +++ b/ge/single_op/stream_resource.cc @@ -22,7 +22,8 @@ #include "single_op/single_op_model.h" namespace ge { -StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {} +StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) { +} StreamResource::~StreamResource() { for (auto mem : memory_list_) { @@ -60,9 +61,13 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { return it->second.get(); } -void StreamResource::SetStream(rtStream_t stream) { stream_ = stream; } +void StreamResource::SetStream(rtStream_t stream) { + stream_ = stream; +} -uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, +uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, + size_t size, + size_t &max_allocated, std::vector &allocated) { if (size <= max_allocated && !allocated.empty()) { GELOGD("reuse last memory"); @@ -111,7 +116,8 @@ uint8_t *StreamResource::MallocWeight(const std::string &purpose, size_t size) { return buffer; } -Status StreamResource::BuildDynamicOperator(const string &model_name, const ModelData &model_data, +Status StreamResource::BuildDynamicOperator(const string &model_name, + const ModelData &model_data, DynamicSingleOp **single_op) { std::lock_guard lk(mu_); auto it = dynamic_op_map_.find(model_data.model_data); @@ -127,8 +133,7 @@ Status StreamResource::BuildDynamicOperator(const string 
&model_name, const Mode return ret; } - auto new_op = - std::unique_ptr(new (std::nothrow) DynamicSingleOp(resource_id_, &stream_mu_, stream_)); + auto new_op = std::unique_ptr(new(std::nothrow) DynamicSingleOp(resource_id_, &stream_mu_, stream_)); GE_CHECK_NOTNULL(new_op); GELOGI("To build operator: %s", model_name.c_str()); @@ -153,7 +158,7 @@ Status StreamResource::BuildOperator(const string &model_name, const ModelData & return ret; } - auto new_op = std::unique_ptr(new (std::nothrow) SingleOp(&stream_mu_, stream_)); + auto new_op = std::unique_ptr(new(std::nothrow) SingleOp(&stream_mu_, stream_)); if (new_op == nullptr) { GELOGE(MEMALLOC_FAILED, "new SingleOp failed"); return MEMALLOC_FAILED; diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h old mode 100644 new mode 100755 index 3c0dd03f..39f08ebe --- a/ge/single_op/stream_resource.h +++ b/ge/single_op/stream_resource.h @@ -49,7 +49,9 @@ class StreamResource { uint8_t *MallocWeight(const std::string &purpose, size_t size); private: - uint8_t *DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, + uint8_t *DoMallocMemory(const std::string &purpose, + size_t size, + size_t &max_allocated, std::vector &allocated); uintptr_t resource_id_; diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc old mode 100644 new mode 100755 index cc334f41..fc7a9f97 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -27,7 +27,7 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { return RT_FAILED; } std::unique_ptr aicpu_args; - aicpu_args.reset(new (std::nothrow) uint8_t[aicpu_arg_size]()); + aicpu_args.reset(new(std::nothrow) uint8_t[aicpu_arg_size]()); if (aicpu_args == nullptr) { GELOGE(RT_FAILED, "malloc failed, size = %zu", aicpu_arg_size); return RT_FAILED; @@ -62,8 +62,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { auto &kernel_ext_info = kernel_def_.kernel_ext_info(); auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), - kernel_ext_info_size); + "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", + kernel_ext_info.size(), kernel_ext_info_size); ret = task.SetExtInfoAndType(kernel_ext_info); if (ret != SUCCESS) { diff --git a/ge/single_op/task/aicpu_kernel_task_builder.h b/ge/single_op/task/aicpu_kernel_task_builder.h old mode 100644 new mode 100755 diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc old mode 100644 new mode 100755 index 9ad52d81..a70ae91d --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- */ +*/ #include "single_op/task/aicpu_task_builder.h" #include @@ -23,145 +23,146 @@ #include "graph/load/new_model_manager/model_manager.h" namespace ge { -AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) - : op_desc_(op_desc), kernel_def_(kernel_def) {} - -Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector &addresses) { - size_t arg_size = kernel_def_.args_size(); - auto rt_ret = rtMalloc(io_addr, arg_size, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMalloc failed, size = %zu, ret = %d", arg_size, rt_ret); - return RT_FAILED; - } + AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) + : op_desc_(op_desc), kernel_def_(kernel_def) {} + + Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector &addresses) { + size_t arg_size = kernel_def_.args_size(); + auto rt_ret = rtMalloc(io_addr, arg_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtMalloc failed, size = %zu, ret = %d", arg_size, rt_ret); + return RT_FAILED; + } + + const void *src_addr = reinterpret_cast(addresses.data()); + uint64_t src_len = sizeof(void *) * addresses.size(); + rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + (void)rtFree(*io_addr); + GELOGE(RT_FAILED, "rtMemcpy addresses failed, ret = %d", rt_ret); + return RT_FAILED; + } - const void *src_addr = reinterpret_cast(addresses.data()); - uint64_t src_len = sizeof(void *) * addresses.size(); - rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - (void)rtFree(*io_addr); - GELOGE(RT_FAILED, "rtMemcpy addresses failed, ret = %d", rt_ret); - return RT_FAILED; + return SUCCESS; } - return SUCCESS; -} + Status AiCpuTaskBuilder::SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &fwk_op_kernel) { + auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), + kernel_def_.args().data(), kernel_def_.args().size()); + if (sec_ret != EOK) { + GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + return FAILED; + } -Status AiCpuTaskBuilder::SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &fwk_op_kernel) { - auto sec_ret = - memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_def_.args().data(), kernel_def_.args().size()); - if (sec_ret != EOK) { - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); - return FAILED; + auto io_addr_val = static_cast(reinterpret_cast(io_addr)); + fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = io_addr_val; + auto ws_addr_val = static_cast(reinterpret_cast(ws_addr)); + fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = ws_addr_val; + return SUCCESS; } - auto io_addr_val = static_cast(reinterpret_cast(io_addr)); - fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = io_addr_val; - auto ws_addr_val = static_cast(reinterpret_cast(ws_addr)); - fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = ws_addr_val; - return SUCCESS; -} - -Status AiCpuTaskBuilder::SetKernelArgs(void **args, STR_FWK_OP_KERNEL &fwk_op_kernel) { - void *fwk_op_args = nullptr; - auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "malloc arg memory failed, ret = %d", rt_ret); - return RT_FAILED; - } + Status AiCpuTaskBuilder::SetKernelArgs(void **args, STR_FWK_OP_KERNEL &fwk_op_kernel) { + void *fwk_op_args = 
nullptr; + auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "malloc arg memory failed, ret = %d", rt_ret); + return RT_FAILED; + } - rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), - RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - (void)rtFree(fwk_op_args); - GELOGE(RT_FAILED, "copy args failed, ret = %d", rt_ret); - return RT_FAILED; - } - *args = fwk_op_args; - return SUCCESS; -} - -Status AiCpuTaskBuilder::InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, const SingleOpModelParam ¶m, - bool dynamic_flag) { - if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { - GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), - kernel_def_.args_size()); - return PARAM_INVALID; + rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, + sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + (void)rtFree(fwk_op_args); + GELOGE(RT_FAILED, "copy args failed, ret = %d", rt_ret); + return RT_FAILED; + } + *args = fwk_op_args; + return SUCCESS; } - auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); - auto ws_addr_vec = addresses.at(BuildTaskUtils::kAddressIndexWorkspace); - - if (dynamic_flag) { - GE_CHK_RT_RET(rtMalloc(kernel_workspace, kernel_def_.task_info_size(), RT_MEMORY_HBM)); - } else { - if (ws_addr_vec.empty()) { - GELOGE(PARAM_INVALID, "workspace Data Address is empty."); + + Status AiCpuTaskBuilder::InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, + const SingleOpModelParam ¶m, bool dynamic_flag) { + if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { + GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", + sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); return PARAM_INVALID; } - *kernel_workspace = ws_addr_vec[0]; - } - GE_CHK_RT_RET(rtMemcpy(*kernel_workspace, kernel_def_.task_info_size(), kernel_def_.task_info().data(), - kernel_def_.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); + auto ws_addr_vec = addresses.at(BuildTaskUtils::kAddressIndexWorkspace); + + if (dynamic_flag) { + GE_CHK_RT_RET(rtMalloc(kernel_workspace, kernel_def_.task_info_size(), RT_MEMORY_HBM)); + } else { + if (ws_addr_vec.empty()) { + GELOGE(PARAM_INVALID, "workspace Data Address is empty."); + return PARAM_INVALID; + } + *kernel_workspace = ws_addr_vec[0]; + } + GE_CHK_RT_RET(rtMemcpy(*kernel_workspace, kernel_def_.task_info_size(), + kernel_def_.task_info().data(), kernel_def_.task_info_size(), + RT_MEMCPY_HOST_TO_DEVICE)); - auto ret = SetInputOutputAddr(io_addr, BuildTaskUtils::JoinAddresses(addresses)); - if (ret != SUCCESS) { - return ret; - } - return SUCCESS; -} - -Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, - uint64_t session_id) { - void *io_addr = nullptr; - void *kernel_workspace = nullptr; - GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&io_addr, &kernel_workspace, param, dynamic_flag)); - - STR_FWK_OP_KERNEL fwk_op_kernel = {0}; - auto ret = SetFmkOpKernel(io_addr, kernel_workspace, fwk_op_kernel); - if (ret != SUCCESS) { - (void)rtFree(io_addr); - return ret; + auto ret = SetInputOutputAddr(io_addr, BuildTaskUtils::JoinAddresses(addresses)); + if (ret != SUCCESS) { + return ret; + } + return SUCCESS; } - task.op_desc_ = op_desc_; - task.num_inputs_ = 
op_desc_->GetInputsSize(); - task.num_outputs_ = op_desc_->GetOutputsSize(); - - // get kernel_ext_info - auto &kernel_ext_info = kernel_def_.kernel_ext_info(); - auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); - GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), - kernel_ext_info_size); - GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info), "Init ext info failed."); - - if (task.ext_info_addr_dev_ != nullptr) { - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = kernel_ext_info_size; - } - GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "AiCpuTask init for summary and copy task failed."); - - // Create session - fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; - GELOGI("Begin to CreateAicpuSession, session id: %lu", session_id); - GE_CHECK_NOTNULL(ModelManager::GetInstance()); - GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, - GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); - return FAILED;) - ret = SetKernelArgs(&task.args_, fwk_op_kernel); - if (ret != SUCCESS) { - (void)rtFree(io_addr); - return ret; - } + Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, + bool dynamic_flag, uint64_t session_id) { + void *io_addr = nullptr; + void *kernel_workspace = nullptr; + GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&io_addr, &kernel_workspace, param, dynamic_flag)); + + STR_FWK_OP_KERNEL fwk_op_kernel = {0}; + auto ret = SetFmkOpKernel(io_addr, kernel_workspace, fwk_op_kernel); + if (ret != SUCCESS) { + (void)rtFree(io_addr); + return ret; + } - task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); - task.op_type_ = op_desc_->GetName(); - task.io_addr_ = io_addr; - task.task_info_ = kernel_def_.task_info(); - task.workspace_addr_ = kernel_workspace; - task.dynamic_flag_ = dynamic_flag; - - auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); - GELOGI("[TASK_INFO] %s %s", task.task_info_.c_str(), debug_info.c_str()); - return SUCCESS; -} + task.op_desc_ = op_desc_; + task.num_inputs_ = op_desc_->GetInputsSize(); + task.num_outputs_ = op_desc_->GetOutputsSize(); + + // get kernel_ext_info + auto &kernel_ext_info = kernel_def_.kernel_ext_info(); + auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", + kernel_ext_info.size(), kernel_ext_info_size); + GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info), "Init ext info failed."); + + if (task.ext_info_addr_dev_ != nullptr) { + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = kernel_ext_info_size; + } + GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "AiCpuTask init for summary and copy task failed."); + + // Create session + fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; + GELOGI("Begin to CreateAicpuSession, session id: %lu", session_id); + GE_CHECK_NOTNULL(ModelManager::GetInstance()); + GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, + GELOGE(FAILED, "CreateAicpuSession error. 
session id: %lu", session_id); + return FAILED;) + ret = SetKernelArgs(&task.args_, fwk_op_kernel); + if (ret != SUCCESS) { + (void)rtFree(io_addr); + return ret; + } + + task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); + task.op_type_ = op_desc_->GetName(); + task.io_addr_ = io_addr; + task.task_info_ = kernel_def_.task_info(); + task.workspace_addr_ = kernel_workspace; + task.dynamic_flag_ = dynamic_flag; + + auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); + GELOGI("[TASK_INFO] %s %s", task.task_info_.c_str(), debug_info.c_str()); + return SUCCESS; + } } // namespace ge diff --git a/ge/single_op/task/aicpu_task_builder.h b/ge/single_op/task/aicpu_task_builder.h old mode 100644 new mode 100755 index 76ccb161..6dcd7a0f --- a/ge/single_op/task/aicpu_task_builder.h +++ b/ge/single_op/task/aicpu_task_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ #define GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ @@ -24,23 +24,23 @@ #include "cce/aicpu_engine_struct.h" namespace ge { -class AiCpuTaskBuilder { - public: - AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def); - ~AiCpuTaskBuilder() = default; + class AiCpuTaskBuilder { + public: + AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def); + ~AiCpuTaskBuilder() = default; - Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t session_id); + Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t session_id); - private: - static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); - Status SetInputOutputAddr(void **io_addr, const std::vector &addresses); - Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); - Status InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, const SingleOpModelParam ¶m, - bool dynamic_flag); + private: + static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); + Status SetInputOutputAddr(void **io_addr, const std::vector &addresses); + Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); + Status InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, + const SingleOpModelParam ¶m, bool dynamic_flag); - const OpDescPtr op_desc_; - const domi::KernelExDef &kernel_def_; -}; + const OpDescPtr op_desc_; + const domi::KernelExDef &kernel_def_; + }; } // namespace ge #endif // GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ \ No newline at end of file diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 9e97ee57..28177dc7 100644 --- a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -29,7 +29,7 @@ const uint64_t kSessionId = UINT64_MAX; uint8_t *kVarBase = nullptr; const uint64_t kLogicVarBase = 0; const uint64_t kVarSize = 0; -} // namespace +} std::vector> BuildTaskUtils::GetAddresses(const OpDescPtr &op_desc, const SingleOpModelParam ¶m) { @@ -60,7 +60,8 @@ std::vector BuildTaskUtils::JoinAddresses(const std::vector BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, const 
SingleOpModelParam ¶m) { +std::vector BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, + const SingleOpModelParam ¶m) { auto addresses = GetAddresses(op_desc, param); return JoinAddresses(addresses); } @@ -75,8 +76,11 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { // Conv2D IN[DT_FLOAT16 NC1HWC0[256, 128, 7, 7, 16],DT_FLOAT16 FRACTAL_Z[128, 32, 16, 16]] // OUT[DT_FLOAT16 NC1HWC0[256, 32, 7, 7, 16]] ss << op_type << " IN["; - for (uint32_t idx = 0; idx < op_desc->GetInputsSize(); idx++) { + for (uint32_t idx = 0; idx < op_desc->GetAllInputsSize(); idx++) { const GeTensorDescPtr &input = op_desc->MutableInputDesc(idx); + if (input == nullptr) { + continue; + } ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; ss << TypeUtils::FormatToSerialString(input->GetFormat()); ss << VectorToString(input->GetShape().GetDims()); diff --git a/ge/single_op/task/build_task_utils.h b/ge/single_op/task/build_task_utils.h index f5885fd2..cddc7a2b 100644 --- a/ge/single_op/task/build_task_utils.h +++ b/ge/single_op/task/build_task_utils.h @@ -33,8 +33,9 @@ class BuildTaskUtils { static std::vector JoinAddresses(const std::vector> &addresses); static std::vector GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); static std::string GetTaskInfo(const OpDescPtr &op_desc); - template - static std::string VectorToString(const std::vector &values) { + template + static std::string VectorToString(const std::vector &values) + { std::stringstream ss; ss << '['; auto size = values.size(); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc old mode 100644 new mode 100755 index 0c489aa4..f778f189 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -34,7 +34,7 @@ constexpr int kLaunchRetryTimes = 1000; constexpr int kSleepTime = 10; constexpr uint64_t kReleaseFlag = 1; constexpr int kCopyNum = 2; -} // namespace +} Status OpTask::OpenDump(const std::vector &io_addr, rtStream_t stream) { if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { @@ -235,12 +235,14 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info) { } int32_t unknown_shape_type_val = 0; - (void)AttrUtils::GetInt(op_desc_, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + (void) AttrUtils::GetInt(op_desc_, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); GELOGD("Get unknown_type is %d.", unknown_shape_type_val); unknown_type_ = static_cast(unknown_shape_type_val); - aicpu_ext_handle_.reset( - new (std::nothrow)::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), num_inputs_, num_outputs_, unknown_type_)); + aicpu_ext_handle_.reset(new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), + num_inputs_, + num_outputs_, + unknown_type_)); GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); @@ -250,12 +252,12 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info) { } GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, kernel_ext_info.size(), RT_MEMORY_HBM)); - GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, kernel_ext_info.size(), kernel_ext_info.data(), kernel_ext_info.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, kernel_ext_info.size(), + kernel_ext_info.data(), kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } -Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, +Status 
AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, std::vector &output_desc) { GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); if (num_inputs_ == 0 && num_outputs_ == 0) { @@ -279,8 +281,9 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, } GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, - aicpu_ext_handle_->GetExtInfoLen(), // check size - aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), + aicpu_ext_handle_->GetExtInfoLen(), // check size + aicpu_ext_handle_->GetExtInfo(), + aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("Update ext info end."); @@ -294,15 +297,18 @@ Status AiCpuBaseTask::UpdateOutputShape(vector &output_desc) { } GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape."); - GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_, - aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), + aicpu_ext_handle_->GetExtInfoLen(), + ext_info_addr_dev_, + aicpu_ext_handle_->GetExtInfoLen(), + RT_MEMCPY_DEVICE_TO_HOST)); for (size_t i = 0; i < num_outputs_; ++i) { GeShape shape; DataType data_type; aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type); - GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.", - i); + GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), + "AiCpuCCTask Update [%zu]th output shape failed.", i); } GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished."); return SUCCESS; @@ -323,15 +329,16 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor std::vector origin_dims_new; - auto trans_ret = - formats::TransShape(format, shape_new.GetDims(), output_desc.GetDataType(), origin_format, origin_dims_new); - GE_CHK_STATUS_RET(trans_ret, "AiCpuTask originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", + auto trans_ret = formats::TransShape(format, shape_new.GetDims(), + output_desc.GetDataType(), origin_format, origin_dims_new); + GE_CHK_STATUS_RET(trans_ret, + "AiCpuTask originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", origin_format, format, shape_new.ToString().c_str()); auto origin_shape_new = GeShape(origin_dims_new); output_desc.SetOriginShape(origin_shape_new); - GELOGD("AiCpuTask originFormat[%d] is not same as format[%d], need update from %s ro %s.", origin_format, format, - origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); + GELOGD("AiCpuTask originFormat[%d] is not same as format[%d], need update from %s ro %s.", + origin_format, format, origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); return SUCCESS; } @@ -405,7 +412,8 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status AiCpuTask::PrepareCopyInputs(vector &outputs, const std::vector &out_shape_hbm) { +Status AiCpuTask::PrepareCopyInputs(vector &outputs, + const std::vector &out_shape_hbm) { std::vector copy_input_release_flag; std::vector copy_input_data_size; std::vector copy_input_src; @@ -413,8 +421,9 @@ Status AiCpuTask::PrepareCopyInputs(vector &outputs, const std::vector &outputs, const std::vector &out_sha for (size_t i = 0; i < num_outputs_; ++i) { auto &result_summary = output_summary_host_[i]; - GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), output_summary_[i], - 
sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), + output_summary_[i], sizeof(aicpu::FWKAdapter::ResultSummary), + RT_MEMCPY_DEVICE_TO_HOST)); auto shape_data_size = result_summary.shape_data_size; void *shape_buffer = nullptr; GE_MAKE_GUARD_RTMEM(shape_buffer); @@ -456,25 +466,29 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector &out_sha return SUCCESS; } -Status AiCpuTask::CopyDataToHbm(vector &outputs, const std::vector &out_shape_hbm, rtStream_t stream) { +Status AiCpuTask::CopyDataToHbm(vector &outputs, + const std::vector &out_shape_hbm, + rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs, out_shape_hbm)); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream)); + GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), + RT_KERNEL_DEFAULT, stream)); GE_CHK_RT_RET(rtStreamSynchronize(stream)); return SUCCESS; } -Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, const std::vector &out_shape_hbm) { +Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, + const std::vector &out_shape_hbm) { for (size_t i = 0; i < num_outputs_; ++i) { const auto &result_summary = output_summary_host_[i]; std::vector shape_dims; const auto &shape_hbm = out_shape_hbm[i]; uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); - std::unique_ptr shape_addr(new (std::nothrow) int64_t[dim_num]()); + std::unique_ptr shape_addr(new(std::nothrow) int64_t[dim_num]()); GE_CHECK_NOTNULL(shape_addr); - GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, result_summary.shape_data_size, - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, + shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { shape_dims.emplace_back(shape_addr[dim_idx]); @@ -487,8 +501,8 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, cons return SUCCESS; } -Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, - rtStream_t stream) { +Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, + vector &outputs, rtStream_t stream) { if (num_outputs_ == 0) { GELOGI("Output num is 0, there is no need to update the output and size."); return SUCCESS; @@ -500,9 +514,11 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm), "Read ResultSummary and update output shape failed."); - GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), "Copy data to output failed."); + GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), + "Copy data to output failed."); - GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), "Update shape by hbm buffer failed."); + GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), + "Update shape by hbm buffer failed."); GELOGI("Update shape and data by result summary end."); return SUCCESS; @@ -532,8 +548,11 @@ Status AiCpuTask::SetIO(const vector &inputs, vector &outputs) { if (!io_addrs.empty()) { auto *dst_io_addr = const_cast(reinterpret_cast(io_addr_)); - GE_CHK_RT_RET(rtMemcpy(dst_io_addr, sizeof(uint64_t) * io_addrs.size(), &io_addrs[0], - sizeof(uint64_t) * io_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE)); + 
GE_CHK_RT_RET(rtMemcpy(dst_io_addr, + sizeof(uint64_t) * io_addrs.size(), + &io_addrs[0], + sizeof(uint64_t) * io_addrs.size(), + RT_MEMCPY_HOST_TO_DEVICE)); GE_CHECK_NOTNULL(dst_io_addr); }; return SUCCESS; @@ -571,23 +590,24 @@ Status AiCpuTask::InitForSummaryAndCopy() { GE_CHK_RT_RET(rtMalloc(©_ioaddr_dev_, copy_io_addr_size, RT_MEMORY_HBM)); - GE_CHK_RT_RET( - rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size, copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size, + copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { - GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), - kernel_def.args_size()); + GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", + sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); return PARAM_INVALID; } GE_CHK_RT_RET(rtMalloc(©_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); - GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), kernel_def.task_info().data(), - kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), + kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); STR_FWK_OP_KERNEL aicpu_task = {0}; - auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), kernel_def.args().data(), kernel_def.args().size()); + auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), + kernel_def.args().data(), kernel_def.args().size()); if (sec_ret != EOK) { GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); return FAILED; @@ -598,13 +618,15 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; - GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), &aicpu_task, sizeof(STR_FWK_OP_KERNEL), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), + &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } -Status AiCpuTask::LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, +Status AiCpuTask::LaunchKernel(const std::vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); @@ -639,7 +661,8 @@ const void *AiCpuCCTask::GetArgs() const { return args_.get(); } size_t AiCpuCCTask::GetArgSize() const { return arg_size_; } -AiCpuCCTask::~AiCpuCCTask() {} +AiCpuCCTask::~AiCpuCCTask() { +} Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { GELOGI("To invoke rtCpuKernelLaunch. 
block_dim = %u, so_name is %s, kernel_name is %s", block_dim_, so_name_.data(), @@ -647,8 +670,8 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { // sm_desc is nullptr, because l2 buffer does not support auto *sm_desc = reinterpret_cast(sm_desc_); auto ret = - rtCpuKernelLaunch(static_cast(so_name_.data()), static_cast(kernel_name_.data()), - block_dim_, args_.get(), static_cast(arg_size_), sm_desc, stream); + rtCpuKernelLaunch(static_cast(so_name_.data()), static_cast(kernel_name_.data()), + block_dim_, args_.get(), static_cast(arg_size_), sm_desc, stream); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); return RT_FAILED; @@ -658,11 +681,14 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, +Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs, rtStream_t stream) { GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, - "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_); + "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", + unknown_type_); GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index b6ea9114..e541426b 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -44,27 +44,32 @@ class OpTask { OpTask() = default; virtual ~OpTask() = default; virtual Status LaunchKernel(rtStream_t stream) = 0; - virtual Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) { + virtual Status UpdateRunInfo(const vector &input_desc, + const vector &output_desc) { return UNSUPPORTED; } - virtual Status LaunchKernel(const std::vector &inputs, const std::vector &outputs, - const std::vector &workspaces, rtStream_t stream) { + virtual Status LaunchKernel(const std::vector &inputs, + const std::vector &outputs, + const std::vector &workspaces, + rtStream_t stream) { return UNSUPPORTED; } virtual OpTaskType GetOpTaskType() = 0; virtual const void *GetIOAddr() const = 0; const vector &GetWorkspaceSizes() const; void SetWorkspaceSizes(const vector &workspace_sizes); - const OpDescPtr &GetOpdesc() const { return op_desc_; } + const OpDescPtr &GetOpdesc() const {return op_desc_;} Status OpenDump(const std::vector &io_addr, rtStream_t stream); - virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, rtStream_t stream) { + virtual Status LaunchKernel(const std::vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs, + rtStream_t stream) { return UNSUPPORTED; } private: std::vector workspace_sizes_; - protected: DumpProperties dump_properties_; DumpOp dump_op_; @@ -75,15 +80,22 @@ class TbeOpTask : public OpTask { public: ~TbeOpTask() override; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { return OP_TASK_TBE; } - const void *GetIOAddr() const override { return nullptr; } + OpTaskType GetOpTaskType() override { + return OP_TASK_TBE; + } + const void *GetIOAddr() const override { + return nullptr; + } void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func); void SetKernelArgs(std::unique_ptr &&args, size_t 
arg_size, uint32_t block_dim, const OpDescPtr &op_desc); - Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) override; + Status UpdateRunInfo(const vector &input_desc, + const vector &output_desc) override; - Status LaunchKernel(const vector &inputs, const vector &outputs, const vector &workspaces, + Status LaunchKernel(const vector &inputs, + const vector &outputs, + const vector &workspaces, rtStream_t stream) override; const void *GetArgs() const; @@ -93,7 +105,8 @@ class TbeOpTask : public OpTask { private: static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); - Status UpdateNodeByShape(const vector &input_desc, const vector &output_desc); + Status UpdateNodeByShape(const vector &input_desc, + const vector &output_desc); const void *stub_func_ = nullptr; std::unique_ptr args_; @@ -117,7 +130,8 @@ class AiCpuBaseTask : public OpTask { protected: Status SetExtInfoAndType(const std::string &kernel_ext_info); - Status UpdateExtInfo(const std::vector &input_desc, std::vector &output_desc); + Status UpdateExtInfo(const std::vector &input_desc, + std::vector &output_desc); Status UpdateOutputShape(vector &output_desc); Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); @@ -135,11 +149,16 @@ class AiCpuTask : public AiCpuBaseTask { ~AiCpuTask() override; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; } + OpTaskType GetOpTaskType() override { + return OP_TASK_AICPU; + } const void *GetIOAddr() const override; - Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, rtStream_t stream) override; + Status LaunchKernel(const std::vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, + std::vector &outputs, + rtStream_t stream) override; Status SetMemCopyTask(const domi::KernelExDef &kernel_def); private: @@ -147,14 +166,17 @@ class AiCpuTask : public AiCpuBaseTask { // for copy task. 
  Status InitForSummaryAndCopy();
-  Status UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs,
+  Status UpdateShapeAndDataByResultSummary(vector &output_desc,
+                                           vector &outputs,
                                            rtStream_t stream);
  Status ReadResultSummaryAndPrepareMemory(std::vector &out_shape_hbm);
  Status CopyDataToHbm(vector &outputs, const std::vector &out_shape_hbm, rtStream_t stream);
-  Status PrepareCopyInputs(vector &outputs, const std::vector &out_shape_hbm);
+  Status PrepareCopyInputs(vector &outputs,
+                           const std::vector &out_shape_hbm);
-  Status UpdateShapeByHbmBuffer(vector &output_desc, const std::vector &out_shape_hbm);
+  Status UpdateShapeByHbmBuffer(vector &output_desc,
+                                const std::vector &out_shape_hbm);
  friend class AiCpuTaskBuilder;
  void *workspace_addr_ = nullptr;
@@ -197,10 +219,13 @@ class AiCpuCCTask : public AiCpuBaseTask {
  void SetIoAddr(void *io_addr);
  size_t GetArgSize() const;
-  Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs,
-                      std::vector &output_desc, std::vector &outputs, rtStream_t stream) override;
+  Status LaunchKernel(const std::vector &input_desc,
+                      const std::vector &inputs,
+                      std::vector &output_desc,
+                      std::vector &outputs,
+                      rtStream_t stream) override;
- private:
+private:
  friend class AiCpuCCTaskBuilder;
  std::string so_name_;
  std::string kernel_name_;
diff --git a/ge/single_op/task/tbe_task_builder.h b/ge/single_op/task/tbe_task_builder.h
old mode 100644
new mode 100755
diff --git a/ge/stub/Makefile b/ge/stub/Makefile
new file mode 100644
index 00000000..820fc70d
--- /dev/null
+++ b/ge/stub/Makefile
@@ -0,0 +1,6 @@
+inc_path := $(shell pwd)/inc/external/
+out_path := $(shell pwd)/out/ge/lib64/stub/
+stub_path := $(shell pwd)/framework/domi/stub/
+
+mkdir_stub := $(shell mkdir -p $(out_path))
+local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path))
diff --git a/ge/stub/README b/ge/stub/README
new file mode 100644
index 00000000..ca98ce85
--- /dev/null
+++ b/ge/stub/README
@@ -0,0 +1,4 @@
+###################################################################################
+this directory (stub) holds the generated stub files
+gen_stubapi.py is used for retrieving APIs and generating stub functions
+###################################################################################
diff --git a/ge/stub/README.md b/ge/stub/README.md
new file mode 100755
index 00000000..a085e537
--- /dev/null
+++ b/ge/stub/README.md
@@ -0,0 +1,44 @@
+# "stub" usage:
+
+## Description
+
+- The files libge_compiler.so and libgraph.so are used by the IR build application interface.
+
+# Attention
+
+- Do not link against any libraries other than libge_compiler.so and libgraph.so, as other libraries may change in the future.
+
+# Usage
+
+## Compile: compile the application that invokes the IR build API.
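A minimal main.cpp for this compile step is sketched below as an illustration only; it assumes the aclgrphBuildInitialize / aclgrphBuildModel / aclgrphSaveModel / aclgrphBuildFinalize entry points declared in ge_ir_build.h, and the exact signatures, include paths, and build option keys may differ between CANN/ATC releases. The Makefile that follows compiles a file like this one (see its SRCS entry).

```cpp
// Hypothetical main.cpp for the IR build flow; the aclgrphBuild* signatures are
// assumed from ge_ir_build.h and may need adjusting for your CANN/ATC version.
#include <iostream>
#include <map>
#include <string>
#include "graph/graph.h"      // adjust include paths to match the -I flags below
#include "ge/ge_ir_build.h"

int main() {
  std::map<std::string, std::string> global_options;  // e.g. soc_version, precision mode
  if (ge::aclgrphBuildInitialize(global_options) != ge::GRAPH_SUCCESS) {
    std::cerr << "aclgrphBuildInitialize failed" << std::endl;
    return -1;
  }

  ge::Graph graph("ir_build_sample");
  // construct the graph here with ge::op operators, then set its inputs and outputs

  ge::ModelBufferData model;
  std::map<std::string, std::string> build_options;
  if (ge::aclgrphBuildModel(graph, build_options, model) == ge::GRAPH_SUCCESS) {
    (void)ge::aclgrphSaveModel("ir_build_sample", model);  // writes ir_build_sample.om
  } else {
    std::cerr << "aclgrphBuildModel failed" << std::endl;
  }

  ge::aclgrphBuildFinalize();
  return 0;
}
```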
+
+Makefile:
+
+'''
+
+ATC_INCLUDE_DIR := $(ASCEND_PATH)/atc/include
+OPP_INCLUDE_DIR := $(ASCEND_PATH)/opp/op_proto/built-in/inc
+LOCAL_MODULE_NAME := ir_build
+CC := g++
+CFLAGS := -std=c++11 -g -Wall
+SRCS := $(wildcard $(LOCAL_DIR)/main.cpp)
+INCLUDES := -I $(ASCEND_OPP_PATH)/op_proto/built-in/inc \
+            -I $(ATC_INCLUDE_DIR)/graph \
+            -I $(ATC_INCLUDE_DIR)/ge \
+
+LIBS := -L ${ASCEND_PATH}/atc/lib64/stub \
+        -lgraph \
+        -lge_compiler
+ir_build:
+    mkdir -p out
+    $(CC) $(SRCS) $(INCLUDES) $(LIBS) $(CFLAGS) -o ./out/$(LOCAL_MODULE_NAME)
+clean:
+    rm -rf out
+
+'''
+make
+
+## Run: before running the application, set LD_LIBRARY_PATH to the real library path under atc/lib64
+
+export LD_LIBRARY_PATH=$(ASCEND_PATH)/atc/lib64
+- ./out/ir_build
diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py
new file mode 100644
index 00000000..0c5e712b
--- /dev/null
+++ b/ge/stub/gen_stubapi.py
@@ -0,0 +1,578 @@
+import os
+import re
+import sys
+import logging
+
+logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s',
+                    level=logging.INFO)
+
+"""
+    this attr is used for symbol table visibility
+"""
+GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
+
+"""
+    generate the stub function body by return type
+"""
+RETURN_STATEMENTS = {
+    'graphStatus': ' std::cout << "[ERROR]: stub library libgraph or libge_compiler cannot be used for execution, please check your "\n '
+                   ' << "environment variables and compilation options to make sure you use the correct library."\n'
+                   ' << std::endl;\n'
+                   ' return ACL_ERROR_COMPILING_STUB_MODE;',
+    'Status': ' return SUCCESS;',
+    'Graph': ' return Graph();',
+    'Graph&': ' return *this;',
+    'Format': ' return Format();',
+    'Format&': ' return *this;',
+    'Shape': ' return Shape();',
+    'Shape&': ' return *this;',
+    'TensorDesc': ' return TensorDesc();',
+    'TensorDesc&': ' return *this;',
+    'Tensor': ' return Tensor();',
+    'Tensor&': ' return *this;',
+    'Operator': ' return Operator();',
+    'Operator&': ' return *this;',
+    'Ptr': ' return nullptr;',
+    'std::string': ' return "";',
+    'std::string&': ' return "";',
+    'string': ' return "";',
+    'int': ' return 0;',
+    'DataType': ' return DT_FLOAT;',
+    'InferenceContextPtr': ' return nullptr;',
+    'SubgraphBuilder': ' return nullptr;',
+    'OperatorImplPtr': ' return nullptr;',
+    'OutHandler': ' return nullptr;',
+    'std::vector': ' return {};',
+    'std::vector': ' return {};',
+    'std::map': ' return {};',
+    'uint32_t': ' return 0;',
+    'int64_t': ' return 0;',
+    'uint64_t': ' return 0;',
+    'size_t': ' return 0;',
+    'float': ' return 0.0f;',
+    'bool': ' return false;',
+}
+
+"""
+    max code length per line in Huawei software programming specifications
+"""
+max_code_len_per_line = 100
+
+"""
+    white_list_for_debug and include_dir_key_words determine which
+    header files to generate cc files from when DEBUG is on
+"""
+white_list_for_debug = ["attr_value.h", "operator.h", "tensor.h", "graph.h", "operator_factory.h",
+                        "ge_ir_build.h", "ge_api.h", "ge_prof.h", "tensorflow_parser.h", "caffe_parser.h"]
+include_dir_key_words = ["ge", "graph", "parser"]
+DEBUG = True
+
+
+def need_generate_func(func_line):
+    """
+    :param func_line:
+    :return:
+    """
+    if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \
+            or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"):
+        return False
+    return True
+
+
+def file_endswith_white_list_suffix(file):
+    """
+    :param file:
+    :return:
+    """
+    if DEBUG:
+        for suffix in
white_list_for_debug: + if file.endswith(suffix): + return True + return False + else: + return True + + +""" + belows are patterns used for analyse .h file +""" +# pattern function +pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find and delete after +([a-zA-Z~_] # void int likely +.* +[)] #we find ) +(?!.*{) # we do not want the case int abc() const { return 1;} +.*) +(;.*) #we want to find ; and after for we will replace these later +\n$ +""", re.VERBOSE | re.MULTILINE | re.DOTALL) + +# pattern comment +pattern_comment = re.compile(r'^\s*//') +pattern_comment_2_start = re.compile(r'^\s*/[*]') +pattern_comment_2_end = re.compile(r'[*]/\s*$') +# pattern define +pattern_define = re.compile(r'^\s*#define') +pattern_define_return = re.compile(r'\\\s*$') +# blank line +pattern_blank_line = re.compile(r'^\s*$') +# virtual,explicit,friend,static +pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') +# lead space +pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') +# functions will have patterns such as func ( or func( +# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist +# format like :"operator = ()" +pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') +# template +pattern_template = re.compile(r'^\s*template') +pattern_template_end = re.compile(r'>\s*$') +# namespace +pattern_namespace = re.compile(r'namespace.*{') +# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with +pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+ 0 and not friend_match: + line, func_name = self.handle_class_member_func(line, template_string) + # Normal functions + else: + line, func_name = self.handle_normal_func(line, template_string) + + need_generate = need_generate_func(line) + # func body + line += self.implement_function(line) + # comment + line = self.gen_comment(start_i) + line + # write to out file + self.write_func_content(line, func_name, need_generate) + # next loop + self.line_index += 1 + + logging.info('Added %s functions', len(self.func_list_exist)) + logging.info('Successfully converted,please see ' + self.output_file) + + def handle_func1(self, line): + """ + :param line: + :return: + """ + find1 = re.search('[(]', line) + if not find1: + self.line_index += 1 + return "continue", line, None + find2 = re.search('[)]', line) + start_i = self.line_index + space_match = pattern_leading_space.search(line) + # deal with + # int abc(int a, + # int b) + if find1 and (not find2): + self.line_index += 1 + line2 = self.input_content[self.line_index] + if space_match: + line2 = re.sub('^' + space_match.group(1), '', line2) + line += line2 + while self.line_index < len(self.input_content) and (not re.search('[)]', line2)): + self.line_index += 1 + line2 = self.input_content[self.line_index] + line2 = re.sub('^' + space_match.group(1), '', line2) + line += line2 + + match_start = pattern_start.search(self.input_content[self.line_index]) + match_end = pattern_end.search(self.input_content[self.line_index]) + if match_start: # like ) { or ) {} int the last line + if not match_end: + self.stack.append('normal_now') + ii = start_i + while ii <= self.line_index: + ii += 1 + self.line_index += 1 + return "continue", line, start_i + logging.info("line[%s]", line) + # ' int abc();'->'int abc()' + (line, match) = pattern_func.subn(r'\2\n', line) + logging.info("line[%s]", line) + # deal with case: + # 'int \n 
abc(int a, int b)' + if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]): + line = self.input_content[start_i - 1] + line + line = line.lstrip() + if not match: + self.line_index += 1 + return "continue", line, start_i + return "pass", line, start_i + + def handle_stack(self, match_start): + """ + :param match_start: + :return: + """ + line = self.input_content[self.line_index] + match_end = pattern_end.search(line) + if match_start: + self.stack.append('normal_now') + if match_end: + top_status = self.stack.pop() + if top_status == 'namespace_now': + self.output_fd.write(line + '\n') + elif top_status == 'class_now': + self.stack_class.pop() + self.stack_template.pop() + if match_start or match_end: + self.line_index += 1 + return "continue" + + if len(self.stack) > 0 and self.stack[-1] == 'normal_now': + self.line_index += 1 + return "continue" + return "pass" + + def handle_class(self, template_string, line, match_start, match_class): + """ + :param template_string: + :param line: + :param match_start: + :param match_class: + :return: + """ + if match_class: # we face a class + self.stack_template.append(template_string) + self.stack.append('class_now') + class_name = match_class.group(3) + + # class template specializations: class A > + if '<' in class_name: + k = line.index('<') + fit = 1 + for ii in range(k + 1, len(line)): + if line[ii] == '<': + fit += 1 + if line[ii] == '>': + fit -= 1 + if fit == 0: + break + class_name += line[k + 1:ii + 1] + logging.info('class_name[%s]', class_name) + self.stack_class.append(class_name) + while not match_start: + self.line_index += 1 + line = self.input_content[self.line_index] + match_start = pattern_start.search(line) + self.line_index += 1 + return "continue" + return "pass" + + def handle_template(self): + line = self.input_content[self.line_index] + match_template = pattern_template.search(line) + template_string = '' + if match_template: + match_template_end = pattern_template_end.search(line) + template_string = line + while not match_template_end: + self.line_index += 1 + line = self.input_content[self.line_index] + template_string += line + match_template_end = pattern_template_end.search(line) + self.line_index += 1 + return template_string + + def handle_namespace(self): + line = self.input_content[self.line_index] + match_namespace = pattern_namespace.search(line) + if match_namespace: # we face namespace + self.output_fd.write(line + '\n') + self.stack.append('namespace_now') + self.line_index += 1 + + def handle_normal_func(self, line, template_string): + template_line = '' + self.stack_template.append(template_string) + if self.stack_template[-1] != '': + template_line = re.sub(r'\s*template', 'template', self.stack_template[-1]) + # change '< class T = a, class U = A(3)>' to '' + template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line) + template_line = re.sub(r'\s*=.*,', ',', template_line) + template_line = re.sub(r'\s*=.*', '', template_line) + line = re.sub(r'\s*=.*,', ',', line) + line = re.sub(r'\s*=.*\)', ')', line) + line = template_line + line + self.stack_template.pop() + func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group() + logging.info("line[%s]", line) + logging.info("func_name[%s]", func_name) + return line, func_name + + def handle_class_member_func(self, line, template_string): + template_line = '' + x = '' + if template_string != '': + template_string = re.sub(r'\s*template', 'template', template_string) + template_string = re.sub(r'\s*=.*>(\s*)$', 
r'>\1', template_string) + template_string = re.sub(r'\s*=.*,', ',', template_string) + template_string = re.sub(r'\s*=.*', '', template_string) + if self.stack_template[-1] != '': + if not (re.search(r'<\s*>', stack_template[-1])): + template_line = re.sub(r'^\s*template', 'template', stack_template[-1]) + if not (re.search(r'<.*>', self.stack_class[-1])): + # for x we get like template -> + x = re.sub(r'template\s*<', '<', template_line) # remove template -> + x = re.sub(r'\n', '', x) + x = re.sub(r'\s*=.*,', ',', x) + x = re.sub(r'\s*=.*\>', '>', x) + x = x.rstrip() # remove \n + x = re.sub(r'(class|typename)\s+|(|\s*class)', '', + x) # remove class,typename -> + x = re.sub(r'<\s+', '<', x) + x = re.sub(r'\s+>', '>', x) + x = re.sub(r'\s+,', ',', x) + x = re.sub(r',\s+', ', ', x) + line = re.sub(r'\s*=\s+0', '', line) + line = re.sub(r'\s*=\s+.*,', ',', line) + line = re.sub(r'\s*=\s+.*\)', ')', line) + logging.info("x[%s]\nline[%s]", x, line) + # if the function is long, void ABC::foo() + # breaks into two lines void ABC::\n foo() + temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1) + if len(temp_line) > max_code_len_per_line: + line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1) + else: + line = temp_line + logging.info("line[%s]", line) + # add template as the above if there is one + template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line) + template_line = re.sub(r'\s*=.*,', ',', template_line) + template_line = re.sub(r'\s*=.*', '', template_line) + line = template_line + template_string + line + func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group() + logging.info("line[%s]", line) + logging.info("func_name[%s]", func_name) + return line, func_name + + def write_func_content(self, content, func_name, need_generate): + if not (func_name in self.func_list_exist) and need_generate: + self.output_fd.write(content) + self.func_list_exist.append(func_name) + logging.info('add func:[%s]', func_name) + + def gen_comment(self, start_i): + comment_line = '' + # Function comments are on top of function declarations, copy them over + k = start_i - 1 # one line before this func start + if pattern_template.search(self.input_content[k]): + k -= 1 + if pattern_comment_2_end.search(self.input_content[k]): + comment_line = self.input_content[k].lstrip() + while not pattern_comment_2_start.search(self.input_content[k]): + k -= 1 + comment_line = self.input_content[k].lstrip() + comment_line + else: + for j in range(k, 0, -1): + c_line = self.input_content[j] + if pattern_comment.search(c_line): + c_line = re.sub(r'\s*//', '//', c_line) + comment_line = c_line + comment_line + else: + break + return comment_line + + @staticmethod + def implement_function(func): + function_def = '' + function_def += '{\n' + + all_items = func.split() + start = 0 + return_type = all_items[start] + if return_type == "const": + start += 1 + return_type = all_items[start] + if return_type.startswith(('std::map', 'std::set', 'std::vector')): + return_type = "std::map" + if return_type.endswith('*') or (len(all_items) > start + 1 and all_items[start + 1].startswith('*')): + return_type = "Ptr" + if len(all_items) > start + 1 and all_items[start + 1].startswith('&'): + return_type += "&" + if RETURN_STATEMENTS.__contains__(return_type): + function_def += RETURN_STATEMENTS[return_type] + else: + logging.warning("Unhandled return type[%s]", return_type) + + function_def += '\n' + function_def += '}\n' + function_def += '\n' + 
return function_def + + +def collect_header_files(path): + """ + :param path: + :return: + """ + header_files = [] + shared_includes_content = [] + for root, dirs, files in os.walk(path): + files.sort() + for file in files: + if file.find("git") >= 0: + continue + if not file.endswith('.h'): + continue + file_path = os.path.join(root, file) + file_path = file_path.replace('\\', '/') + header_files.append(file_path) + include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:]) + shared_includes_content.append(include_str) + # for acl error code + shared_includes_content.append('#include \n') + shared_includes_content.append('const int ACL_ERROR_COMPILING_STUB_MODE = 100039;\n') + return header_files, shared_includes_content + + +def generate_stub_file(inc_dir, out_cc_dir): + """ + :param inc_dir: + :param out_cc_dir: + :return: + """ + target_header_files, shared_includes_content = collect_header_files(inc_dir) + for header_file in target_header_files: + if not file_endswith_white_list_suffix(header_file): + continue + cc_file = re.sub('.h*$', '.cc', header_file) + h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content) + h_2_cc.h2cc() + + +def gen_code(inc_dir, out_cc_dir): + """ + :param inc_dir: + :param out_cc_dir: + :return: + """ + if not inc_dir.endswith('/'): + inc_dir += '/' + if not out_cc_dir.endswith('/'): + out_cc_dir += '/' + for include_dir_key_word in include_dir_key_words: + generate_stub_file(inc_dir + include_dir_key_word, out_cc_dir) + + +if __name__ == '__main__': + inc_dir = sys.argv[1] + out_cc_dir = sys.argv[2] + gen_code(inc_dir, out_cc_dir)
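To make the generator's output concrete, here is a hedged sketch of the shape of a generated stub. Per collect_header_files() and implement_function() above, every generated .cc starts with the collected #include lines plus the ACL_ERROR_COMPILING_STUB_MODE constant, and every stubbed function gets a body taken from RETURN_STATEMENTS according to its return type. The real generator copies declarations verbatim out of the scanned headers (with their namespaces and GE_FUNC_* visibility attributes), so the standalone declaration below is only an illustrative stand-in.

```cpp
// Illustrative shape of one generated stub; the real output copies the original
// declaration from the scanned header, so names and qualifiers will differ.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// collect_header_files() injects this constant into every generated file:
const int ACL_ERROR_COMPILING_STUB_MODE = 100039;

using graphStatus = uint32_t;  // stand-in for the typedef normally pulled in via the headers

graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> options) {
  // body selected from RETURN_STATEMENTS['graphStatus'] by implement_function()
  std::cout << "[ERROR]: stub library libgraph or libge_compiler cannot be used for execution, please check your "
            << "environment variables and compilation options to make sure you use the correct library."
            << std::endl;
  return ACL_ERROR_COMPILING_STUB_MODE;
}
```

Functions whose return type is not listed in RETURN_STATEMENTS are logged with "Unhandled return type" and emitted with an empty body, which is why the dictionary covers the common graph, tensor, and status types used across the white-listed headers.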