| @@ -72,9 +72,9 @@ if (ENABLE_OPEN_SRC) | |||||
| endif() | endif() | ||||
| set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) | set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) | ||||
| set(STATIC_ACL_LIB ${GE_LIB_PATH}) | set(STATIC_ACL_LIB ${GE_LIB_PATH}) | ||||
| find_module(slog libslog.so ${GE_LIB_PATH}) | |||||
| find_module(slog libalog.so ${GE_LIB_PATH}) | |||||
| find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) | find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) | ||||
| find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) | |||||
| find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH}) | |||||
| find_module(hccl libhccl.so ${GE_LIB_PATH}) | find_module(hccl libhccl.so ${GE_LIB_PATH}) | ||||
| find_module(adump_server libadump_server.a ${GE_LIB_PATH}) | find_module(adump_server libadump_server.a ${GE_LIB_PATH}) | ||||
| find_module(runtime libruntime.so ${GE_LIB_PATH}) | find_module(runtime libruntime.so ${GE_LIB_PATH}) | ||||
| @@ -83,12 +83,12 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) | find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) | ||||
| find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) | find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) | ||||
| find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) | find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) | ||||
| find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) | |||||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH}) | |||||
| #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | ||||
| elseif(ENABLE_GE_COV OR ENABLE_GE_UT) | elseif(ENABLE_GE_COV OR ENABLE_GE_UT) | ||||
| add_subdirectory(tests) | add_subdirectory(tests) | ||||
| else() | else() | ||||
| find_module(slog libslog.so ${ASCEND_ATC_DIR}) | |||||
| find_module(slog libalog.so ${ASCEND_ATC_DIR}) | |||||
| find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) | find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
| if(PLATFORM STREQUAL "train") | if(PLATFORM STREQUAL "train") | ||||
| @@ -97,7 +97,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) | find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
| if(PRODUCT STREQUAL "flr3") | if(PRODUCT STREQUAL "flr3") | ||||
| message(FATAL_ERROR "This platform is not supported in train mode, build terminated") | message(FATAL_ERROR "This platform is not supported in train mode, build terminated") | ||||
| @@ -109,7 +109,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(resource libresource.so ${ASCEND_ATC_DIR}) | find_module(resource libresource.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | ||||
| if(PRODUCT STREQUAL "flr3") | if(PRODUCT STREQUAL "flr3") | ||||
| elseif(PRODUCT STREQUAL "flr1") | elseif(PRODUCT STREQUAL "flr1") | ||||
| @@ -120,7 +120,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | ||||
| endif() | endif() | ||||
| elseif(PLATFORM STREQUAL "all") | elseif(PLATFORM STREQUAL "all") | ||||
| find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
| find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
| find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | ||||
| find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | ||||
| find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | ||||
| @@ -128,7 +128,7 @@ if (ENABLE_OPEN_SRC) | |||||
| find_module(resource libresource.so ${ASCEND_ATC_DIR}) | find_module(resource libresource.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
| find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
| find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | |||||
| find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
| #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | ||||
| else() | else() | ||||
| @@ -154,7 +154,7 @@ elseif (ENABLE_D OR ENABLE_ACL) | |||||
| include(cmake/intf_pub_linux.cmake) | include(cmake/intf_pub_linux.cmake) | ||||
| # common libraries | # common libraries | ||||
| find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) | |||||
| find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | ||||
| find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | ||||
| @@ -174,7 +174,7 @@ elseif(ENABLE_MS_TESTCASES) | |||||
| include(cmake/intf_pub_linux.cmake) | include(cmake/intf_pub_linux.cmake) | ||||
| # common libraries | # common libraries | ||||
| find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) | |||||
| find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) | |||||
| find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | ||||
| find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | ||||
| @@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE | |||||
| $<$<CONFIG:Debug>:CFG_BUILD_DEBUG> | $<$<CONFIG:Debug>:CFG_BUILD_DEBUG> | ||||
| WIN64=1 | WIN64=1 | ||||
| LINUX=0 | LINUX=0 | ||||
| LOG_CPP | |||||
| ) | ) | ||||
| target_link_options(intf_pub INTERFACE | target_link_options(intf_pub INTERFACE | ||||
| -Wl,-z,relro | -Wl,-z,relro | ||||
| @@ -1,7 +1,6 @@ | |||||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | ||||
| add_subdirectory(common) | add_subdirectory(common) | ||||
| add_subdirectory(plugin/engine) | add_subdirectory(plugin/engine) | ||||
| add_subdirectory(graph/build/memory) | |||||
| add_subdirectory(ge_local_engine) | add_subdirectory(ge_local_engine) | ||||
| add_subdirectory(host_cpu_engine) | add_subdirectory(host_cpu_engine) | ||||
| add_subdirectory(executor) | add_subdirectory(executor) | ||||
| @@ -125,7 +124,7 @@ set(TRAIN_SRC_LIST | |||||
| "graph/manager/graph_var_manager.cc" | "graph/manager/graph_var_manager.cc" | ||||
| "graph/manager/host_mem_manager.cc" | "graph/manager/host_mem_manager.cc" | ||||
| "graph/manager/rdma_pool_allocator.cc" | "graph/manager/rdma_pool_allocator.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc> | |||||
| "graph/manager/host_mem_allocator.cc" | |||||
| "graph/manager/memory_api.cc" | "graph/manager/memory_api.cc" | ||||
| "graph/manager/model_manager/event_manager.cc" | "graph/manager/model_manager/event_manager.cc" | ||||
| "graph/manager/trans_var_data_utils.cc" | "graph/manager/trans_var_data_utils.cc" | ||||
| @@ -167,7 +166,7 @@ set(TRAIN_SRC_LIST | |||||
| "graph/passes/hccl_group_pass.cc" | "graph/passes/hccl_group_pass.cc" | ||||
| "graph/passes/enter_pass.cc" | "graph/passes/enter_pass.cc" | ||||
| "graph/passes/assign_remove_pass.cc" | "graph/passes/assign_remove_pass.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc> | |||||
| "graph/passes/inplace_support_check_pass.cc" | |||||
| "graph/passes/flow_ctrl_pass.cc" | "graph/passes/flow_ctrl_pass.cc" | ||||
| "graph/passes/global_step_insert_pass.cc" | "graph/passes/global_step_insert_pass.cc" | ||||
| "host_kernels/transpose_kernel.cc" | "host_kernels/transpose_kernel.cc" | ||||
| @@ -342,6 +341,13 @@ set(TRAIN_SRC_LIST | |||||
| "analyzer/analyzer.cc" | "analyzer/analyzer.cc" | ||||
| "ir_build/ge_ir_build.cc" | "ir_build/ge_ir_build.cc" | ||||
| "ir_build/atc_ir_common.cc" | "ir_build/atc_ir_common.cc" | ||||
| "graph/build/memory/memory_assigner.cc" | |||||
| "graph/build/memory/graph_mem_assigner.cc" | |||||
| "graph/build/memory/binary_block_mem_assigner.cc" | |||||
| "graph/build/memory/block_mem_assigner.cc" | |||||
| "graph/build/memory/hybrid_mem_assigner.cc" | |||||
| "graph/build/memory/max_block_mem_assigner.cc" | |||||
| "graph/build/memory/var_mem_assign_util.cc" | |||||
| ) | ) | ||||
| set(INFER_SRC_LIST | set(INFER_SRC_LIST | ||||
| @@ -403,7 +409,7 @@ set(INFER_SRC_LIST | |||||
| "graph/manager/graph_var_manager.cc" | "graph/manager/graph_var_manager.cc" | ||||
| "graph/manager/host_mem_manager.cc" | "graph/manager/host_mem_manager.cc" | ||||
| "graph/manager/rdma_pool_allocator.cc" | "graph/manager/rdma_pool_allocator.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc> | |||||
| "graph/manager/host_mem_allocator.cc" | |||||
| "graph/manager/graph_mem_allocator.cc" | "graph/manager/graph_mem_allocator.cc" | ||||
| "graph/manager/graph_caching_allocator.cc" | "graph/manager/graph_caching_allocator.cc" | ||||
| "model/ge_model.cc" | "model/ge_model.cc" | ||||
| @@ -525,7 +531,7 @@ set(INFER_SRC_LIST | |||||
| "graph/passes/for_pass.cc" | "graph/passes/for_pass.cc" | ||||
| "graph/passes/enter_pass.cc" | "graph/passes/enter_pass.cc" | ||||
| "graph/passes/assign_remove_pass.cc" | "graph/passes/assign_remove_pass.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc> | |||||
| "graph/passes/inplace_support_check_pass.cc" | |||||
| "graph/passes/addn_pass.cc" | "graph/passes/addn_pass.cc" | ||||
| "graph/passes/common_subexpression_elimination_pass.cc" | "graph/passes/common_subexpression_elimination_pass.cc" | ||||
| "graph/passes/remove_same_const_pass.cc" | "graph/passes/remove_same_const_pass.cc" | ||||
| @@ -611,11 +617,35 @@ set(INFER_SRC_LIST | |||||
| "graph/label/while_label_maker.cc" | "graph/label/while_label_maker.cc" | ||||
| "graph/label/partitioned_call_label_maker.cc" | "graph/label/partitioned_call_label_maker.cc" | ||||
| "analyzer/analyzer.cc" | "analyzer/analyzer.cc" | ||||
| "graph/build/memory/memory_assigner.cc" | |||||
| "graph/build/memory/graph_mem_assigner.cc" | |||||
| "graph/build/memory/binary_block_mem_assigner.cc" | |||||
| "graph/build/memory/block_mem_assigner.cc" | |||||
| "graph/build/memory/hybrid_mem_assigner.cc" | |||||
| "graph/build/memory/max_block_mem_assigner.cc" | |||||
| "graph/build/memory/var_mem_assign_util.cc" | |||||
| ) | ) | ||||
| if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | ||||
| ############ libge_runner.so ############ | ############ libge_runner.so ############ | ||||
| add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) | |||||
| add_library(ge_runner SHARED | |||||
| ${TRAIN_SRC_LIST} | |||||
| ${PROTO_SRCS} | |||||
| ${PROTO_CLIENT_SRCS} | |||||
| $<TARGET_OBJECTS:$<IF:$<TARGET_EXISTS:msprofiler_fwk>,msprofiler_fwk,msprofiler_fwk_object>> | |||||
| ) | |||||
| add_library(msprofiler_fwk_object OBJECT IMPORTED GLOBAL) | |||||
| if (msprofiler_fwk_ext_LIBRARY_DIR) | |||||
| file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) | |||||
| execute_process( | |||||
| COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR} | |||||
| WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object | |||||
| ) | |||||
| file(GLOB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) | |||||
| set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) | |||||
| endif() | |||||
| target_compile_definitions(ge_runner PRIVATE | target_compile_definitions(ge_runner PRIVATE | ||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| @@ -624,7 +654,6 @@ target_compile_definitions(ge_runner PRIVATE | |||||
| FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
| DAVINCI_CLOUD | DAVINCI_CLOUD | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_runner PRIVATE | target_compile_options(ge_runner PRIVATE | ||||
| @@ -660,12 +689,8 @@ target_include_directories(ge_runner PRIVATE | |||||
| target_link_libraries(ge_runner PRIVATE | target_link_libraries(ge_runner PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ge_memory | |||||
| adump_server | adump_server | ||||
| static_mmpa | static_mmpa | ||||
| -Wl,--whole-archive | |||||
| msprofiler_fwk | |||||
| -Wl,--no-whole-archive | |||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| graph | graph | ||||
| ge_common | ge_common | ||||
| @@ -692,7 +717,6 @@ target_compile_definitions(ge_compiler PRIVATE | |||||
| FMK_HOST_INFER | FMK_HOST_INFER | ||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_compiler PRIVATE | target_compile_options(ge_compiler PRIVATE | ||||
| @@ -728,7 +752,6 @@ target_include_directories(ge_compiler PRIVATE | |||||
| target_link_libraries(ge_compiler PRIVATE | target_link_libraries(ge_compiler PRIVATE | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ge_memory | |||||
| static_mmpa | static_mmpa | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| graph | graph | ||||
| @@ -755,7 +778,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object) | |||||
| if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a) | if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a) | ||||
| execute_process( | execute_process( | ||||
| COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a | COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a | ||||
| WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object | |||||
| WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object | |||||
| ) | ) | ||||
| file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o) | file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o) | ||||
| else() | else() | ||||
| @@ -764,11 +787,23 @@ endif() | |||||
| add_library(opensrc_ascendcl SHARED | add_library(opensrc_ascendcl SHARED | ||||
| ${OBJECT_LIST} | ${OBJECT_LIST} | ||||
| $<TARGET_OBJECTS:$<IF:$<TARGET_EXISTS:msprofiler>,msprofiler,msprofiler_object>> | |||||
| ) | ) | ||||
| add_library(msprofiler_object OBJECT IMPORTED GLOBAL) | |||||
| if (msprofiler_ext_LIBRARY_DIR) | |||||
| file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object) | |||||
| execute_process( | |||||
| COMMAND ar x ${msprofiler_ext_LIBRARY_DIR} | |||||
| WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object | |||||
| ) | |||||
| file(GLOB MSPROFILER_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) | |||||
| set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_OBJECT_LIST}) | |||||
| endif() | |||||
| target_compile_definitions(opensrc_ascendcl PRIVATE | target_compile_definitions(opensrc_ascendcl PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(opensrc_ascendcl PRIVATE | target_compile_options(opensrc_ascendcl PRIVATE | ||||
| @@ -780,14 +815,7 @@ target_link_options(opensrc_ascendcl PRIVATE | |||||
| -Wl,--allow-multiple-definition | -Wl,--allow-multiple-definition | ||||
| -Wl,-z,muldefs | -Wl,-z,muldefs | ||||
| -Wl,-Bsymbolic | -Wl,-Bsymbolic | ||||
| -Wl,--exclude-libs,libascend_protobuf.a | |||||
| -Wl,--exclude-libs,libge_executor.a | |||||
| -Wl,--exclude-libs,libge_common.a | |||||
| -Wl,--exclude-libs,libgraph.a | |||||
| -Wl,--exclude-libs,libmmpa.a | |||||
| -Wl,--exclude-libs,libregister.a | |||||
| -Wl,--exclude-libs,liberror_manager.a | |||||
| -Wl,--exclude-libs,libadump_server.a | |||||
| -Wl,--exclude-libs,ALL | |||||
| ) | ) | ||||
| target_link_libraries(opensrc_ascendcl PRIVATE | target_link_libraries(opensrc_ascendcl PRIVATE | ||||
| -Wl,--whole-archive | -Wl,--whole-archive | ||||
| @@ -799,7 +827,6 @@ target_link_libraries(opensrc_ascendcl PRIVATE | |||||
| register_static | register_static | ||||
| error_manager_static | error_manager_static | ||||
| adump_server | adump_server | ||||
| msprofiler | |||||
| -Wl,--no-whole-archive | -Wl,--no-whole-archive | ||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| c_sec | c_sec | ||||
| @@ -217,10 +217,15 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_ | |||||
| json jsn; | json jsn; | ||||
| GraphInfoToJson(jsn, *graph_info); | GraphInfoToJson(jsn, *graph_info); | ||||
| json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; | |||||
| bool ret_failed = false; | |||||
| try { | |||||
| json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; | |||||
| } catch (nlohmann::detail::type_error &e) { | |||||
| GELOGE(FAILED, "analyzer file [%s] failed because [%s]", json_file_name_.c_str(), e.what()); | |||||
| ret_failed = true; | |||||
| } | |||||
| json_file_.close(); | json_file_.close(); | ||||
| return SUCCESS; | |||||
| return ret_failed ? FAILED : SUCCESS; | |||||
| } | } | ||||
| ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { | ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { | ||||
| @@ -32,9 +32,7 @@ | |||||
| #include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
| #include "register/op_registry.h" | #include "register/op_registry.h" | ||||
| #include "common/ge/tbe_plugin_manager.h" | #include "common/ge/tbe_plugin_manager.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "toolchain/plog.h" | #include "toolchain/plog.h" | ||||
| #endif | |||||
| using domi::OpRegistry; | using domi::OpRegistry; | ||||
| using std::map; | using std::map; | ||||
| @@ -132,11 +130,9 @@ Status GEInitializeImpl(const std::map<string, string> &options) { | |||||
| // Initialize GE, prepare for execution, call GELib::Initialize | // Initialize GE, prepare for execution, call GELib::Initialize | ||||
| Status GEInitialize(const std::map<string, string> &options) { | Status GEInitialize(const std::map<string, string> &options) { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (DlogReportInitialize() != SUCCESS) { | if (DlogReportInitialize() != SUCCESS) { | ||||
| GELOGW("Dlog report device log initialize failed."); | GELOGW("Dlog report device log initialize failed."); | ||||
| } | } | ||||
| #endif | |||||
| return GEInitializeImpl(options); | return GEInitializeImpl(options); | ||||
| } | } | ||||
| @@ -151,11 +147,9 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) { | |||||
| std::string val = option.second.GetString(); | std::string val = option.second.GetString(); | ||||
| str_options[key] = val; | str_options[key] = val; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (DlogReportInitialize() != SUCCESS) { | if (DlogReportInitialize() != SUCCESS) { | ||||
| GELOGW("Dlog report device log initialize failed."); | GELOGW("Dlog report device log initialize failed."); | ||||
| } | } | ||||
| #endif | |||||
| return GEInitializeImpl(str_options); | return GEInitializeImpl(str_options); | ||||
| } | } | ||||
| @@ -200,11 +194,9 @@ Status GEFinalize() { | |||||
| // to avoid memory fragment, use malloc_trim to back free stack to system | // to avoid memory fragment, use malloc_trim to back free stack to system | ||||
| malloc_trim(0); | malloc_trim(0); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (DlogReportFinalize() != SUCCESS) { | if (DlogReportFinalize() != SUCCESS) { | ||||
| GELOGW("Dlog report device log finalize failed."); | GELOGW("Dlog report device log finalize failed."); | ||||
| } | } | ||||
| #endif | |||||
| GELOGT(TRACE_STOP, "GEFinalize finished"); | GELOGT(TRACE_STOP, "GEFinalize finished"); | ||||
| return ret; | return ret; | ||||
| @@ -73,7 +73,6 @@ target_compile_definitions(ge_common PRIVATE | |||||
| FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
| OS_CENTOS | OS_CENTOS | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_common PRIVATE | target_compile_options(ge_common PRIVATE | ||||
| @@ -133,7 +132,6 @@ target_compile_definitions(ge_common_static PRIVATE | |||||
| $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | ||||
| $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | ||||
| LOG_CPP | LOG_CPP | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_common_static PRIVATE | target_compile_options(ge_common_static PRIVATE | ||||
| @@ -182,7 +180,6 @@ target_compile_definitions(ge_common PRIVATE | |||||
| FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
| OS_CENTOS | OS_CENTOS | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_common PRIVATE | target_compile_options(ge_common PRIVATE | ||||
| @@ -94,7 +94,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt | |||||
| if (options.profiling_mode == "1" && !options.profiling_options.empty()) { | if (options.profiling_mode == "1" && !options.profiling_options.empty()) { | ||||
| // enable profiling by ge option | // enable profiling by ge option | ||||
| if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), | if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), | ||||
| MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { | |||||
| MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { | |||||
| GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); | GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -124,8 +124,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt | |||||
| return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
| } | } | ||||
| if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), | |||||
| MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { | |||||
| if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) != | |||||
| EOK) { | |||||
| GELOGE(INTERNAL_ERROR, "copy job_id failed."); | GELOGE(INTERNAL_ERROR, "copy job_id failed."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -302,6 +302,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
| } | } | ||||
| data.append(" model_id:").append(std::to_string(model_id)); | data.append(" model_id:").append(std::to_string(model_id)); | ||||
| data.append(" task_id:").append(std::to_string(graph.task_id)); | |||||
| data.append(" stream_id:").append(std::to_string(graph.stream_id)); | |||||
| data.append("\n"); | data.append("\n"); | ||||
| GraphDescReport(device_id, data); | GraphDescReport(device_id, data); | ||||
| @@ -480,6 +480,9 @@ REGISTER_OPTYPE_DEFINE(HVDWAIT, "HorovodWait"); | |||||
| // aicpu op for online_infer dynamic_dims | // aicpu op for online_infer dynamic_dims | ||||
| REGISTER_OPTYPE_DEFINE(GETDYNAMICDIMS, "GetDynamicDims"); | REGISTER_OPTYPE_DEFINE(GETDYNAMICDIMS, "GetDynamicDims"); | ||||
| // profiling training trace node | |||||
| REGISTER_OPTYPE_DEFINE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); | |||||
| const std::string MODEL_ATTR_TASKS = "tasks"; | const std::string MODEL_ATTR_TASKS = "tasks"; | ||||
| const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR = "task_gen_base_addr"; | const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR = "task_gen_base_addr"; | ||||
| const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr"; | const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr"; | ||||
| @@ -28,7 +28,7 @@ set(SRC_LIST | |||||
| "../graph/manager/trans_var_data_utils.cc" | "../graph/manager/trans_var_data_utils.cc" | ||||
| "../graph/manager/util/debug.cc" | "../graph/manager/util/debug.cc" | ||||
| "../graph/manager/rdma_pool_allocator.cc" | "../graph/manager/rdma_pool_allocator.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:../graph/manager/host_mem_allocator.cc> | |||||
| "../graph/manager/host_mem_allocator.cc" | |||||
| "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | ||||
| "../model/ge_model.cc" | "../model/ge_model.cc" | ||||
| "../model/ge_root_model.cc" | "../model/ge_root_model.cc" | ||||
| @@ -175,7 +175,6 @@ target_compile_definitions(ge_executor PRIVATE | |||||
| $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | ||||
| $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | ||||
| LOG_CPP | LOG_CPP | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_executor PRIVATE | target_include_directories(ge_executor PRIVATE | ||||
| @@ -218,7 +217,6 @@ target_compile_definitions(ge_executor_shared PRIVATE | |||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| DAVINCI_SUPPORT_PROFILING | DAVINCI_SUPPORT_PROFILING | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_executor_shared PRIVATE | target_include_directories(ge_executor_shared PRIVATE | ||||
| @@ -676,7 +676,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); | |||||
| Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("GetAIPPInfo is not success."); | GELOGW("GetAIPPInfo is not success."); | ||||
| return ret; | return ret; | ||||
| @@ -713,43 +713,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||||
| std::vector<TensorDesc> &output_desc) { | |||||
| GELOGI("get model desc info for zero copy begin."); | |||||
| if (!isInit_) { | |||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | |||||
| } | |||||
| std::vector<InputOutputDescInfo> input_desc_infos; | |||||
| std::vector<InputOutputDescInfo> output_desc_infos; | |||||
| std::vector<uint32_t> input_formats; | |||||
| std::vector<uint32_t> output_formats; | |||||
| Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos, | |||||
| input_formats, output_formats); | |||||
| if (ret != domi::SUCCESS) { | |||||
| GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret); | |||||
| return ACL_ERROR_GE_GET_TENSOR_INFO; | |||||
| } | |||||
| if (input_formats.size() != input_desc_infos.size()) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "input_formats.size() != input_desc_infos.size()."); | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | |||||
| } | |||||
| if (output_formats.size() != output_desc_infos.size()) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats.size() != output_desc_infos.size()."); | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | |||||
| } | |||||
| GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); | |||||
| GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); | |||||
| GELOGI("get model desc info from zero copy end."); | |||||
| return ge::SUCCESS; | |||||
| } | |||||
| Status GeExecutor::CommandHandle(const Command &command) { | Status GeExecutor::CommandHandle(const Command &command) { | ||||
| Status ret = GraphLoader::CommandHandle(command); | Status ret = GraphLoader::CommandHandle(command); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -31,7 +31,6 @@ target_compile_options(ge_local_engine PRIVATE | |||||
| target_compile_definitions(ge_local_engine PRIVATE | target_compile_definitions(ge_local_engine PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_local_engine PRIVATE | target_include_directories(ge_local_engine PRIVATE | ||||
| @@ -73,7 +72,6 @@ target_compile_options(atc_ge_local_engine PRIVATE | |||||
| target_compile_definitions(atc_ge_local_engine PRIVATE | target_compile_definitions(atc_ge_local_engine PRIVATE | ||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(atc_ge_local_engine PRIVATE | target_include_directories(atc_ge_local_engine PRIVATE | ||||
| @@ -119,7 +117,6 @@ target_compile_options(ge_local_opskernel_builder PRIVATE | |||||
| target_compile_definitions(ge_local_opskernel_builder PRIVATE | target_compile_definitions(ge_local_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_local_opskernel_builder PRIVATE | target_include_directories(ge_local_opskernel_builder PRIVATE | ||||
| @@ -161,7 +158,6 @@ target_compile_options(atc_ge_local_opskernel_builder PRIVATE | |||||
| target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE | target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(atc_ge_local_opskernel_builder PRIVATE | target_include_directories(atc_ge_local_opskernel_builder PRIVATE | ||||
| @@ -209,7 +205,6 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE | |||||
| target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_local_opskernel_builder_static PRIVATE | target_include_directories(ge_local_opskernel_builder_static PRIVATE | ||||
| @@ -26,7 +26,6 @@ | |||||
| #include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
| namespace { | namespace { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | ||||
| case (DTYPE): { \ | case (DTYPE): { \ | ||||
| GeTensorPtr ge_tensor = nullptr; \ | GeTensorPtr ge_tensor = nullptr; \ | ||||
| @@ -50,43 +49,6 @@ namespace { | |||||
| named_outputs.emplace(tensor_name, tensor); \ | named_outputs.emplace(tensor_name, tensor); \ | ||||
| break; \ | break; \ | ||||
| } | } | ||||
| #else | |||||
| #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | |||||
| case (DTYPE): { \ | |||||
| GeTensorPtr ge_tensor = nullptr; \ | |||||
| if (need_create_flag) { \ | |||||
| GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ | |||||
| std::unique_ptr<TYPE[]> buf(new (std::nothrow) TYPE[data_num]()); \ | |||||
| if (buf == nullptr) { \ | |||||
| GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ | |||||
| static_cast<size_t>(sizeof(TYPE) * data_num)); \ | |||||
| return MEMALLOC_FAILED; \ | |||||
| } \ | |||||
| ge_tensor = MakeShared<GeTensor>(out_desc); \ | |||||
| GE_CHECK_NOTNULL(ge_tensor); \ | |||||
| GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ | |||||
| if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ | |||||
| GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ | |||||
| return MEMALLOC_FAILED; \ | |||||
| } \ | |||||
| ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ | |||||
| ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ | |||||
| outputs.emplace_back(ge_tensor); \ | |||||
| } else { \ | |||||
| ge_tensor = outputs[i]; \ | |||||
| GE_CHECK_NOTNULL(ge_tensor); \ | |||||
| GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \ | |||||
| } \ | |||||
| auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ | |||||
| auto tensor_name = op_desc->GetOutputNameByIndex(i); \ | |||||
| GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ | |||||
| op_desc->GetName().c_str(), i); \ | |||||
| GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ | |||||
| op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ | |||||
| named_outputs.emplace(tensor_name, tensor); \ | |||||
| break; \ | |||||
| } | |||||
| #endif | |||||
| } | } | ||||
| namespace ge { | namespace ge { | ||||
| @@ -27,7 +27,6 @@ target_compile_options(ge_runtime PRIVATE | |||||
| target_compile_definitions(ge_runtime PRIVATE | target_compile_definitions(ge_runtime PRIVATE | ||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_runtime PRIVATE | target_include_directories(ge_runtime PRIVATE | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "graph/build/graph_builder.h" | #include "graph/build/graph_builder.h" | ||||
| #include "graph/build/memory/graph_mem_assigner.h" | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "graph/build/logical_stream_allocator.h" | #include "graph/build/logical_stream_allocator.h" | ||||
| @@ -197,10 +198,8 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } | } | ||||
| GeModelPtr ge_model_ptr = nullptr; | GeModelPtr ge_model_ptr = nullptr; | ||||
| bool is_dynamic_shape = false; | |||||
| // To be compatible with the old process, do not verify the return value temporarily. | |||||
| (void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||||
| if (is_dynamic_shape) { | |||||
| if (comp_graph->GetGraphUnknownFlag()) { | |||||
| GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
| BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), | BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), | ||||
| "Build for dynamic shape graph failed."); | "Build for dynamic shape graph failed."); | ||||
| @@ -270,16 +269,78 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| // Stores on every direct node of comp_graph the data offset of each input that is fed by a CONSTANT node. | |||||
| // Inputs without a constant peer keep offset 0, and every output offset is set to 0. | |||||
| // Returns FAILED if a constant peer carries no weights or if more inputs are linked than the op desc declares. | |||||
| Status GraphBuilder::SetConstantInputOffset(ComputeGraphPtr &comp_graph) { | |||||
| for (auto &node : comp_graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| auto num_inputs = op_desc->GetInputsSize(); | |||||
| std::vector<int64_t> input_offsets(num_inputs, 0); | |||||
| // Counts only anchors that have a peer, so it indexes input_offsets rather than the raw anchor list. | |||||
| int valid_input_index = -1; | |||||
| for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { | |||||
| auto in_anchor = node->GetInDataAnchor(i); | |||||
| GE_CHECK_NOTNULL(in_anchor); | |||||
| auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); | |||||
| if (peer_out_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| ++valid_input_index; | |||||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | |||||
| if (peer_node == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if (peer_node->GetType() != CONSTANT) { | |||||
| continue; | |||||
| } | |||||
| std::vector<GeTensorPtr> weights = OpDescUtils::MutableWeights(peer_node); | |||||
| if (weights.empty()) { | |||||
| // The weights belong to the constant peer, so report that node's name. | |||||
| GELOGE(FAILED, "weights size of node %s is empty", peer_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| GeTensorPtr weight = weights[0]; | |||||
| GE_CHECK_NOTNULL(weight); | |||||
| int64_t input_offset = 0; | |||||
| (void) TensorUtils::GetDataOffset(weight->MutableTensorDesc(), input_offset); | |||||
| // valid_input_index must be smaller than num_inputs; fail instead of writing out of bounds. | |||||
| if (static_cast<size_t>(valid_input_index) >= input_offsets.size()) { | |||||
| GELOGE(FAILED, "[%s] input index %d is out of range, input num = %zu", node->GetName().c_str(), valid_input_index, input_offsets.size()); | |||||
| return FAILED; | |||||
| } | |||||
| input_offsets[valid_input_index] = input_offset; | |||||
| GELOGD("[%s] input[%d] is const, offset = %ld", node->GetName().c_str(), valid_input_index, input_offset); | |||||
| } | |||||
| op_desc->SetInputOffset(input_offsets); | |||||
| std::vector<int64_t> output_offsets(op_desc->GetOutputsSize(), 0); | |||||
| op_desc->SetOutputOffset(output_offsets); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id) { | uint64_t session_id) { | ||||
| GELOGI("Begin to build unknown shape graph[%s].", comp_graph->GetName().c_str()); | GELOGI("Begin to build unknown shape graph[%s].", comp_graph->GetName().c_str()); | ||||
| Graph2SubGraphInfoList subgraph_map; | |||||
| ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
| GE_DUMP(comp_graph, "BeforePreBuildModel"); | |||||
| GE_TIMESTAMP_START(PreBuildModel); | |||||
| GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", | |||||
| comp_graph->GetName().c_str()); | |||||
| GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel"); | |||||
| GE_DUMP(comp_graph, "AfterPreBuildModel"); | |||||
| GE_TIMESTAMP_START(CalcOpParam); | GE_TIMESTAMP_START(CalcOpParam); | ||||
| GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.", | GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.", | ||||
| comp_graph->GetName().c_str()); | comp_graph->GetName().c_str()); | ||||
| GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); | GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); | ||||
| GE_DUMP(comp_graph, "AfterCalcOpParam"); | GE_DUMP(comp_graph, "AfterCalcOpParam"); | ||||
| Graph2SubGraphInfoList subgraph_map; | |||||
| ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); | |||||
| GE_TIMESTAMP_START(SetConstantInputOffset); | |||||
| GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph), | |||||
| "Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str()); | |||||
| GE_TIMESTAMP_END(SetConstantInputOffset, "GraphBuilder::SetConstantInputOffset"); | |||||
| GE_TIMESTAMP_START(MergeWeights); | |||||
| GE_CHK_STATUS_RET(builder.MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); | |||||
| GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights"); | |||||
| ModelPtr model_ptr = MakeShared<ge::Model>(); | ModelPtr model_ptr = MakeShared<ge::Model>(); | ||||
| if (model_ptr == nullptr) { | if (model_ptr == nullptr) { | ||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| @@ -360,6 +421,52 @@ static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| // Marks the nodes of com_graph at which fp, bp and end profiling tasks are to be inserted. | |||||
| // The indices come from TaskGenerator::FindProfilingNodeIndex and are matched against a 1-based | |||||
| // counter over com_graph->GetAllNodes(). Returns SUCCESS without marking anything when no usable | |||||
| // fp/bp/end index was found. | |||||
| Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||||
| // Clear the unknown-shape flag only for the duration of the index search, then restore it. | |||||
| bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); | |||||
| com_graph->SetGraphUnknownFlag(false); | |||||
| GELOGD("Start to mark profiling task attr for fp and bp."); | |||||
| TaskGenerator task_generator; | |||||
| ProfilingPoint profiling_point; | |||||
| std::vector<uint32_t> all_reduce_node_index; | |||||
| Status ret = task_generator.FindProfilingNodeIndex(com_graph, profiling_point, all_reduce_node_index); | |||||
| com_graph->SetGraphUnknownFlag(original_unknown_shape_flag); | |||||
| // A failed search is only logged; the index checks below decide whether anything gets marked. | |||||
| if (ret != SUCCESS) { | |||||
| GELOGW("Find profiling node index failed."); | |||||
| } | |||||
| if (profiling_point.fp_index == 0 || profiling_point.bp_index == 0 || profiling_point.end_index.empty()) { | |||||
| GELOGD("No need to mark fp bp profiling task attr."); | |||||
| return SUCCESS; | |||||
| } | |||||
| // mark profiling task attr for node | |||||
| uint32_t node_index = 0; | |||||
| for (const auto &node : com_graph->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| node_index++; | |||||
| if (profiling_point.fp_index == node_index) { | |||||
| GELOGI("The first fp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||||
| (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true); | |||||
| } | |||||
| if (profiling_point.bp_index == node_index) { | |||||
| GELOGI("The bp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||||
| (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); | |||||
| } | |||||
| for (size_t i = 0; i < all_reduce_node_index.size(); i++) { | |||||
| if (all_reduce_node_index[i] == node_index) { | |||||
| GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||||
| (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); | |||||
| // One match is enough; the attribute is already set for this node. | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) { | |||||
| GELOGI("The end node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||||
| (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | ||||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | ||||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
| @@ -375,10 +482,21 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
| op_desc->GetName().c_str()); | op_desc->GetName().c_str()); | ||||
| } | } | ||||
| } | } | ||||
| // | |||||
| for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||||
| // Set fp bp profiling task attr for graph | |||||
| if (MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) { | |||||
| GELOGE(FAILED, "Set fp bp profiling task attr for graph."); | |||||
| return FAILED; | |||||
| } | |||||
| auto all_graphs = comp_graph->GetAllSubgraphs(); | |||||
| if (all_graphs.empty()) { | |||||
| all_graphs.push_back(comp_graph); | |||||
| } | |||||
| for (auto &sub_graph : all_graphs) { | |||||
| // exclude functional subgraph in known subgraph | // exclude functional subgraph in known subgraph | ||||
| if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | |||||
| if (sub_graph->GetParentGraph() != nullptr && sub_graph->GetParentGraph() != comp_graph && | |||||
| !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -60,6 +60,7 @@ class GraphBuilder { | |||||
| Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); | Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); | ||||
| Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); | Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); | ||||
| Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list); | Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list); | ||||
| Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); | |||||
| Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | ||||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
| @@ -67,6 +68,7 @@ class GraphBuilder { | |||||
| GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | ||||
| Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
| Status SetConstantInputOffset(ComputeGraphPtr &comp_graph); | |||||
| Status AddOutputMemTypeForNode(const NodePtr &node); | Status AddOutputMemTypeForNode(const NodePtr &node); | ||||
| Status BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
| @@ -1,45 +0,0 @@ | |||||
| set(SRC_LIST | |||||
| "memory_assigner.cc" | |||||
| "graph_mem_assigner.cc" | |||||
| "binary_block_mem_assigner.cc" | |||||
| "block_mem_assigner.cc" | |||||
| "hybrid_mem_assigner.cc" | |||||
| "max_block_mem_assigner.cc" | |||||
| "var_mem_assign_util.cc" | |||||
| ) | |||||
| ############ libge_memory.a ############ | |||||
| add_library(ge_memory STATIC ${SRC_LIST}) | |||||
| target_compile_options(ge_memory PRIVATE | |||||
| -Werror | |||||
| -O2 | |||||
| -fno-common | |||||
| ) | |||||
| target_compile_definitions(ge_memory PRIVATE | |||||
| google=ascend_private | |||||
| LOG_CPP | |||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | |||||
| target_link_libraries(ge_memory PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| ascend_protobuf | |||||
| c_sec | |||||
| ) | |||||
| target_include_directories(ge_memory PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | |||||
| @@ -551,31 +551,11 @@ void GetMaxBatchAllMemorySize(std::map<std::string, vector<int64_t>> &batch_all_ | |||||
| } | } | ||||
| } | } | ||||
| void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) { | |||||
| auto node_op_desc = node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); | |||||
| // if input size just one and from variable, no need to reassign continuous memory | |||||
| bool is_input_continuous = false; | |||||
| (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
| if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { | |||||
| auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); | |||||
| auto in_node = peer_out_anchor->GetOwnerNode(); | |||||
| GE_IF_BOOL_EXEC(in_node == nullptr, return); | |||||
| if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { | |||||
| GELOGI("node only one input and from variable, set continuous alloced. node_name:%s", node->GetName().c_str()); | |||||
| (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | |||||
| } | |||||
| } | |||||
| } | |||||
| void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | ||||
| vector<int64_t> temp; | vector<int64_t> temp; | ||||
| std::map<std::string, vector<int64_t>> batch_all_memory_size; | std::map<std::string, vector<int64_t>> batch_all_memory_size; | ||||
| std::map<std::string, int64_t> batch_total_size; | std::map<std::string, int64_t> batch_total_size; | ||||
| for (const NodePtr &n : compute_graph_->GetAllNodes()) { | for (const NodePtr &n : compute_graph_->GetAllNodes()) { | ||||
| MarkContinuousAllocedForOneInputFromVariable(n); | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | ||||
| @@ -1081,53 +1061,18 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| return block; | return block; | ||||
| } | } | ||||
| void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, | |||||
| const NodePtr &n) { | |||||
| const auto node_op_desc = n->GetOpDesc(); | |||||
| for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | |||||
| int32_t reuse_in_index = -1; | |||||
| if (!GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { | |||||
| isAllOutputRef = false; | |||||
| break; | |||||
| } else { | |||||
| zero_memory_list_.emplace_back(n, kOutput, index); | |||||
| isOutputHasRef = true; | |||||
| } | |||||
| } | |||||
| } | |||||
| Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||||
| MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||||
| const bool is_op_reuse_mem) { | const bool is_op_reuse_mem) { | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); | |||||
| // continuous output support ref only when all output ref input | |||||
| bool isAllOutputRef = true; | |||||
| bool isOutputHasRef = false; | |||||
| ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); | |||||
| if (isAllOutputRef) { | |||||
| GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| if (!isAllOutputRef && isOutputHasRef) { | |||||
| GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", | |||||
| n->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | |||||
| MemoryBlock *block = nullptr; | MemoryBlock *block = nullptr; | ||||
| int64_t total_size = 0; | int64_t total_size = 0; | ||||
| int64_t memory_type = RT_MEMORY_HBM; | int64_t memory_type = RT_MEMORY_HBM; | ||||
| for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | ||||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
| if (output_op_desc == nullptr) { | if (output_op_desc == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| return INTERNAL_ERROR; | |||||
| return nullptr; | |||||
| } | } | ||||
| if (CheckIsZeroMemNodeType(n->GetType())) { | if (CheckIsZeroMemNodeType(n->GetType())) { | ||||
| @@ -1137,8 +1082,8 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| int64_t size = 0; | int64_t size = 0; | ||||
| if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| return INTERNAL_ERROR; | |||||
| GELOGI("Get size failed"); | |||||
| return nullptr; | |||||
| } | } | ||||
| size_t align_size = static_cast<size_t>(size); | size_t align_size = static_cast<size_t>(size); | ||||
| AlignMemOffset(align_size); | AlignMemOffset(align_size); | ||||
| @@ -1161,7 +1106,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| } | } | ||||
| if (total_size == 0) { | if (total_size == 0) { | ||||
| return SUCCESS; | |||||
| return nullptr; | |||||
| } | } | ||||
| auto block_size = GetBlockSize(total_size, ranges); | auto block_size = GetBlockSize(total_size, ranges); | ||||
| @@ -1175,11 +1120,8 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| // hccl task need align header and tail | // hccl task need align header and tail | ||||
| block->first_continuous_block_ = true; | block->first_continuous_block_ = true; | ||||
| block->last_continuous_block_ = true; | block->last_continuous_block_ = true; | ||||
| } else { | |||||
| GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | } | ||||
| return SUCCESS; | |||||
| return block; | |||||
| } | } | ||||
| MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | ||||
| @@ -1191,8 +1133,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| NodeIndexIO node_index_io(n, index, kOut); | NodeIndexIO node_index_io(n, index, kOut); | ||||
| int64_t size = 0; | int64_t size = 0; | ||||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
| GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
| if (output_op_desc != nullptr) { | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
| } | |||||
| size_t no_align_size = 0; | size_t no_align_size = 0; | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | ||||
| return nullptr, "Get no align size failed"); | return nullptr, "Get no align size failed"); | ||||
| @@ -1203,14 +1146,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | ||||
| block->ref_count_++; | block->ref_count_++; | ||||
| } else { | } else { | ||||
| // if ref input is variable, can not find symbol, must judge alone | |||||
| int32_t reuse_in_index = -1; | |||||
| if (GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { | |||||
| zero_memory_list_.emplace_back(n, kOutput, index, false); | |||||
| GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetName().c_str(), index); | |||||
| return nullptr; | |||||
| } | |||||
| int64_t max_size = size; | int64_t max_size = size; | ||||
| int64_t memory_type = RT_MEMORY_HBM; | int64_t memory_type = RT_MEMORY_HBM; | ||||
| auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); | auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); | ||||
| @@ -1458,7 +1393,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | ||||
| ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | ||||
| if (IsContinuousOutput(node)) { | if (IsContinuousOutput(node)) { | ||||
| return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||||
| (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | ||||
| int64_t size = 0; | int64_t size = 0; | ||||
| @@ -1952,8 +1888,9 @@ Status BlockMemAssigner::Assign() { | |||||
| bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | ||||
| return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | ||||
| (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || | |||||
| (node_type == ASSIGN) || (node_type == HVDWAIT); | |||||
| (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || | |||||
| (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||||
| (node_type == HVDCALLBACKBROADCAST); | |||||
| } | } | ||||
| bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | ||||
| @@ -420,11 +420,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
| bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); | bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); | ||||
| void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); | |||||
| Status ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||||
| void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | |||||
| MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||||
| std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | ||||
| @@ -805,7 +805,7 @@ Status ModelBuilder::CompileSingleOp() { | |||||
| } | } | ||||
| void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types, | void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types, | ||||
| std::set<std::string> &aicpu_tf_op_types) { | |||||
| std::set<std::string> &aicpu_tf_op_types) { | |||||
| std::string aicpu_optype; | std::string aicpu_optype; | ||||
| bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); | bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); | ||||
| std::vector<std::string> tf_optypes; | std::vector<std::string> tf_optypes; | ||||
| @@ -822,7 +822,7 @@ void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std: | |||||
| } | } | ||||
| void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types, | void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types, | ||||
| std::set<std::string> &aicpu_tf_op_types) { | |||||
| std::set<std::string> &aicpu_tf_op_types) { | |||||
| std::vector<std::string> aicpu_optype_list; | std::vector<std::string> aicpu_optype_list; | ||||
| std::vector<std::string> aicpu_tf_optype_list; | std::vector<std::string> aicpu_tf_optype_list; | ||||
| if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { | if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { | ||||
| @@ -839,10 +839,10 @@ void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string | |||||
| aicpu_optype_list.assign(aicpu_op_types.begin(), aicpu_op_types.end()); | aicpu_optype_list.assign(aicpu_op_types.begin(), aicpu_op_types.end()); | ||||
| aicpu_tf_optype_list.assign(aicpu_tf_op_types.begin(), aicpu_tf_op_types.end()); | aicpu_tf_optype_list.assign(aicpu_tf_op_types.begin(), aicpu_tf_op_types.end()); | ||||
| GELOGI( | GELOGI( | ||||
| "Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, " | |||||
| "aicpu_tf_optype_list:%zu.", | |||||
| compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), | |||||
| aicpu_tf_optype_list.size()); | |||||
| "Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, " | |||||
| "aicpu_tf_optype_list:%zu.", | |||||
| compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), | |||||
| aicpu_tf_optype_list.size()); | |||||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return, | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return, | ||||
| "Set attr needCheckCpu fail."); | "Set attr needCheckCpu fail."); | ||||
| @@ -55,13 +55,13 @@ class ModelBuilder { | |||||
| ge::Buffer GetWeightBuffer() const; | ge::Buffer GetWeightBuffer() const; | ||||
| Status MergeWeights(); | |||||
| protected: | protected: | ||||
| void AddNodeInputProperty(); | void AddNodeInputProperty(); | ||||
| void ClearOriginalFormat(); | void ClearOriginalFormat(); | ||||
| Status MergeWeights(); | |||||
| private: | private: | ||||
| bool SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_node, size_t index, vector<bool> &is_input_const); | bool SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_node, size_t index, vector<bool> &is_input_const); | ||||
| @@ -274,6 +274,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| }; | }; | ||||
| GE_MAKE_GUARD(release, callback); | GE_MAKE_GUARD(release, callback); | ||||
| uint64_t all_reduce_node_idx = 0; | |||||
| for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| @@ -292,7 +293,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| // Part2: Call | // Part2: Call | ||||
| auto fusion_task_info = | auto fusion_task_info = | ||||
| FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, | FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, | ||||
| ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; | |||||
| ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; | |||||
| GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), | GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), | ||||
| "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); | "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); | ||||
| // continue directly | // continue directly | ||||
| @@ -316,7 +317,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| type.c_str()); | type.c_str()); | ||||
| // Profiling task | // Profiling task | ||||
| size_t task_list_size_before = task_def_list.size(); | size_t task_list_size_before = task_def_list.size(); | ||||
| GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
| GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, | |||||
| node_index, task_def_list, all_reduce_node_idx)); | |||||
| int64_t op_id = op_desc->GetId(); | int64_t op_id = op_desc->GetId(); | ||||
| // Compatible with dynamic shape scenes, the default is 0 | // Compatible with dynamic shape scenes, the default is 0 | ||||
| int64_t stream_id = 0; | int64_t stream_id = 0; | ||||
| @@ -336,8 +338,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| // Profiling task | // Profiling task | ||||
| GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
| GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, | |||||
| node_index, task_def_list, all_reduce_node_idx)); | |||||
| size_t task_list_size_after = task_def_list.size(); | size_t task_list_size_after = task_def_list.size(); | ||||
| // If tasks is reduced | // If tasks is reduced | ||||
| if (task_list_size_after < task_list_size_before) { | if (task_list_size_after < task_list_size_before) { | ||||
| @@ -380,6 +382,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||||
| auto &op_name_map = fusion_task_info.op_name_map; | auto &op_name_map = fusion_task_info.op_name_map; | ||||
| auto &profiling_point = fusion_task_info.profiling_point; | auto &profiling_point = fusion_task_info.profiling_point; | ||||
| auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; | auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; | ||||
| auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; | |||||
| // If op_desc have this attr, call nodes with same group key in a stream together | // If op_desc have this attr, call nodes with same group key in a stream together | ||||
| if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && | if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && | ||||
| (fusion_nodes_seen.count(node.get()) == 0)) { | (fusion_nodes_seen.count(node.get()) == 0)) { | ||||
| @@ -426,7 +429,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| // profiling task | // profiling task | ||||
| (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); | |||||
| (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, | |||||
| node_index, task_def_list, all_reduce_idx); | |||||
| run_context.stream = run_context.graphStreamList[stream_id]; | run_context.stream = run_context.graphStreamList[stream_id]; | ||||
| GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", | GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", | ||||
| op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); | op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); | ||||
| @@ -439,7 +443,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| // profiling task | // profiling task | ||||
| (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); | |||||
| (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, | |||||
| node_index, task_def_list, all_reduce_idx); | |||||
| size_t task_list_size_after = task_def_list.size(); | size_t task_list_size_after = task_def_list.size(); | ||||
| // if tasks is reduced | // if tasks is reduced | ||||
| if (task_list_size_after < task_list_size_before) { | if (task_list_size_after < task_list_size_before) { | ||||
| @@ -830,6 +835,11 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TaskGenerator::FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | |||||
| std::vector<uint32_t> &all_reduce_nodes) { | |||||
| return FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes); | |||||
| } | |||||
| Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | ||||
| vector<uint32_t> &all_reduce_nodes) const { | vector<uint32_t> &all_reduce_nodes) const { | ||||
| GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
| @@ -840,7 +850,6 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
| GELOGD("Profiling is not open."); | GELOGD("Profiling is not open."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| GELOGI("Start get FP/BP index."); | GELOGI("Start get FP/BP index."); | ||||
| std::string fp_point_str; | std::string fp_point_str; | ||||
| std::string bp_point_str; | std::string bp_point_str; | ||||
| @@ -878,18 +887,27 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
| vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
| vector<domi::TaskDef> &task_def_list) { | |||||
| vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx) { | |||||
| const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | ||||
| ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ||||
| if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | |||||
| (profiling_point.end_index.empty())) { | |||||
| bool is_insert_fp_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); | |||||
| bool is_insert_bp_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | |||||
| bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | |||||
| (profiling_point.end_index.empty())) && | |||||
| (!(is_insert_fp_profiling_task || is_insert_bp_profiling_task)); | |||||
| if (!is_profiling || no_insert_profiling_task) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (profiling_point.fp_index == node_index) { | |||||
| GELOGD("Insert fp profiling task: %d, insert bp profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", | |||||
| is_insert_fp_profiling_task, is_insert_bp_profiling_task, profiling_point.fp_index, profiling_point.bp_index, | |||||
| profiling_point.end_index.size()); | |||||
| if ((profiling_point.fp_index == node_index) || is_insert_fp_profiling_task) { | |||||
| uint64_t jobid_log_id = ge::GetContext().TraceId(); | uint64_t jobid_log_id = ge::GetContext().TraceId(); | ||||
| GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id); | GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id); | ||||
| @@ -913,22 +931,40 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||||
| task_def_list.emplace_back(fp_task_def); | task_def_list.emplace_back(fp_task_def); | ||||
| } | } | ||||
| for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | |||||
| if (all_reduce_nodes[i] != node_index) { | |||||
| continue; | |||||
| bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | |||||
| uint64_t all_reduce_task_idx = 0; | |||||
| bool is_insert_all_reduce_task = false; | |||||
| if (is_all_reduce && is_insert_bp_profiling_task) { | |||||
| all_reduce_task_idx = all_reduce_node_idx; | |||||
| is_insert_all_reduce_task = true; | |||||
| } | |||||
| if (is_all_reduce) { | |||||
| all_reduce_node_idx++; | |||||
| } | |||||
| if (!is_insert_all_reduce_task) { | |||||
| for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | |||||
| if (all_reduce_nodes[i] == node_index) { | |||||
| all_reduce_task_idx = i; | |||||
| is_insert_all_reduce_task = true; | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| } | |||||
| if (is_insert_all_reduce_task) { | |||||
| GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | ||||
| TaskDef ar_task_def; | TaskDef ar_task_def; | ||||
| ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ||||
| ar_task_def.set_stream_id(op_desc->GetStreamId()); | ar_task_def.set_stream_id(op_desc->GetStreamId()); | ||||
| LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | ||||
| if (ar_log_def != nullptr) { | if (ar_log_def != nullptr) { | ||||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), | |||||
| GELOGE(FAILED, "Multiply result is out of range."); | GELOGE(FAILED, "Multiply result is out of range."); | ||||
| return FAILED); | return FAILED); | ||||
| auto log_id = i * kProfilingArStep + kProfilingArStartLogid; | |||||
| auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; | |||||
| ar_log_def->set_logid(log_id); | ar_log_def->set_logid(log_id); | ||||
| ar_log_def->set_notify(false); | ar_log_def->set_notify(false); | ||||
| (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | |||||
| } | } | ||||
| task_def_list.push_back(ar_task_def); | task_def_list.push_back(ar_task_def); | ||||
| } | } | ||||
| @@ -937,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||||
| Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
| vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
| vector<domi::TaskDef> &task_def_list) { | |||||
| vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) { | |||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
| bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | ||||
| ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ||||
| if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | |||||
| (profiling_point.end_index.empty())) { | |||||
| bool is_insert_bp_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | |||||
| bool is_insert_end_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task); | |||||
| bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | |||||
| (profiling_point.end_index.empty())) && | |||||
| (!(is_insert_bp_profiling_task || is_insert_end_profiling_task)); | |||||
| if (!is_profiling || no_insert_profiling_task) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (profiling_point.bp_index == node_index) { | |||||
| GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", | |||||
| is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index, | |||||
| profiling_point.end_index.size() ); | |||||
| bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | |||||
| if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) { | |||||
| GELOGI("The last BP operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | GELOGI("The last BP operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | ||||
| TaskDef bp_task_def; | TaskDef bp_task_def; | ||||
| bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ||||
| @@ -957,7 +1004,9 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
| bp_log_def->set_notify(false); | bp_log_def->set_notify(false); | ||||
| task_def_list.emplace_back(bp_task_def); | task_def_list.emplace_back(bp_task_def); | ||||
| } | } | ||||
| if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) { | |||||
| if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end() || | |||||
| is_insert_end_profiling_task) { | |||||
| GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | ||||
| TaskDef end_task_def; | TaskDef end_task_def; | ||||
| end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ||||
| @@ -969,20 +1018,32 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
| task_def_list.emplace_back(end_task_def); | task_def_list.emplace_back(end_task_def); | ||||
| } | } | ||||
| uint32_t all_reduce_task_idx = 0; | |||||
| bool is_insert_all_reduce_task = false; | |||||
| if (is_all_reduce && is_insert_bp_profiling_task) { | |||||
| all_reduce_task_idx = all_reduce_node_idx; | |||||
| is_insert_all_reduce_task = true; | |||||
| } | |||||
| for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | ||||
| if (all_reduce_nodes[i] != node_index) { | |||||
| continue; | |||||
| if (all_reduce_nodes[i] == node_index) { | |||||
| all_reduce_task_idx = i; | |||||
| is_insert_all_reduce_task = true; | |||||
| break; | |||||
| } | } | ||||
| } | |||||
| if (is_insert_all_reduce_task) { | |||||
| GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | ||||
| TaskDef ar_task_def; | TaskDef ar_task_def; | ||||
| ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ||||
| ar_task_def.set_stream_id(op_desc->GetStreamId()); | ar_task_def.set_stream_id(op_desc->GetStreamId()); | ||||
| LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | ||||
| GE_CHECK_NOTNULL(ar_log_def); | GE_CHECK_NOTNULL(ar_log_def); | ||||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||||
| GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), | |||||
| GELOGE(FAILED, "Multiply result is out of range."); | GELOGE(FAILED, "Multiply result is out of range."); | ||||
| return FAILED); | return FAILED); | ||||
| auto log_id = i * kProfilingArStep + kProfilingArEndLogid; | |||||
| auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid; | |||||
| ar_log_def->set_logid(log_id); | ar_log_def->set_logid(log_id); | ||||
| ar_log_def->set_notify(false); | ar_log_def->set_notify(false); | ||||
| task_def_list.emplace_back(ar_task_def); | task_def_list.emplace_back(ar_task_def); | ||||
| @@ -51,6 +51,7 @@ struct FusionTaskInfo { | |||||
| std::map<uint32_t, string> &op_name_map; | std::map<uint32_t, string> &op_name_map; | ||||
| ProfilingPoint &profiling_point; | ProfilingPoint &profiling_point; | ||||
| vector<uint32_t> all_reduce_nodes; | vector<uint32_t> all_reduce_nodes; | ||||
| uint64_t all_reduce_node_idx; | |||||
| }; | }; | ||||
| class TaskGenerator { | class TaskGenerator { | ||||
| @@ -76,6 +77,8 @@ class TaskGenerator { | |||||
| /// | /// | ||||
| Status GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context); | Status GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context); | ||||
| Status FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | |||||
| std::vector<uint32_t> &all_reduce_nodes); | |||||
| private: | private: | ||||
| Status UpdateAnchorStatus(const NodePtr &node); | Status UpdateAnchorStatus(const NodePtr &node); | ||||
| @@ -126,10 +129,10 @@ class TaskGenerator { | |||||
| std::vector<uint32_t> &all_reduce_nodes) const; | std::vector<uint32_t> &all_reduce_nodes) const; | ||||
| Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
| std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
| std::vector<domi::TaskDef> &task_def_list); | |||||
| std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx); | |||||
| Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
| std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
| std::vector<domi::TaskDef> &task_def_list); | |||||
| std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx); | |||||
| static bool IsProfPoint(const OpDescPtr &op, const std::string &name); | static bool IsProfPoint(const OpDescPtr &op, const std::string &name); | ||||
| @@ -560,34 +560,10 @@ Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<string> &dynam | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &out_formats) { | |||||
| try { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = | |||||
| model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed."); | |||||
| return ret; | |||||
| } | |||||
| } catch (std::bad_alloc &) { | |||||
| GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfoForZeroCopy failed, bad memory allocation occur !"); | |||||
| return MEMALLOC_FAILED; | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "GetInputOutputDescInfoForZeroCopy failed, some exceptions occur !"); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||||
| Status GraphExecutor::GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | auto model_manager = ge::ModelManager::GetInstance(); | ||||
| GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
| Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); | |||||
| Status ret = model_manager->GetAippInfo(model_id, index, aipp_info); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("GetAIPPInfo is not success."); | GELOGW("GetAIPPInfo is not success."); | ||||
| return ret; | return ret; | ||||
| @@ -73,7 +73,7 @@ class GraphExecutor { | |||||
| vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | ||||
| std::vector<uint32_t> &output_formats, bool new_model_desc = false); | std::vector<uint32_t> &output_formats, bool new_model_desc = false); | ||||
| static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||||
| static Status GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||||
| static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | ||||
| @@ -110,10 +110,6 @@ class GraphExecutor { | |||||
| static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | ||||
| static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats); | |||||
| static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | ||||
| static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | ||||
| std::vector<InputOutputDims> &output_dims); | std::vector<InputOutputDims> &output_dims); | ||||
| @@ -830,6 +830,13 @@ Status DataDumper::UnloadDumpInfo() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DataDumper::DumpShrink() { | |||||
| compute_graph_.reset(); | |||||
| input_map_.clear(); | |||||
| ref_info_.clear(); | |||||
| op_list_.clear(); | |||||
| } | |||||
| void DataDumper::PrintCheckLog(string &dump_list_key) { | void DataDumper::PrintCheckLog(string &dump_list_key) { | ||||
| std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | ||||
| if (model_list.empty()) { | if (model_list.empty()) { | ||||
| @@ -83,6 +83,8 @@ class DataDumper { | |||||
| Status UnloadDumpInfo(); | Status UnloadDumpInfo(); | ||||
| void DumpShrink(); | |||||
| void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | ||||
| const DumpProperties &GetDumpProperties() const { return dump_properties_; } | const DumpProperties &GetDumpProperties() const { return dump_properties_; } | ||||
| bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; | bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; | ||||
| @@ -112,18 +114,18 @@ class DataDumper { | |||||
| struct InnerInputMapping; | struct InnerInputMapping; | ||||
| std::vector<OpDescInfo> op_desc_info_; | std::vector<OpDescInfo> op_desc_info_; | ||||
| std::vector<InnerDumpInfo> op_list_; | |||||
| std::vector<InnerDumpInfo> op_list_; // release after DavinciModel::Init | |||||
| uint32_t end_graph_task_id_ = 0; | uint32_t end_graph_task_id_ = 0; | ||||
| uint32_t end_graph_stream_id_ = 0; | uint32_t end_graph_stream_id_ = 0; | ||||
| bool is_end_graph_ = false; | bool is_end_graph_ = false; | ||||
| std::multimap<std::string, InnerInputMapping> input_map_; | |||||
| std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init | |||||
| bool load_flag_; | bool load_flag_; | ||||
| uint32_t device_id_; | uint32_t device_id_; | ||||
| uintptr_t global_step_; | uintptr_t global_step_; | ||||
| uintptr_t loop_per_iter_; | uintptr_t loop_per_iter_; | ||||
| uintptr_t loop_cond_; | uintptr_t loop_cond_; | ||||
| ComputeGraphPtr compute_graph_; | |||||
| std::map<OpDescPtr, void *> ref_info_; | |||||
| ComputeGraphPtr compute_graph_; // release after DavinciModel::Init | |||||
| std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||||
| void *l1_fusion_addr_ = nullptr; | void *l1_fusion_addr_ = nullptr; | ||||
| @@ -75,7 +75,6 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| const uint32_t kDataIndex = 0; | const uint32_t kDataIndex = 0; | ||||
| const uint32_t kOutputNum = 1; | |||||
| const uint32_t kTrueBranchStreamNum = 1; | const uint32_t kTrueBranchStreamNum = 1; | ||||
| const uint32_t kGetDynamicDimsCount = 1; | const uint32_t kGetDynamicDimsCount = 1; | ||||
| const uint32_t kThreadNum = 16; | const uint32_t kThreadNum = 16; | ||||
| @@ -87,6 +86,7 @@ const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024 | |||||
| const uint32_t kDumpFlagOfL1Fusion = 0; | const uint32_t kDumpFlagOfL1Fusion = 0; | ||||
| const char *const kDefaultBatchLable = "Batch_default"; | const char *const kDefaultBatchLable = "Batch_default"; | ||||
| const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; | const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; | ||||
| const char *const kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; | |||||
| const int32_t kInvalidStream = -1; | const int32_t kInvalidStream = -1; | ||||
| const uint32_t kEndOfSequence = 0x0704000a; | const uint32_t kEndOfSequence = 0x0704000a; | ||||
| const uint32_t kEndOfSequenceNew = 507005; | const uint32_t kEndOfSequenceNew = 507005; | ||||
| @@ -150,19 +150,11 @@ DavinciModel::~DavinciModel() { | |||||
| GELOGW("UnloadDumpInfo failed, ret: %u.", ret); | GELOGW("UnloadDumpInfo failed, ret: %u.", ret); | ||||
| } | } | ||||
| for (const auto &op_and_addr : saved_task_addrs_) { | |||||
| auto addr = op_and_addr.second; | |||||
| if (addr != nullptr) { | |||||
| GE_CHK_RT(rtFree(addr)); | |||||
| } | |||||
| addr = nullptr; | |||||
| } | |||||
| saved_task_addrs_.clear(); | |||||
| ClearTaskAddrs(); | |||||
| GE_CHK_STATUS(ModelRunStop()); | GE_CHK_STATUS(ModelRunStop()); | ||||
| op_list_.clear(); | op_list_.clear(); | ||||
| data_op_list_.clear(); | |||||
| tensor_name_to_fixed_addr_size_.clear(); | tensor_name_to_fixed_addr_size_.clear(); | ||||
| tensor_name_to_peer_output_index_.clear(); | tensor_name_to_peer_output_index_.clear(); | ||||
| GE_DELETE_NEW_SINGLE(data_inputer_); | GE_DELETE_NEW_SINGLE(data_inputer_); | ||||
| @@ -221,6 +213,17 @@ DavinciModel::~DavinciModel() { | |||||
| } | } | ||||
| } | } | ||||
| void DavinciModel::ClearTaskAddrs() { | |||||
| for (const auto &op_and_addr : saved_task_addrs_) { | |||||
| auto addr = op_and_addr.second; | |||||
| if (addr != nullptr) { | |||||
| GE_CHK_RT(rtFree(addr)); | |||||
| } | |||||
| addr = nullptr; | |||||
| } | |||||
| saved_task_addrs_.clear(); | |||||
| } | |||||
| void DavinciModel::UnbindHcomStream() { | void DavinciModel::UnbindHcomStream() { | ||||
| if (!all_hccl_stream_list_.empty()) { | if (!all_hccl_stream_list_.empty()) { | ||||
| for (size_t i = 0; i < all_hccl_stream_list_.size(); i++) { | for (size_t i = 0; i < all_hccl_stream_list_.size(); i++) { | ||||
| @@ -263,7 +266,10 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) { | |||||
| /// | /// | ||||
| void DavinciModel::Shrink() { | void DavinciModel::Shrink() { | ||||
| skt_info_ = {0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, -1, 0, nullptr}; | skt_info_ = {0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, -1, 0, nullptr}; | ||||
| DumperShrink(); | |||||
| ge_model_.reset(); // delete object. | ge_model_.reset(); // delete object. | ||||
| op_list_.clear(); | |||||
| ClearTaskAddrs(); | |||||
| } | } | ||||
| Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) { | Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) { | ||||
| @@ -738,7 +744,6 @@ Status DavinciModel::ReportProfilingData() { | |||||
| } | } | ||||
| ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | ||||
| GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | ||||
| op_list_.clear(); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -861,13 +866,17 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| if (InitRealSizeAndShapeInfo(compute_graph, node) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Init real size and shape failed, Name: %s", op_desc->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| continue; | continue; | ||||
| } | } | ||||
| auto it = op_desc_handle.find(op_desc->GetType()); | auto it = op_desc_handle.find(op_desc->GetType()); | ||||
| if (it != op_desc_handle.end()) { | if (it != op_desc_handle.end()) { | ||||
| if ((this->*it->second)(op_desc) != SUCCESS) { | if ((this->*it->second)(op_desc) != SUCCESS) { | ||||
| GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | |||||
| GELOGE(PARAM_INVALID, "Node init failed, Name: %s", op_desc->GetName().c_str()); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| continue; | continue; | ||||
| @@ -920,7 +929,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | ||||
| GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | ||||
| return OptInputOutputInfo(data_by_index, output_op_list); | |||||
| return GenInputOutputInfo(data_by_index, output_op_list); | |||||
| } | } | ||||
| void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | ||||
| @@ -963,7 +972,6 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||||
| } | } | ||||
| data_by_index[data_index] = op_desc; | data_by_index[data_index] = op_desc; | ||||
| data_op_list_.push_back(op_desc); | |||||
| if (known_node_) { | if (known_node_) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1009,21 +1017,18 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||||
| /// @param [in] output_op_list: list of NetOutput op. | /// @param [in] output_op_list: list of NetOutput op. | ||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, | |||||
| Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, | |||||
| const vector<OpDescPtr> &output_op_list) { | const vector<OpDescPtr> &output_op_list) { | ||||
| GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size()); | |||||
| if (data_by_index.size() != data_op_list_.size()) { | |||||
| GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| data_op_list_.clear(); | |||||
| GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); | |||||
| for (auto &item : data_by_index) { | for (auto &item : data_by_index) { | ||||
| data_op_list_.emplace_back(item.second); | |||||
| auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | ||||
| GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | ||||
| input_addrs_list_.emplace_back(output_addrs); | input_addrs_list_.emplace_back(output_addrs); | ||||
| GE_CHK_STATUS_RET(InitAippInfo(item.first, item.second), "Init AIPP Info failed"); | |||||
| GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); | |||||
| GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); | |||||
| GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); | |||||
| if (item.second->GetType() == AIPP_DATA_TYPE) { | if (item.second->GetType() == AIPP_DATA_TYPE) { | ||||
| GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | ||||
| is_dynamic_aipp_ = true; | is_dynamic_aipp_ = true; | ||||
| @@ -1051,7 +1056,8 @@ Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
| } | } | ||||
| } | } | ||||
| return InitOutputDescInfo(output_op_list, output_descs_, output_formats_); | |||||
| GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); | |||||
| return InitOutputDescInfo(output_op_list); | |||||
| } | } | ||||
| bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | ||||
| @@ -1133,16 +1139,24 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & | |||||
| real_virtual_addrs_.insert(real_addr); | real_virtual_addrs_.insert(real_addr); | ||||
| } | } | ||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| Status DavinciModel::InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node) { | |||||
| if (node->GetName().find(kMultiBatchNodePostfix) != string::npos) { | |||||
| GELOGD("No need to get size and shape of netoutput in subgraph."); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGD("Start init real size and shape info of %s.", node->GetName().c_str()); | |||||
| GetAllGearsInfo(node); | GetAllGearsInfo(node); | ||||
| if (is_getnext_sink_dynamic_) { | if (is_getnext_sink_dynamic_) { | ||||
| GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, | GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, | ||||
| GELOGE(PARAM_INVALID, "Failed to get info of getdynamicdims node."); return PARAM_INVALID;); | GELOGE(PARAM_INVALID, "Failed to get info of getdynamicdims node."); return PARAM_INVALID;); | ||||
| } | } | ||||
| if (is_online_infer_dynamic_) { | if (is_online_infer_dynamic_) { | ||||
| GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(input_count, node) != SUCCESS, | |||||
| GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(compute_graph, node) != SUCCESS, | |||||
| GELOGE(PARAM_INVALID, "Failed to get gear and real out size info."); return PARAM_INVALID;); | GELOGE(PARAM_INVALID, "Failed to get gear and real out size info."); return PARAM_INVALID;); | ||||
| GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, | |||||
| GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(compute_graph, node) != SUCCESS, | |||||
| GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); | GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); | ||||
| } | } | ||||
| @@ -1161,7 +1175,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) { | |||||
| if (shape_str.empty()) { | if (shape_str.empty()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| std::vector<int64_t> gear_info; | |||||
| std::vector<int32_t> gear_info; | |||||
| std::vector<std::string> dims = ge::StringUtils::Split(shape_str, ','); | std::vector<std::string> dims = ge::StringUtils::Split(shape_str, ','); | ||||
| for (const auto &dim : dims) { | for (const auto &dim : dims) { | ||||
| if (dim.empty()) { | if (dim.empty()) { | ||||
| @@ -1177,6 +1191,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { | Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| size_t input_count = node->GetAllInDataAnchors().size(); | size_t input_count = node->GetAllInDataAnchors().size(); | ||||
| @@ -1214,11 +1229,11 @@ Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node) { | |||||
| GELOGD("Start get gear and real output size info of %s, input count is %zu.", node->GetName().c_str(), input_count); | |||||
| Status DavinciModel::GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node) { | |||||
| GELOGD("Start get gear and real output size info of %s.", node->GetName().c_str()); | |||||
| merge_nodes_gear_and_real_out_size_info_.clear(); | merge_nodes_gear_and_real_out_size_info_.clear(); | ||||
| for (size_t idx = 0; idx < input_count; ++idx) { | |||||
| auto in_anchor = node->GetAllInDataAnchors().at(idx); | |||||
| size_t idx = 0; | |||||
| for (const auto &in_anchor : node->GetAllInDataAnchors()) { | |||||
| auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); | auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); | ||||
| if (peer_out_anchor == nullptr) { | if (peer_out_anchor == nullptr) { | ||||
| continue; | continue; | ||||
| @@ -1226,89 +1241,106 @@ Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr | |||||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | auto peer_node = peer_out_anchor->GetOwnerNode(); | ||||
| auto op_desc = peer_node->GetOpDesc(); | auto op_desc = peer_node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| if ((peer_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { | |||||
| if (GetRealOutputSizeOfMerge(idx, peer_node) != SUCCESS) { | |||||
| if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { | |||||
| if (GetRealOutputSizeOfCase(graph, idx, peer_node) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get real output size of %s failed.", peer_node->GetName().c_str()); | GELOGE(PARAM_INVALID, "Get real output size of %s failed.", peer_node->GetName().c_str()); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| } | } | ||||
| idx++; | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node) { | |||||
| GELOGD("Start get output size of %s, which is %zu input to netoutput.", merge_node->GetName().c_str(), input_index); | |||||
| std::map<vector<int64_t>, int64_t> gear_and_real_out_size_info; | |||||
| for (auto &in_anchor : merge_node->GetAllInDataAnchors()) { | |||||
| auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); | |||||
| if (peer_out_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto in_node = peer_out_anchor->GetOwnerNode(); | |||||
| GELOGD("Input node of merge is %s.", in_node->GetName().c_str()); | |||||
| auto op_desc = in_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| string batch_label; | |||||
| if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { | |||||
| size_t batch_index = static_cast<size_t>(stoi(batch_label.substr(batch_label.rfind('_') + 1))); | |||||
| GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index); | |||||
| if (batch_index > all_gears_info_.size()) { | |||||
| GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc); | |||||
| int output_index = ge::AnchorUtils::GetIdx(peer_out_anchor); | |||||
| auto tensor_desc = op_desc->GetOutputDescPtr(output_index); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| int64_t data_size = 0; | |||||
| if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Get tensor size in bytes failed."); | |||||
| return FAILED; | |||||
| Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, | |||||
| const NodePtr &case_node) { | |||||
| GELOGD("Start get output size of %s, which is %zu input to netoutput.", case_node->GetName().c_str(), input_index); | |||||
| const auto &func_desc = case_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(func_desc); | |||||
| std::map<vector<int32_t>, int64_t> gear_and_real_out_size_info; | |||||
| for (const auto &name : func_desc->GetSubgraphInstanceNames()) { | |||||
| const auto &subgraph = graph->GetSubgraph(name); | |||||
| if (subgraph == nullptr) { | |||||
| GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s.", name.c_str()); | |||||
| return GE_GRAPH_EMPTY_SUBGRAPH; | |||||
| } | |||||
| for (auto &node : subgraph->GetDirectNode()) { | |||||
| if (node->GetType() == NETOUTPUT) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| string batch_label; | |||||
| if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { | |||||
| size_t batch_index = static_cast<size_t>(stoi(batch_label.substr(batch_label.rfind('_') + 1))); | |||||
| GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index); | |||||
| if (batch_index > all_gears_info_.size()) { | |||||
| GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| const vector<int64_t> input_size_list = ModelUtils::GetInputSize(op_desc); | |||||
| auto tensor_desc = op_desc->GetInputDescPtr(input_index); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| int64_t data_size = 0; | |||||
| if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Get tensor size in bytes failed."); | |||||
| return FAILED; | |||||
| } | |||||
| gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size; | |||||
| GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld", | |||||
| batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(), | |||||
| input_size_list[input_index], data_size); | |||||
| } | |||||
| break; | |||||
| } | } | ||||
| gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size; | |||||
| GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld", | |||||
| batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(), | |||||
| output_size_list[output_index], data_size); | |||||
| } | } | ||||
| } | } | ||||
| merge_nodes_gear_and_real_out_size_info_[input_index] = gear_and_real_out_size_info; | merge_nodes_gear_and_real_out_size_info_[input_index] = gear_and_real_out_size_info; | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc) { | |||||
| GELOGD("Start to get dynamic output dims of %s.", op_desc->GetName().c_str()); | |||||
| Status DavinciModel::GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node) { | |||||
| GELOGD("Start to get dynamic output dims of %s.", node->GetName().c_str()); | |||||
| merge_nodes_gear_and_real_out_shape_info_.clear(); | merge_nodes_gear_and_real_out_shape_info_.clear(); | ||||
| std::vector<std::string> dynamic_output_shape_info; | |||||
| if (!AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | |||||
| GELOGD("Can not get dynamic output dims attr"); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str()); | |||||
| std::vector<vector<int64_t>> dynamic_output_shape; | |||||
| ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape); | |||||
| // idx: input_index to netoutput | |||||
| for (size_t idx = 0; idx < input_count; ++idx) { | |||||
| std::map<vector<int64_t>, vector<int64_t>> gear_and_real_out_shape_info; | |||||
| for (auto &it : dynamic_output_shape) { | |||||
| auto gear_index = static_cast<size_t>(it[0]); | |||||
| if (gear_index > all_gears_info_.size()) { | |||||
| GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast<size_t>(it[0])); | |||||
| return PARAM_INVALID; | |||||
| size_t idx = 0; | |||||
| for (const auto &in_anchor : node->GetAllInDataAnchors()) { | |||||
| auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); | |||||
| if (peer_out_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto peer_node = peer_out_anchor->GetOwnerNode(); | |||||
| auto op_desc = peer_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { | |||||
| std::vector<std::string> dynamic_output_shape_info; | |||||
| if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | |||||
| GELOGD("Can not get dynamic output dims attr from %s.", node->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str()); | |||||
| std::vector<vector<int64_t>> dynamic_output_shape; | |||||
| ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape); | |||||
| std::map<vector<int32_t>, vector<int64_t>> gear_and_real_out_shape_info; | |||||
| for (auto &it : dynamic_output_shape) { | |||||
| auto gear_index = static_cast<size_t>(it[0]); | |||||
| if (gear_index > all_gears_info_.size()) { | |||||
| GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast<size_t>(it[0])); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (static_cast<size_t>(it[1]) == idx) { | |||||
| vector<int64_t> output_shape; | |||||
| for (size_t i = 2; i < it.size(); ++i) { | |||||
| output_shape.emplace_back(it[i]); | |||||
| if (static_cast<size_t>(it[1]) == idx) { | |||||
| vector<int64_t> output_shape; | |||||
| for (size_t i = 2; i < it.size(); ++i) { | |||||
| output_shape.emplace_back(it[i]); | |||||
| } | |||||
| gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape; | |||||
| GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.", | |||||
| gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(), | |||||
| formats::JoinToString(output_shape).c_str()); | |||||
| } | } | ||||
| gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape; | |||||
| GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.", | |||||
| gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(), | |||||
| formats::JoinToString(output_shape).c_str()); | |||||
| } | } | ||||
| merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info; | |||||
| } | } | ||||
| merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info; | |||||
| idx++; | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1750,73 +1782,101 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_inp | |||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get AIPP input info | /// @brief Get AIPP input info | ||||
| /// @param [in] index | /// @param [in] index | ||||
| /// @param [out] aipp_info | |||||
| /// @param [in] op_desc | |||||
| /// @return execute result | /// @return execute result | ||||
| /// | /// | ||||
| Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| if (!data_op->HasAttr(ATTR_NAME_AIPP)) { | |||||
| GELOGW("GetAIPPInfo: there is not AIPP related with index %u.", index); | |||||
| return ACL_ERROR_GE_AIPP_NOT_EXIST; | |||||
| Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { | |||||
| if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { | |||||
| GELOGW("there is not AIPP related with index %u.", index); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | |||||
| GE_CHECK_NOTNULL(aipp_params); | |||||
| ge::GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||||
| domi::AippOpParams aipp_params; | |||||
| GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||||
| "Data node do not contain param aipp!"); | "Data node do not contain param aipp!"); | ||||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | |||||
| GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||||
| data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank()); | |||||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); | |||||
| GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||||
| op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); | |||||
| GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info), | |||||
| AippConfigInfo aipp_info; | |||||
| GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(&aipp_params, aipp_info), | |||||
| "convert aipp params to aipp config info failed"); | "convert aipp params to aipp config info failed"); | ||||
| aipp_info_list_[index] = aipp_info; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| // Set default value | |||||
| type = DATA_WITHOUT_AIPP; | |||||
| aipp_index = 0xFFFFFFFF; // default invalid value | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| GE_CHECK_NOTNULL(data_op); | |||||
| if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get AIPP input info | |||||
| /// @param [in] index | |||||
| /// @param [out] aipp_info | |||||
| /// @return execute result | |||||
| /// | |||||
| Status DavinciModel::GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const { | |||||
| const auto it = aipp_info_list_.find(index); | |||||
| if (it == aipp_info_list_.end()) { | |||||
| GELOGW("there is not AIPP related with index %u.", index); | |||||
| return ACL_ERROR_GE_AIPP_NOT_EXIST; | |||||
| } | |||||
| aipp_info = it->second; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list) { | |||||
| if (!op_desc->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { | |||||
| GELOGW("There is no aipp releated info with index %u.", index); | GELOGW("There is no aipp releated info with index %u.", index); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| std::string data_mode; | |||||
| (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode); | |||||
| // Set default value | |||||
| InputAippType aipp_type = DATA_WITHOUT_AIPP; | |||||
| string data_mode; | |||||
| (void)AttrUtils::GetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, data_mode); | |||||
| if (data_mode == "static_aipp") { | if (data_mode == "static_aipp") { | ||||
| type = DATA_WITH_STATIC_AIPP; | |||||
| aipp_type = DATA_WITH_STATIC_AIPP; | |||||
| } else if (data_mode == "dynamic_aipp") { | } else if (data_mode == "dynamic_aipp") { | ||||
| type = DATA_WITH_DYNAMIC_AIPP; | |||||
| aipp_type = DATA_WITH_DYNAMIC_AIPP; | |||||
| } else if (data_mode == "dynamic_aipp_conf") { | } else if (data_mode == "dynamic_aipp_conf") { | ||||
| type = DYNAMIC_AIPP_NODE; | |||||
| aipp_type = DYNAMIC_AIPP_NODE; | |||||
| } else { | } else { | ||||
| GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, | GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, | ||||
| "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); | "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); | ||||
| return ACL_ERROR_GE_AIPP_MODE_INVALID; | return ACL_ERROR_GE_AIPP_MODE_INVALID; | ||||
| } | } | ||||
| if (type == DATA_WITH_DYNAMIC_AIPP) { | |||||
| size_t aipp_index = 0xFFFFFFFF; // default invalid value | |||||
| if (aipp_type == DATA_WITH_DYNAMIC_AIPP) { | |||||
| string releated_name; | string releated_name; | ||||
| (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); | |||||
| for (size_t i = 0; i < data_op_list_.size(); ++i) { | |||||
| GE_CHECK_NOTNULL(data_op_list_[i]); | |||||
| if (data_op_list_[i]->GetName() == releated_name) { | |||||
| GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index); | |||||
| aipp_index = i; | |||||
| (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); | |||||
| for (const auto item : data_list) { | |||||
| if (item.second->GetName() == releated_name) { | |||||
| GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index); | |||||
| aipp_index = item.first; | |||||
| } | } | ||||
| } | } | ||||
| if (aipp_index == 0xFFFFFFFF) { | if (aipp_index == 0xFFFFFFFF) { | ||||
| GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "Can not find aipp data node from index %u", index); | |||||
| return ACL_ERROR_GE_AIPP_NOT_EXIST; | |||||
| GELOGW("Can not find aipp data node from index %u", index); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| } | } | ||||
| aipp_type_list_[index] = { aipp_type, aipp_index }; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DavinciModel::GetAippType(uint32_t index, InputAippType &aipp_type, size_t &aipp_index) const { | |||||
| const auto it = aipp_type_list_.find(index); | |||||
| if (it == aipp_type_list_.end()) { | |||||
| GELOGW("There is no aipp releated info with index %u.", index); | |||||
| return SUCCESS; | |||||
| } | |||||
| aipp_type = it->second.first; | |||||
| aipp_index = it->second.second; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1832,7 +1892,7 @@ void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_ | |||||
| dynamic_type_ = dynamic_type; | dynamic_type_ = dynamic_type; | ||||
| } | } | ||||
| void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||||
| void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type) const { | |||||
| if (batch_size_.empty()) { | if (batch_size_.empty()) { | ||||
| GELOGD("User does not set dynamic size"); | GELOGD("User does not set dynamic size"); | ||||
| } | } | ||||
| @@ -1844,38 +1904,10 @@ void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynami | |||||
| dynamic_type = dynamic_type_; | dynamic_type = dynamic_type_; | ||||
| } | } | ||||
| void DavinciModel::GetModelAttr(vector<string> &out_shape_info) { | |||||
| void DavinciModel::GetModelAttr(vector<string> &out_shape_info) const { | |||||
| out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); | out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); | ||||
| } | } | ||||
| Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats) { | |||||
| if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) { | |||||
| GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); | |||||
| return FAILED; | |||||
| } | |||||
| GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | |||||
| GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); | |||||
| GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, | |||||
| "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), | |||||
| output_memory_size_list_.size()); | |||||
| /// For function zero copy, the memory should be aligned by 512 bytes. | |||||
| /// And, because of the cce op limit, size should be larger than the real shape size. The memory should be padded by 32 | |||||
| /// bytes. | |||||
| /// *size equals to ((tensorDesc->dataSize + 2 * 32 - 1) / 32) * 32; | |||||
| for (size_t i = 0; i < output_memory_size_list_.size(); i++) { | |||||
| output_desc[i].size = output_memory_size_list_[i]; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void DavinciModel::SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, | void DavinciModel::SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, | ||||
| InputOutputDescInfo &input) { | InputOutputDescInfo &input) { | ||||
| uint32_t n, c, h, w; | uint32_t n, c, h, w; | ||||
| @@ -1925,24 +1957,30 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, | |||||
| } | } | ||||
| } | } | ||||
| Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) { | |||||
| for (size_t index = 0; index < data_op_list_.size(); ++index) { | |||||
| InputOutputDescInfo input; | |||||
| GE_CHECK_NOTNULL(data_op_list_[index]); | |||||
| GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0)); | |||||
| /// Builds one InputOutputDescInfo per entry of data_by_index and appends it, together with the | |||||
| /// format of input tensor 0, to input_descs_ / input_formats_. | |||||
| /// @param [in] data_by_index: data op descs keyed by uint32_t index (iterated in map order) | |||||
| /// @return SUCCESS, or the status raised by a failed null check / TensorUtils::GetSize | |||||
| Status DavinciModel::InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index) { | |||||
| for (const auto &item : data_by_index) { | |||||
| const auto op_desc = item.second; | |||||
| // The replaced implementation null-checked the op itself before touching it; keep that guard. | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||||
| Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat(); | |||||
| CreateInputDimsInfo(data_op_list_[index], format, input); | |||||
| InputOutputDescInfo input; | |||||
| Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||||
| CreateInputDimsInfo(op_desc, format, input); | |||||
| input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType(); | |||||
| input.name = data_op_list_[index]->GetName(); | |||||
| input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
| input.name = op_desc->GetName(); | |||||
| int64_t input_size = 0; | int64_t input_size = 0; | ||||
| GE_CHK_STATUS_RET(TensorUtils::GetSize(*data_op_list_[index]->GetInputDescPtr(0), input_size), | |||||
| "get input size failed."); | |||||
| GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||||
| input.size = input_size; | input.size = input_size; | ||||
| formats.push_back(format); | |||||
| input_desc.push_back(input); | |||||
| input_formats_.push_back(format); | |||||
| input_descs_.push_back(input); | |||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_descs, vector<uint32_t> &input_formats) { | |||||
| input_descs.insert(input_descs.end(), input_descs_.begin(), input_descs_.end()); | |||||
| input_formats.insert(input_formats.end(), input_formats_.begin(), input_formats_.end()); | |||||
| // cause GetInputDescInfo called not only once, set is_new_model_desc_ to false after calc the model input dims | // cause GetInputDescInfo called not only once, set is_new_model_desc_ to false after calc the model input dims | ||||
| is_new_model_desc_ = false; | is_new_model_desc_ = false; | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -1952,7 +1990,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO | |||||
| uint32_t &format_result) { | uint32_t &format_result) { | ||||
| /// netoutput input tensor desc | /// netoutput input tensor desc | ||||
| GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); | GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); | ||||
| return ); | |||||
| return); | |||||
| Format format = op_desc->GetInputDescPtr(index)->GetFormat(); | Format format = op_desc->GetInputDescPtr(index)->GetFormat(); | ||||
| GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); | GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); | ||||
| DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | ||||
| @@ -2001,8 +2039,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO | |||||
| output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | ||||
| } | } | ||||
| Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list, | |||||
| vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) { | |||||
| Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) { | |||||
| GELOGD("Output node size: %zu", output_op_list.size()); | GELOGD("Output node size: %zu", output_op_list.size()); | ||||
| for (const auto &op_desc : output_op_list) { | for (const auto &op_desc : output_op_list) { | ||||
| uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | ||||
| @@ -2027,28 +2064,20 @@ Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list, | |||||
| std::to_string(src_index[index]); | std::to_string(src_index[index]); | ||||
| } | } | ||||
| output.name = output_name; | output.name = output_name; | ||||
| output_descs.push_back(output); | |||||
| output_formats.push_back(format_result); | |||||
| output_descs_.push_back(output); | |||||
| output_formats_.push_back(format_result); | |||||
| } | } | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) { | |||||
| Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs, | |||||
| vector<uint32_t> &output_formats) const { | |||||
| output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); | output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); | ||||
| output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); | output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| ge::Format DavinciModel::GetFormat() { | |||||
| if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { | |||||
| GELOGW("OP List Pointer is null or input_desc size is not 1!"); | |||||
| return FORMAT_NCHW; | |||||
| } | |||||
| return data_op_list_[0]->GetInputDescPtr(0)->GetFormat(); | |||||
| } | |||||
| Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { | Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { | ||||
| rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; | rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; | ||||
| const std::vector<DataBuffer> &blobs = input_data.blobs; | const std::vector<DataBuffer> &blobs = input_data.blobs; | ||||
| @@ -2099,6 +2128,12 @@ Status DavinciModel::SyncVarData() { | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | RT_MEMCPY_HOST_TO_DEVICE)); | ||||
| } | } | ||||
| for (auto op_desc : variable_op_list_) { | |||||
| ret = | |||||
| VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); | |||||
| GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | |||||
| op_desc->GetName().c_str()); | |||||
| } | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -2450,19 +2485,10 @@ Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { | |||||
| GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, | GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, | ||||
| GELOGE(ret, "Get size from TensorDesc failed, op:%s, input id:%zu", op_desc->GetName().c_str(), i); | GELOGE(ret, "Get size from TensorDesc failed, op:%s, input id:%zu", op_desc->GetName().c_str(), i); | ||||
| return ret); | return ret); | ||||
| std::vector<int64_t> output_shape = input_desc->GetShape().GetDims(); | |||||
| if (is_online_infer_dynamic_) { | |||||
| if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { | |||||
| auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; | |||||
| size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||||
| auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; | |||||
| output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; | |||||
| is_dynamic_ = true; | |||||
| } | |||||
| } | |||||
| GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); | |||||
| output_buffer_size_.push_back(size); | |||||
| output_shape_info_.push_back(output_shape); | |||||
| const GeShape &shape = input_desc->GetShape(); | |||||
| GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(shape.GetDims()).c_str()); | |||||
| output_buffer_size_.emplace_back(size); | |||||
| output_shape_info_.emplace_back(shape); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -2475,18 +2501,38 @@ Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector<OutputT | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| vector<int64_t> output_buffer_size; | |||||
| vector<vector<int64_t>> output_shape_info; | |||||
| size_t output_num = output_buffer_size_.size(); | |||||
| for (size_t i = 0; i < output_num; ++i) { | |||||
| int64_t output_size = output_buffer_size_[i]; | |||||
| vector<int64_t> output_shape = output_shape_info_[i].GetDims(); | |||||
| if (is_online_infer_dynamic_) { | |||||
| if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { | |||||
| auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; | |||||
| output_size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||||
| auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; | |||||
| output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; | |||||
| is_dynamic_ = true; | |||||
| } | |||||
| } | |||||
| GELOGI("Output size is %ld, output shape is %s.", output_size, formats::JoinToString(output_shape).c_str()); | |||||
| output_buffer_size.push_back(output_size); | |||||
| output_shape_info.push_back(output_shape); | |||||
| } | |||||
| GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); | GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); | ||||
| for (size_t i = 0; i < output_buffer_size_.size(); ++i) { | |||||
| std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]); | |||||
| for (size_t i = 0; i < output_buffer_size.size(); ++i) { | |||||
| std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[output_buffer_size[i]]); | |||||
| if (data_buf == nullptr) { | if (data_buf == nullptr) { | ||||
| GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); | GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); | ||||
| return GE_GRAPH_MALLOC_FAILED; | return GE_GRAPH_MALLOC_FAILED; | ||||
| } | } | ||||
| output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(output_buffer_size_[i]), false}); | |||||
| ge::OutputTensorInfo output; | |||||
| output.dims = output_shape_info_[i]; | |||||
| output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(output_buffer_size[i]), false}); | |||||
| OutputTensorInfo output; | |||||
| output.dims = output_shape_info[i]; | |||||
| output.data = std::move(data_buf); | output.data = std::move(data_buf); | ||||
| output.length = output_buffer_size_[i]; | |||||
| output.length = output_buffer_size[i]; | |||||
| outputs.emplace_back(std::move(output)); | outputs.emplace_back(std::move(output)); | ||||
| GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, | GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, | ||||
| formats::JoinToString(output.dims).c_str(), output.length); | formats::JoinToString(output.dims).c_str(), output.length); | ||||
| @@ -2540,7 +2586,7 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||||
| GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); | GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); | ||||
| cur_dynamic_dims_.clear(); | cur_dynamic_dims_.clear(); | ||||
| cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); | cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); | ||||
| auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), | |||||
| auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int32_t), | |||||
| netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); | netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); | ||||
| GE_CHK_RT_RET(ret); | GE_CHK_RT_RET(ret); | ||||
| } | } | ||||
| @@ -2571,6 +2617,12 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||||
| /// | /// | ||||
| Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | ||||
| GELOGI("ReturnNoOutput model id:%u", model_id_); | GELOGI("ReturnNoOutput model id:%u", model_id_); | ||||
| for (auto op_desc : variable_op_list_) { | |||||
| Status ret = VarManager::Instance(session_id_) | |||||
| ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); | |||||
| GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | |||||
| op_desc->GetName().c_str()); | |||||
| } | |||||
| GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | ||||
| std::vector<ge::OutputTensorInfo> outputs; | std::vector<ge::OutputTensorInfo> outputs; | ||||
| @@ -2635,11 +2687,11 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); | GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); | ||||
| auto shape_data_buffer_data = current_data.blobs.back().data; | auto shape_data_buffer_data = current_data.blobs.back().data; | ||||
| auto shape_data_buffer_length = current_data.blobs.back().length; | auto shape_data_buffer_length = current_data.blobs.back().length; | ||||
| model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data), | |||||
| reinterpret_cast<int64_t *>(shape_data_buffer_data) + | |||||
| shape_data_buffer_length / sizeof(int64_t)); | |||||
| model->cur_dynamic_dims_.assign(reinterpret_cast<int32_t *>(shape_data_buffer_data), | |||||
| reinterpret_cast<int32_t *>(shape_data_buffer_data) + | |||||
| shape_data_buffer_length / sizeof(int32_t)); | |||||
| GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | ||||
| delete[] reinterpret_cast<int64_t *>(current_data.blobs.back().data); | |||||
| delete[] reinterpret_cast<int32_t *>(current_data.blobs.back().data); | |||||
| current_data.blobs.pop_back(); | current_data.blobs.pop_back(); | ||||
| } | } | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | ||||
| @@ -2859,8 +2911,8 @@ void DavinciModel::SetTotalIOAddrs(const vector<void *> &io_addrs) { | |||||
| } | } | ||||
| } | } | ||||
| Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) { | |||||
| if (fixed_mem_base_ != reinterpret_cast<uintptr_t>(mem_base_)) { | |||||
| Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args) { | |||||
| if (fixed_mem_base_ != reinterpret_cast<uintptr_t>(mem_base_) && update_args) { | |||||
| for (size_t i = 0; i < total_io_addrs.size(); ++i) { | for (size_t i = 0; i < total_io_addrs.size(); ++i) { | ||||
| total_io_addrs[i] = GetRunAddress(total_io_addrs[i]); | total_io_addrs[i] = GetRunAddress(total_io_addrs[i]); | ||||
| } | } | ||||
| @@ -2904,7 +2956,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
| } else { | } else { | ||||
| total_io_addrs_ = orig_total_io_addrs_; | total_io_addrs_ = orig_total_io_addrs_; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
| if (total_args_size_ == 0) { | if (total_args_size_ == 0) { | ||||
| GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | ||||
| @@ -3049,6 +3101,8 @@ Status DavinciModel::DistributeTask() { | |||||
| task_desc_info.stream_id = task->GetStreamId(); | task_desc_info.stream_id = task->GetStreamId(); | ||||
| task_desc_info.shape_type = "static"; | task_desc_info.shape_type = "static"; | ||||
| task_desc_info.cur_iter_num = 0; | task_desc_info.cur_iter_num = 0; | ||||
| profiler_report_op_info_[task_desc_info.op_name] = | |||||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
| task_desc_info_.emplace_back(task_desc_info); | task_desc_info_.emplace_back(task_desc_info); | ||||
| if (flag) { | if (flag) { | ||||
| if (task->GetSktTaskID() != 0xFFFFFFFF) { | if (task->GetSktTaskID() != 0xFFFFFFFF) { | ||||
| @@ -3056,6 +3110,8 @@ Status DavinciModel::DistributeTask() { | |||||
| string op_name = "super_kernel_" + to_string(task_index); | string op_name = "super_kernel_" + to_string(task_index); | ||||
| task_desc_info.op_name = op_name; | task_desc_info.op_name = op_name; | ||||
| task_desc_info.task_id = task->GetSktTaskID(); | task_desc_info.task_id = task->GetSktTaskID(); | ||||
| profiler_report_op_info_[task_desc_info.op_name] = | |||||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
| task_desc_info_.emplace_back(task_desc_info); | task_desc_info_.emplace_back(task_desc_info); | ||||
| } | } | ||||
| } | } | ||||
| @@ -3927,7 +3983,15 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||||
| compute_graph_info.output_format = op_desc.output_format; | compute_graph_info.output_format = op_desc.output_format; | ||||
| compute_graph_info.output_shape = op_desc.output_shape; | compute_graph_info.output_shape = op_desc.output_shape; | ||||
| compute_graph_info.output_data_type = op_desc.output_data_type; | compute_graph_info.output_data_type = op_desc.output_data_type; | ||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||||
| if (iter != profiler_report_op_info_.end()) { | |||||
| task_id = iter->second.first; | |||||
| stream_id = iter->second.second; | |||||
| } | |||||
| compute_graph_info.task_id = task_id; | |||||
| compute_graph_info.stream_id = stream_id; | |||||
| graph_desc_info.emplace_back(compute_graph_info); | graph_desc_info.emplace_back(compute_graph_info); | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -3940,25 +4004,45 @@ void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_s | |||||
| } | } | ||||
| } | } | ||||
| Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { | |||||
| GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index); | |||||
| return ACL_ERROR_GE_AIPP_NOT_EXIST; | |||||
| Status DavinciModel::InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc) { | |||||
| if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { | |||||
| GELOGI("there is not AIPP related with index %u, node: %s.", index, op_desc->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| vector<std::string> inputs; | |||||
| if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | |||||
| vector<string> inputs; | |||||
| if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | |||||
| std::string input = inputs[kAippOriginInputIndex]; | std::string input = inputs[kAippOriginInputIndex]; | ||||
| GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str()); | |||||
| GELOGI("origin input str: %s", input.c_str()); | |||||
| std::vector<std::string> infos = ge::StringUtils::Split(input, ':'); | std::vector<std::string> infos = ge::StringUtils::Split(input, ':'); | ||||
| if (infos.size() != kAippInfoNum) { | if (infos.size() != kAippInfoNum) { | ||||
| GELOGW("origin input str is invalid."); | |||||
| GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); | |||||
| return ACL_ERROR_GE_AIPP_MODE_INVALID; | |||||
| } | } | ||||
| orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); | |||||
| orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); | |||||
| orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); | |||||
| OriginInputInfo input_info; | |||||
| input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); | |||||
| input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); | |||||
| input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); | |||||
| orig_input_info_[index] = input_info; | |||||
| } else { | |||||
| OriginInputInfo input_info = { FORMAT_RESERVED, DT_UNDEFINED, 0 }; | |||||
| orig_input_info_[index] = input_info; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const { | |||||
| const auto it = orig_input_info_.find(index); | |||||
| if (it == orig_input_info_.end()) { | |||||
| GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); | |||||
| return ACL_ERROR_GE_AIPP_NOT_EXIST; | |||||
| } | |||||
| const OriginInputInfo &input_info = it->second; | |||||
| if (input_info.format != FORMAT_RESERVED || input_info.data_type != DT_UNDEFINED) { | |||||
| orig_input_info = input_info; | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -3968,7 +4052,8 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ | |||||
| GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str()); | GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str()); | ||||
| std::vector<std::string> infos = ge::StringUtils::Split(in_out_info, ':'); | std::vector<std::string> infos = ge::StringUtils::Split(in_out_info, ':'); | ||||
| if (infos.size() != kAippInfoNum) { | if (infos.size() != kAippInfoNum) { | ||||
| GELOGW("origin input str is invalid."); | |||||
| GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); | |||||
| return; | |||||
| } | } | ||||
| dims_info.name = infos[kAippInfoTensorName]; | dims_info.name = infos[kAippInfoTensorName]; | ||||
| dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal); | dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal); | ||||
| @@ -3983,47 +4068,58 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ | |||||
| } | } | ||||
| } | } | ||||
| Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { | |||||
| GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index); | |||||
| return ACL_ERROR_GE_AIPP_NOT_EXIST; | |||||
| /// Parses the ATTR_NAME_AIPP_INPUTS / ATTR_NAME_AIPP_OUTPUTS string lists of op_desc with ParseAIPPInfo | |||||
| /// and caches the resulting dims in aipp_dims_info_[index] as an {input_dims, output_dims} pair. | |||||
| /// Nothing is cached when either attribute is absent. | |||||
| /// @param [in] index: key under which the parsed dims are stored | |||||
| /// @param [in] op_desc: op desc whose AIPP attributes are read | |||||
| /// @return SUCCESS | |||||
| Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc) { | |||||
| if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { | |||||
| GELOGI("there is not AIPP related with index %u.", index); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| vector<std::string> inputs; | |||||
| if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | |||||
| GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size()); | |||||
| vector<string> inputs; | |||||
| vector<InputOutputDims> input_dims; | |||||
| if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | |||||
| GELOGI("Data: %s has %zu related aippInfo.", op_desc->GetName().c_str(), inputs.size()); | |||||
| for (auto it : inputs) { | for (auto it : inputs) { | ||||
| InputOutputDims input_info; | InputOutputDims input_info; | ||||
| ParseAIPPInfo(it, input_info); | ParseAIPPInfo(it, input_info); | ||||
| input_dims.emplace_back(input_info); | input_dims.emplace_back(input_info); | ||||
| GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str()); | |||||
| GELOGD("Aipp origin input dims info: %s", it.c_str()); | |||||
| ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex); | |||||
| ConstGeTensorDescPtr data_input_desc = op_desc->GetInputDescPtr(kDataIndex); | |||||
| int64_t data_input_size; | int64_t data_input_size; | ||||
| (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | |||||
| GELOGD( | |||||
| "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: " | |||||
| "%s, data_type: %s, shape: %s .", | |||||
| index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||||
| formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||||
| (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); | |||||
| GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", | |||||
| index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||||
| formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||||
| } | } | ||||
| } | } | ||||
| vector<std::string> outputs; | |||||
| if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { | |||||
| vector<string> outputs; | |||||
| vector<InputOutputDims> output_dims; | |||||
| if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { | |||||
| for (auto it : outputs) { | for (auto it : outputs) { | ||||
| InputOutputDims output_info; | InputOutputDims output_info; | ||||
| ParseAIPPInfo(it, output_info); | ParseAIPPInfo(it, output_info); | ||||
| output_dims.emplace_back(output_info); | output_dims.emplace_back(output_info); | ||||
| GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str()); | |||||
| GELOGD("Aipp output dims info: %s", it.c_str()); | |||||
| } | } | ||||
| } | } | ||||
| // second must hold the parsed outputs: GetAllAippInputOutputDims hands it->second.second back as output_dims. | |||||
| aipp_dims_info_[index] = { input_dims, output_dims }; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims, | |||||
| vector<InputOutputDims> &output_dims) const { | |||||
| const auto it = aipp_dims_info_.find(index); | |||||
| if (it == aipp_dims_info_.end()) { | |||||
| GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); | |||||
| return ACL_ERROR_GE_AIPP_NOT_EXIST; | |||||
| } | |||||
| input_dims = it->second.first; | |||||
| output_dims = it->second.second; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -286,13 +286,6 @@ class DavinciModel { | |||||
| // Modified from KernelTaskInfo. | // Modified from KernelTaskInfo. | ||||
| SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } | SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief get model input and output format | |||||
| /// @return ccTensorFormat_t current model input and output format | |||||
| /// | |||||
| Format GetFormat(); | |||||
| rtModel_t GetRtModelHandle() const { return rt_model_handle_; } | rtModel_t GetRtModelHandle() const { return rt_model_handle_; } | ||||
| rtStream_t GetRtModelStream() const { return rt_model_stream_; } | rtStream_t GetRtModelStream() const { return rt_model_stream_; } | ||||
| @@ -326,7 +319,7 @@ class DavinciModel { | |||||
| Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc); | Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc); | ||||
| Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc, | Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc, | ||||
| vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats); | |||||
| vector<uint32_t> &input_formats, vector<uint32_t> &output_formats); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -347,9 +340,9 @@ class DavinciModel { | |||||
| void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const; | void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const; | ||||
| void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type); | |||||
| void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type) const; | |||||
| void GetModelAttr(vector<string> &dynamic_output_shape_info); | |||||
| void GetModelAttr(vector<string> &dynamic_output_shape_info) const; | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -358,9 +351,9 @@ class DavinciModel { | |||||
| /// @param [out] aipp_info | /// @param [out] aipp_info | ||||
| /// @return execute result | /// @return execute result | ||||
| /// | /// | ||||
| Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); | |||||
| Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const; | |||||
| Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
| Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const; | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -378,17 +371,6 @@ class DavinciModel { | |||||
| /// | /// | ||||
| void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification); | void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification); | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief get model input and output desc for zero copy | |||||
| /// @param [out] input_shape model input size | |||||
| /// @param [out] output_shape model output size | |||||
| /// @return execute result | |||||
| /// | |||||
| Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats); | |||||
| Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); | Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); | ||||
| Status ReturnNoOutput(uint32_t data_id); | Status ReturnNoOutput(uint32_t data_id); | ||||
| @@ -481,6 +463,10 @@ class DavinciModel { | |||||
| data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | ||||
| } | } | ||||
| void DumperShrink() { | |||||
| data_dumper_.DumpShrink(); | |||||
| } | |||||
| void SetEndGraphId(uint32_t task_id, uint32_t stream_id); | void SetEndGraphId(uint32_t task_id, uint32_t stream_id); | ||||
| DavinciModel &operator=(const DavinciModel &model) = delete; | DavinciModel &operator=(const DavinciModel &model) = delete; | ||||
| @@ -531,12 +517,12 @@ class DavinciModel { | |||||
| Status MallocKnownArgs(); | Status MallocKnownArgs(); | ||||
| Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs); | |||||
| Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true); | |||||
| void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | ||||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | |||||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const; | |||||
| Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims, | Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims, | ||||
| vector<InputOutputDims> &output_dims); | |||||
| vector<InputOutputDims> &output_dims) const; | |||||
| void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | ||||
| // om file name | // om file name | ||||
| void SetOmName(string om_name) { om_name_ = om_name; } | void SetOmName(string om_name) { om_name_ = om_name; } | ||||
| @@ -622,7 +608,7 @@ class DavinciModel { | |||||
| void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input); | void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input); | ||||
| Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats); | Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats); | ||||
| Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats); | |||||
| Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats) const; | |||||
| Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | ||||
| @@ -644,6 +630,8 @@ class DavinciModel { | |||||
| void ReleaseTask(); | void ReleaseTask(); | ||||
| void ClearTaskAddrs(); | |||||
| void UnbindTaskSinkStream(); | void UnbindTaskSinkStream(); | ||||
| bool IsAicpuKernelConnectSpecifiedLayer(); | bool IsAicpuKernelConnectSpecifiedLayer(); | ||||
| @@ -682,7 +670,7 @@ class DavinciModel { | |||||
| /// @param [in] output_op_list: list of NetOutput op. | /// @param [in] output_op_list: list of NetOutput op. | ||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list); | |||||
| Status GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -850,19 +838,26 @@ class DavinciModel { | |||||
| Status InitOutputTensorInfo(const OpDescPtr &op_desc); | Status InitOutputTensorInfo(const OpDescPtr &op_desc); | ||||
| Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | ||||
| Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list, | |||||
| vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &formats); | |||||
| Status InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index); | |||||
| Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list); | |||||
| Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); | |||||
| Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); | |||||
| Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list); | |||||
| Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc); | |||||
| void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); | void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); | ||||
| void SetLabelForDynamic(const NodePtr &node); | void SetLabelForDynamic(const NodePtr &node); | ||||
| void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info); | void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info); | ||||
| bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); | bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); | ||||
| Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node); | |||||
| void GetAllGearsInfo(const NodePtr &node); | void GetAllGearsInfo(const NodePtr &node); | ||||
| Status GetGetDynamicDimsNodeInfo(const NodePtr &node); | Status GetGetDynamicDimsNodeInfo(const NodePtr &node); | ||||
| Status GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node); | |||||
| Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); | |||||
| Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); | |||||
| Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node); | |||||
| Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); | |||||
| Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); | |||||
| bool is_weight_mem_has_inited_; | bool is_weight_mem_has_inited_; | ||||
| bool is_feature_map_mem_has_inited_; | bool is_feature_map_mem_has_inited_; | ||||
| @@ -875,15 +870,12 @@ class DavinciModel { | |||||
| string om_name_; | string om_name_; | ||||
| uint32_t version_; | uint32_t version_; | ||||
| GeModelPtr ge_model_; | |||||
| GeModelPtr ge_model_; // release after DavinciModel::Init | |||||
| bool need_destroy_aicpu_kernel_{false}; | bool need_destroy_aicpu_kernel_{false}; | ||||
| vector<string> out_node_name_; | vector<string> out_node_name_; | ||||
| map<uint32_t, OpDescPtr> op_list_; | |||||
| // data op_desc | |||||
| vector<OpDescPtr> data_op_list_; | |||||
| map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | |||||
| vector<OpDescPtr> variable_op_list_; | vector<OpDescPtr> variable_op_list_; | ||||
| @@ -970,12 +962,14 @@ class DavinciModel { | |||||
| // for profiling task and graph info | // for profiling task and graph info | ||||
| vector<TaskDescInfo> task_desc_info_; | vector<TaskDescInfo> task_desc_info_; | ||||
| std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_; | |||||
| int64_t maxDumpOpNum_; | int64_t maxDumpOpNum_; | ||||
| // for data dump | // for data dump | ||||
| DataDumper data_dumper_; | DataDumper data_dumper_; | ||||
| uint64_t iterator_count_; | uint64_t iterator_count_; | ||||
| bool is_l1_fusion_enable_; | bool is_l1_fusion_enable_; | ||||
| map<OpDescPtr, void *> saved_task_addrs_; | |||||
| map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init | |||||
| void *l1_fusion_addr_ = nullptr; | void *l1_fusion_addr_ = nullptr; | ||||
| bool known_node_ = false; | bool known_node_ = false; | ||||
| @@ -1015,15 +1009,15 @@ class DavinciModel { | |||||
| bool is_new_model_desc_{false}; | bool is_new_model_desc_{false}; | ||||
| bool is_online_infer_dynamic_ = false; | bool is_online_infer_dynamic_ = false; | ||||
| bool is_getnext_sink_dynamic_ = false; | bool is_getnext_sink_dynamic_ = false; | ||||
| vector<int64_t> cur_dynamic_dims_; | |||||
| vector<int32_t> cur_dynamic_dims_; | |||||
| void *netoutput_last_input_addr_ = nullptr; | void *netoutput_last_input_addr_ = nullptr; | ||||
| int64_t netoutput_last_input_size_ = 0; | int64_t netoutput_last_input_size_ = 0; | ||||
| size_t shape_of_cur_dynamic_dims_ = 0; | size_t shape_of_cur_dynamic_dims_ = 0; | ||||
| // key: input_index: input is merge node; value: each gear info and each output size | // key: input_index: input is merge node; value: each gear info and each output size | ||||
| map<size_t, map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_; | |||||
| map<size_t, map<vector<int32_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_; | |||||
| // key: input_index: input is merge node; value: each gear info and each output shape | // key: input_index: input is merge node; value: each gear info and each output shape | ||||
| map<size_t, map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | |||||
| vector<vector<int64_t>> all_gears_info_; | |||||
| map<size_t, map<vector<int32_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | |||||
| vector<vector<int32_t>> all_gears_info_; | |||||
| multimap<uint32_t, uint32_t> op_id_map_; | multimap<uint32_t, uint32_t> op_id_map_; | ||||
| vector<ProfileInfo> profile_list_; | vector<ProfileInfo> profile_list_; | ||||
| @@ -1038,8 +1032,15 @@ class DavinciModel { | |||||
| vector<vector<void *>> output_addrs_list_; | vector<vector<void *>> output_addrs_list_; | ||||
| vector<int64_t> output_buffer_size_; | vector<int64_t> output_buffer_size_; | ||||
| vector<vector<int64_t>> output_shape_info_; | |||||
| vector<GeShape> output_shape_info_; | |||||
| map<uint32_t, OriginInputInfo> orig_input_info_; | |||||
| map<uint32_t, AippConfigInfo> aipp_info_list_; | |||||
| map<uint32_t, pair<InputAippType, size_t>> aipp_type_list_; | |||||
| map<uint32_t, pair<vector<InputOutputDims>, vector<InputOutputDims>>> aipp_dims_info_; | |||||
| vector<InputOutputDescInfo> input_descs_; | |||||
| vector<uint32_t> input_formats_; | |||||
| vector<InputOutputDescInfo> output_descs_; | vector<InputOutputDescInfo> output_descs_; | ||||
| vector<uint32_t> output_formats_; | vector<uint32_t> output_formats_; | ||||
| }; | }; | ||||
| @@ -16,82 +16,7 @@ | |||||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | #include "graph/load/new_model_manager/davinci_model_parser.h" | ||||
| #include <fstream> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include "securec.h" | |||||
| #include "common/debug/log.h" | |||||
| #include "graph/load/new_model_manager/davinci_model.h" | |||||
| namespace ge { | namespace ge { | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelInfoParser(const ModelData &model, ModelInfo &model_info) { | |||||
| GE_CHK_RT_RET(rtSetDevice(0)); | |||||
| try { | |||||
| uint32_t model_len = 0; | |||||
| uint8_t *model_data = nullptr; | |||||
| Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); return ret, "Parse model failed"); | |||||
| auto *file_header = reinterpret_cast<ModelFileHeader *>(model.model_data); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_header == nullptr, GE_CHK_RT(rtDeviceReset(0)); | |||||
| return PARAM_INVALID, "file_header is null."); | |||||
| model_info.version = file_header->version; | |||||
| model_info.is_encrypt = false; | |||||
| GE_IF_BOOL_EXEC(ENCRYPTED == file_header->is_encrypt, model_info.is_encrypt = true); | |||||
| std::shared_ptr<DavinciModel> davinci_model = | |||||
| std::shared_ptr<DavinciModel>(new (std::nothrow) DavinciModel(model.priority, nullptr)); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(davinci_model == nullptr, GE_CHK_RT(rtDeviceReset(0)); | |||||
| return PARAM_INVALID, "davinci_model is null."); | |||||
| GE_MAKE_GUARD(davinci_model, [&] { davinci_model = nullptr; }); | |||||
| ModelHelper model_helper; | |||||
| ret = model_helper.LoadModel(model); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((ret != SUCCESS), GE_CHK_RT(rtDeviceReset(0)); return FAILED, "load model failed"); | |||||
| ret = davinci_model->Assign(model_helper.GetGeModel()); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); | |||||
| return ret, "Parse davinci model data failed"); | |||||
| ret = davinci_model->Init(); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); | |||||
| return ret, "Davinci model init failed"); | |||||
| vector<InputOutputDescInfo> input_list; | |||||
| vector<InputOutputDescInfo> output_list; | |||||
| ret = davinci_model->GetInputOutputDescInfo(input_list, output_list); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); | |||||
| return ret, "Davinci model GetInputOutputDescInfo failed"); | |||||
| for (const auto &desc : input_list) { | |||||
| model_info.input_desc.push_back(desc.shape_info); | |||||
| } | |||||
| for (const auto &desc : output_list) { | |||||
| model_info.output_desc.push_back(desc.shape_info); | |||||
| } | |||||
| model_info.name = davinci_model->Name(); | |||||
| } catch (...) { | |||||
| DOMI_LOGE("OM model parser failed, some exceptions occur !"); | |||||
| GE_CHK_RT(rtDeviceReset(0)); | |||||
| return FAILED; | |||||
| } | |||||
| GE_CHK_RT(rtDeviceReset(0)); | |||||
| return SUCCESS; | |||||
| } | |||||
| DavinciModelParser::DavinciModelParser() {} | DavinciModelParser::DavinciModelParser() {} | ||||
| DavinciModelParser::~DavinciModelParser() {} | DavinciModelParser::~DavinciModelParser() {} | ||||
| @@ -460,8 +460,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d | |||||
| Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_input_dims, | Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_input_dims, | ||||
| const vector<pair<string, vector<int64_t>>> &user_input_dims, | const vector<pair<string, vector<int64_t>>> &user_input_dims, | ||||
| vector<int64_t> &cur_dynamic_dims) { | |||||
| GELOGD(" Start get cur dynamic dims."); | |||||
| vector<int32_t> &cur_dynamic_dims) { | |||||
| GELOGD("Start get cur dynamic dims."); | |||||
| if (user_real_input_dims.size() != user_input_dims.size()) { | if (user_real_input_dims.size() != user_input_dims.size()) { | ||||
| GELOGE(INTERNAL_ERROR, | GELOGE(INTERNAL_ERROR, | ||||
| "The input count of user: %zu should be equal to the data count of graph: %zu", | "The input count of user: %zu should be equal to the data count of graph: %zu", | ||||
| @@ -478,7 +478,7 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_ | |||||
| } | } | ||||
| for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) { | for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) { | ||||
| if (user_input_dims.at(i).second.at(j) < 0) { | if (user_input_dims.at(i).second.at(j) < 0) { | ||||
| cur_dynamic_dims.emplace_back(user_real_input_dims[i][j]); | |||||
| cur_dynamic_dims.emplace_back(static_cast<int32_t>(user_real_input_dims[i][j])); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -523,7 +523,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||||
| input_data.blobs.push_back(data); | input_data.blobs.push_back(data); | ||||
| } | } | ||||
| if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) { | if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) { | ||||
| std::vector<int64_t> cur_dynamic_dims; | |||||
| std::vector<int32_t> cur_dynamic_dims; | |||||
| if (!GetLocalOmgContext().user_real_input_dims.empty()) { | if (!GetLocalOmgContext().user_real_input_dims.empty()) { | ||||
| if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims, | if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims, | ||||
| cur_dynamic_dims) != SUCCESS) { | cur_dynamic_dims) != SUCCESS) { | ||||
| @@ -531,9 +531,9 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| DataBuffer data; | DataBuffer data; | ||||
| data.data = new(std::nothrow) int64_t[cur_dynamic_dims.size()]; | |||||
| data.data = new(std::nothrow) int32_t[cur_dynamic_dims.size()]; | |||||
| GE_CHECK_NOTNULL(data.data); | GE_CHECK_NOTNULL(data.data); | ||||
| uint64_t length = static_cast<uint64_t>(cur_dynamic_dims.size() * sizeof(int64_t)); | |||||
| uint32_t length = static_cast<uint32_t>(cur_dynamic_dims.size() * sizeof(int32_t)); | |||||
| GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR, | GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR, | ||||
| "Failed to memcpy data."); | "Failed to memcpy data."); | ||||
| data.length = length; | data.length = length; | ||||
| @@ -995,16 +995,6 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &inputFormats, | |||||
| std::vector<uint32_t> &outputFormats) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||||
| "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | |||||
| return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get AIPP info | /// @brief Get AIPP info | ||||
| @@ -1013,11 +1003,11 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||||
| /// @param [out] aipp_info | /// @param [out] aipp_info | ||||
| /// @return execute result | /// @return execute result | ||||
| /// | /// | ||||
| Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||||
| Status ModelManager::GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
| "GetAIPPInfo failed, invalid model_id is %u.", model_id); | "GetAIPPInfo failed, invalid model_id is %u.", model_id); | ||||
| return davinci_model->GetAIPPInfo(index, aipp_info); | |||||
| return davinci_model->GetAippInfo(index, aipp_info); | |||||
| } | } | ||||
| Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | ||||
| @@ -1563,6 +1553,12 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||||
| size_t aicpu_op_nums = aicpu_optype_list.size(); | size_t aicpu_op_nums = aicpu_optype_list.size(); | ||||
| size_t tf_op_nums = aicpu_tf_optype_list.size(); | size_t tf_op_nums = aicpu_tf_optype_list.size(); | ||||
| size_t op_nums = aicpu_op_nums + tf_op_nums; | size_t op_nums = aicpu_op_nums + tf_op_nums; | ||||
| std::function<void()> callback = [&]() { | |||||
| for (auto mem : allocated_mem) { | |||||
| GE_CHK_RT(rtFree(mem)); | |||||
| } | |||||
| }; | |||||
| GE_MAKE_GUARD(release, callback); | |||||
| // malloc sysOpInfoList in SysOpCheckInfo | // malloc sysOpInfoList in SysOpCheckInfo | ||||
| status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); | status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); | ||||
| if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
| @@ -1642,34 +1638,33 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||||
| return RT_ERROR_TO_GE_STATUS(status); | return RT_ERROR_TO_GE_STATUS(status); | ||||
| } | } | ||||
| allocated_mem.push_back(args); | allocated_mem.push_back(args); | ||||
| GE_CHK_RT( | |||||
| rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req), sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)), | |||||
| sizeof(SysOpCheckResp), reinterpret_cast<void *>(&op_check_info_res), sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req), sizeof(SysOpCheckInfo), | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GE_CHK_RT(rtMemcpy( | |||||
| reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + | |||||
| op_check_info_req.offSetLen)), sizeof(SysOpCheckResp), reinterpret_cast<void *>(&op_check_info_res), | |||||
| sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GE_CHK_RT(rtStreamCreate(&stream, 0)); | GE_CHK_RT(rtStreamCreate(&stream, 0)); | ||||
| GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); | GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); | ||||
| status = rtStreamSynchronize(stream); | status = rtStreamSynchronize(stream); | ||||
| if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | ||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| return RT_ERROR_TO_GE_STATUS(status); | return RT_ERROR_TO_GE_STATUS(status); | ||||
| } | } | ||||
| // Check the response | // Check the response | ||||
| SysOpCheckResp *d_op_check_info_res = reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen))); | |||||
| SysOpCheckResp *d_op_check_info_res = | |||||
| reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>( | |||||
| reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen))); | |||||
| (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp)); | (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp)); | ||||
| GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), | GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), | ||||
| RT_MEMCPY_DEVICE_TO_HOST)); | RT_MEMCPY_DEVICE_TO_HOST)); | ||||
| std::function<void()> callback = [&]() { | |||||
| for (auto mem : allocated_mem) { | |||||
| GE_CHK_RT(rtFree(mem)); | |||||
| } | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| }; | |||||
| if (op_check_info_res.isWithoutJson) { | if (op_check_info_res.isWithoutJson) { | ||||
| GELOGI("No need to check aicpu in this scenoria."); | GELOGI("No need to check aicpu in this scenoria."); | ||||
| GE_MAKE_GUARD(release, callback); | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| uint64_t res_op_nums = op_check_info_res.opListNum; | uint64_t res_op_nums = op_check_info_res.opListNum; | ||||
| @@ -1687,7 +1682,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||||
| sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); | sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); | ||||
| if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { | if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { | ||||
| GELOGE(FAILED, "Number of retcode is not equal to number of op type."); | GELOGE(FAILED, "Number of retcode is not equal to number of op type."); | ||||
| GE_MAKE_GUARD(release, callback); | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| std::string fail_reason; | std::string fail_reason; | ||||
| @@ -1710,11 +1705,11 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||||
| } | } | ||||
| fail_reason += "not support."; | fail_reason += "not support."; | ||||
| GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); | GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); | ||||
| GE_MAKE_GUARD(release, callback); | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GE_MAKE_GUARD(release, callback); | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| GELOGI("Cpu kernel launch check optype task success."); | GELOGI("Cpu kernel launch check optype task success."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -126,14 +126,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| /// | /// | ||||
| /// @ingroup domi_ome | /// @ingroup domi_ome | ||||
| /// @brief Get cur_dynamic_dims for all input. | /// @brief Get cur_dynamic_dims for all input. | ||||
| /// @param [in] vector<vector<uint64_t>> &user_real_input_dims: dims info of all user_inputs. | |||||
| /// @param [in] vector<vector<int64_t>> &user_real_input_dims: dims info of all user_inputs. | |||||
| /// @param [in] vector<pair<string, vector<int64_t>>> &user_input_dims: key:name. value:dynamic dims from option. | /// @param [in] vector<pair<string, vector<int64_t>>> &user_input_dims: key:name. value:dynamic dims from option. | ||||
| /// @param [out] vector<uint64_t> &cur_dynamic_dims: real dims gather, where the index of -1. | |||||
| /// @param [out] vector<int32_t> &cur_dynamic_dims: real dims gather, where the index of -1. | |||||
| /// @return 0: SUCCESS / others: INTERNAL_ERROR | /// @return 0: SUCCESS / others: INTERNAL_ERROR | ||||
| /// | /// | ||||
| Status GetCurDynamicDims(const vector<vector<int64_t>> &user_real_input_dims, | Status GetCurDynamicDims(const vector<vector<int64_t>> &user_real_input_dims, | ||||
| const vector<pair<string, vector<int64_t>>> &user_input_dims, | const vector<pair<string, vector<int64_t>>> &user_input_dims, | ||||
| vector<int64_t> &cur_dynamic_dims); | |||||
| vector<int32_t> &cur_dynamic_dims); | |||||
| /// | /// | ||||
| /// @ingroup domi_ome | /// @ingroup domi_ome | ||||
| @@ -239,24 +239,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| /// @param [out] aipp_info | /// @param [out] aipp_info | ||||
| /// @return execute result | /// @return execute result | ||||
| /// | /// | ||||
| ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||||
| ge::Status GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||||
| ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | ||||
| /// | |||||
| /// @ingroup domi_ome | |||||
| /// @brief set model input and output size zero copy | |||||
| /// @param [in] model_id model id | |||||
| /// @param [out] input_shape input tensor | |||||
| /// @param [out] output_shape output tensor | |||||
| /// @return SUCCESS success | |||||
| /// @return PARAM_INVALID parameter invalid | |||||
| /// | |||||
| ge::Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||||
| std::vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &inputFormats, | |||||
| std::vector<uint32_t> &outputFormats); | |||||
| ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | ||||
| ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | ||||
| @@ -145,7 +145,9 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM | |||||
| } else { | } else { | ||||
| GELOGI("need to reuse follow stream and create new follow stream."); | GELOGI("need to reuse follow stream and create new follow stream."); | ||||
| size_t created_stream_num = follow_stream_usage.size(); | size_t created_stream_num = follow_stream_usage.size(); | ||||
| hccl_stream_list_ = follow_stream_usage; | |||||
| for (const auto &stream : follow_stream_usage) { | |||||
| hccl_stream_list_.emplace_back(stream); | |||||
| } | |||||
| ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); | ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | GELOGE(RT_FAILED, "Create hccl stream failed."); | ||||
| @@ -38,10 +38,8 @@ | |||||
| #include "graph/partition/stage_partition.h" | #include "graph/partition/stage_partition.h" | ||||
| #include "graph/passes/addn_pass.h" | #include "graph/passes/addn_pass.h" | ||||
| #include "graph/passes/bitcast_pass.h" | #include "graph/passes/bitcast_pass.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/passes/assign_remove_pass.h" | #include "graph/passes/assign_remove_pass.h" | ||||
| #include "graph/passes/inplace_support_check_pass.h" | #include "graph/passes/inplace_support_check_pass.h" | ||||
| #endif | |||||
| #include "graph/passes/atomic_addr_clean_pass.h" | #include "graph/passes/atomic_addr_clean_pass.h" | ||||
| #include "graph/passes/attach_stream_label_pass.h" | #include "graph/passes/attach_stream_label_pass.h" | ||||
| #include "graph/passes/cast_remove_pass.h" | #include "graph/passes/cast_remove_pass.h" | ||||
| @@ -93,7 +91,6 @@ | |||||
| #include "graph/passes/unused_args_clean_pass.h" | #include "graph/passes/unused_args_clean_pass.h" | ||||
| #include "graph/passes/global_step_insert_pass.h" | #include "graph/passes/global_step_insert_pass.h" | ||||
| #include "graph/passes/memcpy_addr_async_pass.h" | #include "graph/passes/memcpy_addr_async_pass.h" | ||||
| #include "graph/passes/hccl_memcpy_pass.h" | |||||
| #include "graph/build/label_allocator.h" | #include "graph/build/label_allocator.h" | ||||
| #include "graph/utils/tensor_adapter.h" | #include "graph/utils/tensor_adapter.h" | ||||
| #include "inc/pass_manager.h" | #include "inc/pass_manager.h" | ||||
| @@ -102,6 +99,7 @@ | |||||
| #include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
| #include "graph/common/omg_util.h" | #include "graph/common/omg_util.h" | ||||
| #include "common/formats/utils/formats_trans_utils.h" | #include "common/formats/utils/formats_trans_utils.h" | ||||
| #include "register/custom_pass_helper.h" | |||||
| namespace { | namespace { | ||||
| const char *const kSummary = "Summary"; | const char *const kSummary = "Summary"; | ||||
| @@ -687,7 +685,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, | |||||
| CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | ||||
| GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph); | GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph); | ||||
| GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph); | GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph); | ||||
| GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, | |||||
| GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node, inputs, compute_graph, | |||||
| session_id); | session_id); | ||||
| GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph); | GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph); | ||||
| @@ -732,6 +730,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||||
| GeRootModelPtr &ge_root_model, uint64_t session_id) { | GeRootModelPtr &ge_root_model, uint64_t session_id) { | ||||
| GE_CHECK_NOTNULL(graph_node); | GE_CHECK_NOTNULL(graph_node); | ||||
| GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
| CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | |||||
| GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); | |||||
| GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | ||||
| GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", | GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", | ||||
| GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, | GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, | ||||
| @@ -766,10 +767,24 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { | |||||
| ConstGraphPtr const_graph = graph_node->GetGraph(); | |||||
| auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); | |||||
| GE_DUMP(comp_graph, "RunCustomPassBegin"); | |||||
| GE_TIMESTAMP_START(RunCustomPass); | |||||
| GraphPtr graph = std::const_pointer_cast<Graph>(const_graph); | |||||
| GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.", | |||||
| comp_graph->GetName().c_str()); | |||||
| GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | ||||
| GeRootModelPtr &ge_root_model, uint64_t session_id) { | GeRootModelPtr &ge_root_model, uint64_t session_id) { | ||||
| GE_CHECK_NOTNULL(graph_node); | GE_CHECK_NOTNULL(graph_node); | ||||
| GE_CHECK_NOTNULL(graph_node->GetGraph()); | GE_CHECK_NOTNULL(graph_node->GetGraph()); | ||||
| GE_CHK_STATUS_RET_NOLOG(RunCustomPass(graph_node)); | |||||
| auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); | auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); | ||||
| GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
| compute_graph->SetSessionID(session_id); | compute_graph->SetSessionID(session_id); | ||||
| @@ -1173,7 +1188,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const | |||||
| auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); | auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); | ||||
| GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
| GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node->GetGraph(), inputs, | |||||
| GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node, inputs, | |||||
| compute_graph, session_id); | compute_graph, session_id); | ||||
| for (auto &node : compute_graph->GetAllNodes()) { | for (auto &node : compute_graph->GetAllNodes()) { | ||||
| @@ -2122,8 +2137,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
| new (std::nothrow) TransOpWithoutReshapeFusionPass)) | new (std::nothrow) TransOpWithoutReshapeFusionPass)) | ||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", | ||||
| new (std::nothrow) TransOpBreadthFusionPass)) | new (std::nothrow) TransOpBreadthFusionPass)) | ||||
| GE_CHK_STATUS_RET( | |||||
| after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); | |||||
| GE_TIMESTAMP_START(after_merge_passes); | GE_TIMESTAMP_START(after_merge_passes); | ||||
| auto ret = after_merge_passes.Run(compute_graph); | auto ret = after_merge_passes.Run(compute_graph); | ||||
| @@ -2254,20 +2267,16 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| ReshapeRemovePass reshape_remove_pass; | ReshapeRemovePass reshape_remove_pass; | ||||
| CondRemovePass condition_remove_pass; | CondRemovePass condition_remove_pass; | ||||
| BitcastPass bitcast_pass; | BitcastPass bitcast_pass; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| AssignRemovePass assign_remove_pass; | AssignRemovePass assign_remove_pass; | ||||
| InplaceSupportCheckPass inplace_support_check_pass; | InplaceSupportCheckPass inplace_support_check_pass; | ||||
| #endif | |||||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | ||||
| names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | ||||
| names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | ||||
| names_to_passes.emplace_back("BitcastPass", &bitcast_pass); | names_to_passes.emplace_back("BitcastPass", &bitcast_pass); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (GetContext().GetHostExecFlag()) { | if (GetContext().GetHostExecFlag()) { | ||||
| names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); | names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); | ||||
| names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); | names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); | ||||
| } | } | ||||
| #endif | |||||
| GE_TIMESTAMP_START(names_to_passes); | GE_TIMESTAMP_START(names_to_passes); | ||||
| ret = GEPass(compute_graph).Run(names_to_passes); | ret = GEPass(compute_graph).Run(names_to_passes); | ||||
| GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); | GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); | ||||
| @@ -2765,8 +2774,10 @@ Status GraphManager::ParseInputsDims(const std::vector<InputTensorInfo> &input_t | |||||
| if (!GetLocalOmgContext().dynamic_node_type.empty()) { | if (!GetLocalOmgContext().dynamic_node_type.empty()) { | ||||
| vector<NodePtr> data_nodes; | vector<NodePtr> data_nodes; | ||||
| vector<NodePtr> getnext_nosink_nodes; | vector<NodePtr> getnext_nosink_nodes; | ||||
| data_nodes = compute_graph_->TryGetExtAttr(kExtAttrDataNodes, data_nodes); | |||||
| getnext_nosink_nodes = compute_graph_->TryGetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes); | |||||
| data_nodes = GetLocalOmgContext().data_nodes; | |||||
| getnext_nosink_nodes = GetLocalOmgContext().getnext_nosink_nodes; | |||||
| GELOGD("Data nodes count is %zu, getnext nosink nodes count is %zu.", data_nodes.size(), | |||||
| getnext_nosink_nodes.size()); | |||||
| if (GetLocalOmgContext().dynamic_node_type == DATA) { | if (GetLocalOmgContext().dynamic_node_type == DATA) { | ||||
| if (getnext_nosink_nodes.empty()) { | if (getnext_nosink_nodes.empty()) { | ||||
| // just data or data+getnext_sink | // just data or data+getnext_sink | ||||
| @@ -226,6 +226,7 @@ class GraphManager { | |||||
| void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor); | void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor); | ||||
| Status ParseInputsDimsForGetNexNosinkAndData(const vector<NodePtr> &dynamic_nodes, | Status ParseInputsDimsForGetNexNosinkAndData(const vector<NodePtr> &dynamic_nodes, | ||||
| const std::vector<InputTensorInfo> &input_tensor); | const std::vector<InputTensorInfo> &input_tensor); | ||||
| Status RunCustomPass(const GraphNodePtr &graph_node); | |||||
| Status PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, GeRootModelPtr &ge_root_model, | Status PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, GeRootModelPtr &ge_root_model, | ||||
| uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
| @@ -19,9 +19,7 @@ | |||||
| #include <string> | #include <string> | ||||
| #include "graph/manager/graph_caching_allocator.h" | #include "graph/manager/graph_caching_allocator.h" | ||||
| #include "graph/manager/rdma_pool_allocator.h" | #include "graph/manager/rdma_pool_allocator.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/host_mem_allocator.h" | #include "graph/manager/host_mem_allocator.h" | ||||
| #endif | |||||
| namespace ge { | namespace ge { | ||||
| void MemoryAllocator::Initialize(uint32_t device_id) { | void MemoryAllocator::Initialize(uint32_t device_id) { | ||||
| GELOGI("MemoryAllocator::Initialize"); | GELOGI("MemoryAllocator::Initialize"); | ||||
| @@ -192,12 +190,10 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) { | |||||
| GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); | GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); | ||||
| return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { | if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { | ||||
| GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); | GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); | ||||
| return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
| } | } | ||||
| #endif | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -219,9 +215,7 @@ void MemManager::Finalize() noexcept { | |||||
| // caching and rdma allocator use memory allocator, so finalize them first | // caching and rdma allocator use memory allocator, so finalize them first | ||||
| FinalizeAllocatorMap(caching_allocator_map_); | FinalizeAllocatorMap(caching_allocator_map_); | ||||
| FinalizeAllocatorMap(rdma_allocator_map_); | FinalizeAllocatorMap(rdma_allocator_map_); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| FinalizeAllocatorMap(host_allocator_map_); | FinalizeAllocatorMap(host_allocator_map_); | ||||
| #endif | |||||
| FinalizeAllocatorMap(memory_allocator_map_); | FinalizeAllocatorMap(memory_allocator_map_); | ||||
| } | } | ||||
| @@ -250,9 +244,7 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { | |||||
| RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { | RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { | ||||
| return Instance().GetAllocator(memory_type, rdma_allocator_map_); | return Instance().GetAllocator(memory_type, rdma_allocator_map_); | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { | HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { | ||||
| return Instance().GetAllocator(memory_type, host_allocator_map_); | return Instance().GetAllocator(memory_type, host_allocator_map_); | ||||
| } | } | ||||
| #endif | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -139,9 +139,7 @@ class MemoryAllocator { | |||||
| using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | ||||
| class CachingAllocator; | class CachingAllocator; | ||||
| class RdmaPoolAllocator; | class RdmaPoolAllocator; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| class HostMemAllocator; | class HostMemAllocator; | ||||
| #endif | |||||
| class MemManager { | class MemManager { | ||||
| public: | public: | ||||
| MemManager(); | MemManager(); | ||||
| @@ -150,9 +148,7 @@ class MemManager { | |||||
| static MemoryAllocator *Instance(rtMemType_t memory_type); | static MemoryAllocator *Instance(rtMemType_t memory_type); | ||||
| CachingAllocator &CachingInstance(rtMemType_t memory_type); | CachingAllocator &CachingInstance(rtMemType_t memory_type); | ||||
| RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); | RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| HostMemAllocator &HostMemInstance(rtMemType_t memory_type); | HostMemAllocator &HostMemInstance(rtMemType_t memory_type); | ||||
| #endif | |||||
| MemManager(const MemManager &) = delete; | MemManager(const MemManager &) = delete; | ||||
| MemManager &operator=(const MemManager &) = delete; | MemManager &operator=(const MemManager &) = delete; | ||||
| /// | /// | ||||
| @@ -240,9 +236,7 @@ class MemManager { | |||||
| std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; | std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; | ||||
| std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | ||||
| std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; | std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; | std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; | ||||
| #endif | |||||
| std::recursive_mutex allocator_mutex_; | std::recursive_mutex allocator_mutex_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -27,7 +27,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| class HostMemAllocator { | class HostMemAllocator { | ||||
| public: | public: | ||||
| explicit HostMemAllocator(rtMemType_t) {} | |||||
| explicit HostMemAllocator(rtMemType_t) {} | |||||
| ~HostMemAllocator() = default; | ~HostMemAllocator() = default; | ||||
| HostMemAllocator(const HostMemAllocator &) = delete; | HostMemAllocator(const HostMemAllocator &) = delete; | ||||
| @@ -43,29 +43,20 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { | |||||
| return GE_GRAPH_MEMORY_ALLOC_FAILED; | return GE_GRAPH_MEMORY_ALLOC_FAILED; | ||||
| } | } | ||||
| mem_info.fd = output_para.fd; | mem_info.fd = output_para.fd; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr<uint8_t[], deleter> &ptr) { | mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr<uint8_t[], deleter> &ptr) { | ||||
| ptr.reset(reinterpret_cast<uint8_t *>(output_para.ptr)); | ptr.reset(reinterpret_cast<uint8_t *>(output_para.ptr)); | ||||
| }, | }, | ||||
| [](uint8_t *ptr) { | [](uint8_t *ptr) { | ||||
| ptr = nullptr; | ptr = nullptr; | ||||
| }); | }); | ||||
| #else | |||||
| mem_info.host_address = reinterpret_cast<uint8_t *>(output_para.ptr); | |||||
| #endif | |||||
| mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr); | mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { | Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { | ||||
| GELOGD("SharedMemAllocator::DeAllocate"); | GELOGD("SharedMemAllocator::DeAllocate"); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, | rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, | ||||
| mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; | mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; | ||||
| #else | |||||
| rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, | |||||
| mem_info.host_address, mem_info.device_address}; | |||||
| #endif | |||||
| rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); | rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); | GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); | ||||
| @@ -42,11 +42,7 @@ struct SharedMemInfo { | |||||
| uint64_t mem_size = 0; | uint64_t mem_size = 0; | ||||
| int fd = 0; | int fd = 0; | ||||
| uint8_t *device_address = nullptr; | uint8_t *device_address = nullptr; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| std::shared_ptr<AlignedPtr> host_aligned_ptr = nullptr; | std::shared_ptr<AlignedPtr> host_aligned_ptr = nullptr; | ||||
| #else | |||||
| uint8_t *host_address = nullptr; | |||||
| #endif | |||||
| SharedMemInfo() = default; | SharedMemInfo() = default; | ||||
| SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} | SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} | ||||
| }; | }; | ||||
| @@ -127,6 +127,10 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||||
| } | } | ||||
| Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | ||||
| if (GetContext().GetHostExecFlag()) { | |||||
| // graph exec on host, no need OptimizeOriginalGraph | |||||
| return SUCCESS; | |||||
| } | |||||
| if (compute_graph == nullptr) { | if (compute_graph == nullptr) { | ||||
| GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeOriginalGraph]: compute_graph is nullptr."); | GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeOriginalGraph]: compute_graph is nullptr."); | ||||
| return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | ||||
| @@ -162,7 +166,7 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | |||||
| Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { | Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { | ||||
| GELOGD("OptimizeOriginalGraphJudgeInsert in"); | GELOGD("OptimizeOriginalGraphJudgeInsert in"); | ||||
| if (GetContext().GetHostExecFlag()) { | if (GetContext().GetHostExecFlag()) { | ||||
| // graph exec on host, no need OptimizeOriginalGraph | |||||
| // graph exec on host, no need OptimizeOriginalGraphJudgeInsert | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -336,4 +340,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) { | |||||
| if (compute_graph == nullptr) { | |||||
| GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr."); | |||||
| return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | |||||
| } | |||||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed."); | |||||
| return GE_CLI_GE_NOT_INITIALIZED; | |||||
| } | |||||
| auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); | |||||
| GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", graph_optimizer.size()); | |||||
| Status ret = SUCCESS; | |||||
| string exclude_core_type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine; | |||||
| GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str()); | |||||
| if (!graph_optimizer.empty()) { | |||||
| for (auto &iter : graph_optimizer) { | |||||
| if (iter.first == exclude_core_type || iter.second == nullptr) { | |||||
| continue; | |||||
| } | |||||
| GELOGI("Begin to optimize whole graph by engine %s", iter.first.c_str()); | |||||
| ret = iter.second->OptimizeWholeGraph(*compute_graph); | |||||
| GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret); | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -52,6 +52,9 @@ class GraphOptimize { | |||||
| // for fe prepare optimize in quantize scene | // for fe prepare optimize in quantize scene | ||||
| Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph); | Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph); | ||||
| // for engine to optimize merged whole graph before ge Optimize2 | |||||
| Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph); | |||||
| // for rts optimize before build to add attr and insert memcpy op | // for rts optimize before build to add attr and insert memcpy op | ||||
| Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); | Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| namespace ge { | |||||
| namespace { | namespace { | ||||
| constexpr uint32_t kValidInputNodeOutputNum = 1; | constexpr uint32_t kValidInputNodeOutputNum = 1; | ||||
| constexpr int32_t kAssignRefInputIndex = 0; | constexpr int32_t kAssignRefInputIndex = 0; | ||||
| @@ -28,8 +29,6 @@ static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, | |||||
| ge::VARIABLE, ge::VARIABLEV2 }; | ge::VARIABLE, ge::VARIABLEV2 }; | ||||
| } | } | ||||
| namespace ge { | |||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| Status AssignRemovePass::Run(NodePtr &node) { | Status AssignRemovePass::Run(NodePtr &node) { | ||||
| GELOGD("AssignRemovePass running"); | GELOGD("AssignRemovePass running"); | ||||
| @@ -145,71 +144,7 @@ Status AssignRemovePass::TransformAttr(NodePtr &node) { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| #else | |||||
| Status AssignRemovePass::Run(NodePtr &node) { | |||||
| GELOGD("AssignRemovePass running"); | |||||
| if (node->GetType() != ASSIGN) { | |||||
| GELOGD("No need run AssignRemovePass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex); | |||||
| const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex); | |||||
| if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { | |||||
| GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); | |||||
| const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); | |||||
| if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { | |||||
| GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) { | |||||
| /// | |||||
| /// variable not-const not-const | |||||
| /// \ / | | |||||
| /// \ / | | |||||
| /// Assign ----> variable | |||||
| /// | | | |||||
| /// | | | |||||
| /// node node | |||||
| /// | |||||
| GELOGI("Optimization for assign_node %s start", node->GetName().c_str()); | |||||
| if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) { | |||||
| GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| AddNodeDeleted(node); | |||||
| const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| if ((ref_input == nullptr) || (value_input == nullptr)) { | |||||
| GELOGE(FAILED, "value input is null"); | |||||
| return FAILED; | |||||
| } | |||||
| if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, | |||||
| ref_input->GetName())) { | |||||
| GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); | |||||
| return FAILED; | |||||
| } | |||||
| // variable has and only has one input | |||||
| if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| GELOGD("AssignRemovePass success"); | |||||
| return SUCCESS; | |||||
| } | |||||
| #endif | |||||
| /// | /// | ||||
| /// @brief Check if need optimize for assign_node | /// @brief Check if need optimize for assign_node | ||||
| /// @param [in] assign_node | /// @param [in] assign_node | ||||
| @@ -218,7 +153,7 @@ Status AssignRemovePass::Run(NodePtr &node) { | |||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor, | bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor, | ||||
| const OutDataAnchorPtr &value_peer_anchor) { | |||||
| const OutDataAnchorPtr &value_peer_anchor) { | |||||
| GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s", | GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s", | ||||
| node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), | node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), | ||||
| value_peer_anchor->GetOwnerNode()->GetName().c_str()); | value_peer_anchor->GetOwnerNode()->GetName().c_str()); | ||||
| @@ -25,7 +25,6 @@ class AssignRemovePass : public BaseNodePass { | |||||
| Status Run(NodePtr &node) override; | Status Run(NodePtr &node) override; | ||||
| private: | private: | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| /// | /// | ||||
| /// @brief Optimize for assign_node | /// @brief Optimize for assign_node | ||||
| /// @param [in] assign_node | /// @param [in] assign_node | ||||
| @@ -39,7 +38,7 @@ class AssignRemovePass : public BaseNodePass { | |||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status TransformAttr(NodePtr &node); | Status TransformAttr(NodePtr &node); | ||||
| #endif | |||||
| /// | /// | ||||
| /// @brief Check if need optimize for assign_node | /// @brief Check if need optimize for assign_node | ||||
| /// @param [in] assign_node | /// @param [in] assign_node | ||||
| @@ -26,6 +26,10 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| std::set<std::string> un_compute_attrs = { | |||||
| {ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES}, | |||||
| }; | |||||
| std::string GetCseKey(const NodePtr &node) { | std::string GetCseKey(const NodePtr &node) { | ||||
| std::stringstream ss; | std::stringstream ss; | ||||
| ss << node->GetType() << "-data-inputs-"; | ss << node->GetType() << "-data-inputs-"; | ||||
| @@ -49,7 +53,7 @@ std::string GetCseKey(const NodePtr &node) { | |||||
| ss << name << "-"; | ss << name << "-"; | ||||
| } | } | ||||
| ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); | |||||
| ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs); | |||||
| return ss.str(); | return ss.str(); | ||||
| } | } | ||||
| @@ -115,21 +115,15 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, | |||||
| TypeUtils::DataTypeToSerialString(data_type).c_str()); | TypeUtils::DataTypeToSerialString(data_type).c_str()); | ||||
| continue; | continue; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { | if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { | ||||
| GELOGW("aligned_ptr is null while size is not 0"); | GELOGW("aligned_ptr is null while size is not 0"); | ||||
| continue; | continue; | ||||
| } | } | ||||
| #endif | |||||
| ++insert_const_nums; | ++insert_const_nums; | ||||
| SameConstKey map_key; | SameConstKey map_key; | ||||
| map_key.data_size = type_size; | map_key.data_size = type_size; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); | map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); | ||||
| #else | |||||
| map_key.data = weight->GetData().GetData(); | |||||
| #endif | |||||
| map_key.data_type = data_type; | map_key.data_type = data_type; | ||||
| map_key.format = output_tensor->GetFormat(); | map_key.format = output_tensor->GetFormat(); | ||||
| map_key.shape = output_tensor->GetShape().GetDims(); | map_key.shape = output_tensor->GetShape().GetDims(); | ||||
| @@ -21,20 +21,14 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/aligned_ptr.h" | #include "graph/aligned_ptr.h" | ||||
| #endif | |||||
| #include "graph/types.h" | #include "graph/types.h" | ||||
| #include "inc/graph_pass.h" | #include "inc/graph_pass.h" | ||||
| namespace ge { | namespace ge { | ||||
| struct SameConstKey { | struct SameConstKey { | ||||
| int data_size; | int data_size; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| std::shared_ptr<AlignedPtr> aligned_ptr; | std::shared_ptr<AlignedPtr> aligned_ptr; | ||||
| #else | |||||
| const uint8_t *data; | |||||
| #endif | |||||
| DataType data_type; | DataType data_type; | ||||
| Format format; | Format format; | ||||
| std::vector<int64_t> shape; | std::vector<int64_t> shape; | ||||
| @@ -44,19 +38,12 @@ struct SameConstKey { | |||||
| if (data_size != key.data_size) { | if (data_size != key.data_size) { | ||||
| return data_size < key.data_size; | return data_size < key.data_size; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (data_size != 0) { | if (data_size != 0) { | ||||
| int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); | int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); | ||||
| if (ret != 0) { | if (ret != 0) { | ||||
| return ret < 0; | return ret < 0; | ||||
| } | } | ||||
| } | } | ||||
| #else | |||||
| int ret = memcmp(data, key.data, data_size); | |||||
| if (ret != 0) { | |||||
| return ret < 0; | |||||
| } | |||||
| #endif | |||||
| if (data_type != key.data_type) { | if (data_type != key.data_type) { | ||||
| return data_type < key.data_type; | return data_type < key.data_type; | ||||
| } | } | ||||
| @@ -28,157 +28,50 @@ | |||||
| namespace { | namespace { | ||||
| const int32_t kAnchorSize = 1; | const int32_t kAnchorSize = 1; | ||||
| const int kAnchorNum = 0; | const int kAnchorNum = 0; | ||||
| const int32_t kAnchorAssignRefIndex = 0; | |||||
| const int32_t kAnchorAssignValueIndex = 1; | |||||
| const char *const kInputMutable = "_input_mutable"; | const char *const kInputMutable = "_input_mutable"; | ||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { | Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { | ||||
| Status ret = SUCCESS; | |||||
| GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); | GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); | ||||
| for (const auto &node : graph->GetDirectNode()) { | for (const auto &node : graph->GetDirectNode()) { | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| if (op_desc == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| ret = ContinuousInputProcess(graph, node); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy, node_name:%s.", node->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| ret = MutableInputProcess(graph, node); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| ret = P2pmemInputProcess(graph, node); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| // If node has _input_mutable attr, means input mem may be modified when op execute. | |||||
| // In order to avoid to affect another op execute with same input when data modified, | |||||
| // need to inset memcpy node between. | |||||
| // also works on situation that input is variable or const. | |||||
| Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||||
| bool node_input_mutable = false; | |||||
| if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { | |||||
| return SUCCESS; | |||||
| } | |||||
| if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { | |||||
| GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (!node_input_mutable) { | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); | |||||
| for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { | |||||
| if (hccl_in_anchor == nullptr) { | |||||
| bool node_input_mutable = false; | |||||
| if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); | |||||
| GE_CHECK_NOTNULL(src_out_anchor); | |||||
| int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); | |||||
| if (src_out_anchor_size == kAnchorSize) { | |||||
| // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. | |||||
| if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { | |||||
| Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), | |||||
| GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); | |||||
| if (!node_input_mutable) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| // If broadcast input size is bigger than 1, and input from variable, | |||||
| // cause by broadcast input memory should be continuous, | |||||
| // another featuremap mem will be allocated for broadcast input. | |||||
| // In this condition, move data from variable mem to broadcast input featuremap mem will be executed each step. | |||||
| // In order to avoid move action out of model, use memcpy node instead of move action code. | |||||
| Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| bool is_input_continuous = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
| if (is_input_continuous && op_desc->GetInputsSize() > 1) { | |||||
| GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); | |||||
| // if input size bigger than one, insert memcpy between var data for support continous mem alloc | |||||
| GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); | |||||
| for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { | for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { | ||||
| if (hccl_in_anchor == nullptr) { | if (hccl_in_anchor == nullptr) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); | auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); | ||||
| if (src_out_anchor == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { | |||||
| Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); | |||||
| return ret; | |||||
| GE_CHECK_NOTNULL(src_out_anchor); | |||||
| int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); | |||||
| if (src_out_anchor_size == kAnchorSize) { | |||||
| // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. | |||||
| NodePtr src_node = src_out_anchor->GetOwnerNode(); | |||||
| std::string src_type = src_node->GetType(); | |||||
| bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); | |||||
| if (check_src_type) { | |||||
| Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); | |||||
| return ret; | |||||
| } | |||||
| } | } | ||||
| continue; | |||||
| } | } | ||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| // if input is var type, and node input need p2p mem, then memcpy should be insert between the two | |||||
| Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| vector<int64_t> input_memory_types; | |||||
| (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); | |||||
| if (input_memory_types.empty()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { | |||||
| if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { | |||||
| continue; | |||||
| } | |||||
| GELOGD("p2p input op is:%s.", op_desc->GetName().c_str()); | |||||
| auto hccl_in_anchor = node->GetInDataAnchor(index); | |||||
| if (hccl_in_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); | |||||
| if (src_out_anchor == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { | |||||
| Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); | Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); | GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); | ||||
| @@ -189,12 +82,8 @@ Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const No | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { | |||||
| return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); | |||||
| } | |||||
| /// | /// | ||||
| /// @brief Add Identity Node | |||||
| /// @brief Add MemcpyAsync Node | |||||
| /// @param [in] ge::ComputeGraphPtr graph | /// @param [in] ge::ComputeGraphPtr graph | ||||
| /// @param [in] ge::OutDataAnchorPtr in_node | /// @param [in] ge::OutDataAnchorPtr in_node | ||||
| /// @return ge::NodePtr | /// @return ge::NodePtr | ||||
| @@ -212,20 +101,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O | |||||
| node_name = CheckDuplicateName(node_name); | node_name = CheckDuplicateName(node_name); | ||||
| OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY); | OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY); | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); | |||||
| GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); | |||||
| GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); | |||||
| graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | ||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); | |||||
| GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | ||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); | |||||
| GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| // because history reason ,this pass can not do work after constant fold so mark it | // because history reason ,this pass can not do work after constant fold so mark it | ||||
| @@ -233,7 +122,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O | |||||
| NodePtr memcpy_node = graph->AddNode(op_desc); | NodePtr memcpy_node = graph->AddNode(op_desc); | ||||
| if (memcpy_node == nullptr) { | if (memcpy_node == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); | |||||
| GELOGE(INTERNAL_ERROR, "Insert identity node fail."); | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| @@ -266,38 +155,7 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { | |||||
| /// | /// | ||||
| Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | ||||
| const InDataAnchorPtr &hccl_in_anchor) { | const InDataAnchorPtr &hccl_in_anchor) { | ||||
| GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | |||||
| GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); | |||||
| Status ret = InsertIdentityBeforeHccl(graph, src_out_anchor, hccl_in_anchor); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "add identity failed, var_node:%s, hccl_node:%s.", | |||||
| src_out_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| ret = InsertAssignAfterBroadcastIfNeed(graph, src_out_anchor, hccl_in_anchor); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "add assign failed, var_node:%s, hccl_node:%s.", | |||||
| src_out_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | |||||
| /// @brief Insert Identity node Between Hccl node and variable | |||||
| /// @param [in] ComputeGraphPtr graph | |||||
| /// @param [in] OutDataAnchorPtr src_out_anchor | |||||
| /// @param [in] InDataAnchorPtr hccl_in_anchor | |||||
| /// @return status | |||||
| /// | |||||
| Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); | NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); | ||||
| GE_CHECK_NOTNULL(memcpy_node); | GE_CHECK_NOTNULL(memcpy_node); | ||||
| @@ -324,139 +182,6 @@ Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, co | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @brief Insert assign node after broadcast node and variable to refresh variable data | |||||
| /// @param [in] ComputeGraphPtr graph | |||||
| /// @param [in] OutDataAnchorPtr var_out_anchor | |||||
| /// @param [in] InDataAnchorPtr hccl_in_anchor | |||||
| /// @return status | |||||
| /// | |||||
| Status HcclMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, | |||||
| const OutDataAnchorPtr &var_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor) { | |||||
| if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { | |||||
| GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| if (var_out_anchor->GetOwnerNode()->GetType() != VARIABLE) { | |||||
| GELOGD("%s not variable, no need to insert assign node", var_out_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGI("after op %s and op %s need insert assign op.", var_out_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| for (auto peer_in_anchor : var_out_anchor->GetPeerInDataAnchors()) { | |||||
| if (peer_in_anchor->GetOwnerNode()->GetType() == ASSIGN) { | |||||
| GELOGD("variable %s out assign node is exist.", var_out_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| } | |||||
| NodePtr assign_node = CreateAssignNode(graph, var_out_anchor); | |||||
| GE_CHECK_NOTNULL(assign_node); | |||||
| OutDataAnchorPtr hccl_out_anchor = hccl_in_anchor->GetOwnerNode()->GetOutDataAnchor(hccl_in_anchor->GetIdx()); | |||||
| GE_CHECK_NOTNULL(hccl_out_anchor); | |||||
| Status ret = hccl_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignValueIndex)); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", hccl_out_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| assign_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| ret = var_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignRefIndex)); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", var_out_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| assign_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| // add control edge between assign node and node after broadcast node | |||||
| OutControlAnchorPtr assign_out_control_anchor = assign_node->GetOutControlAnchor(); | |||||
| GE_CHECK_NOTNULL(assign_out_control_anchor); | |||||
| for (auto in_data_anchor : hccl_out_anchor->GetPeerInDataAnchors()) { | |||||
| if (in_data_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { | |||||
| continue; | |||||
| } | |||||
| ret = assign_out_control_anchor->LinkTo(in_data_anchor->GetOwnerNode()->GetInControlAnchor()); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| in_data_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| for (auto in_control_anchor : hccl_out_anchor->GetOwnerNode()->GetOutControlAnchor()->GetPeerInControlAnchors()) { | |||||
| if (in_control_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { | |||||
| continue; | |||||
| } | |||||
| ret = assign_out_control_anchor->LinkTo(in_control_anchor); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), | |||||
| in_control_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | |||||
| /// @brief create assign Node, add to graph | |||||
| /// @param [in] ge::ComputeGraphPtr graph | |||||
| /// @param [in] ge::OutDataAnchorPtr variable node out anchor | |||||
| /// @return ge::NodePtr | |||||
| /// | |||||
| NodePtr HcclMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { | |||||
| GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); | |||||
| NodePtr pre_node = out_data_anchor->GetOwnerNode(); | |||||
| OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | |||||
| if (pre_op_desc == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "OpDesc of pre node is invalid."); | |||||
| return nullptr; | |||||
| } | |||||
| std::string node_name = pre_node->GetName() + "_" + ASSIGN; | |||||
| node_name = CheckDuplicateName(node_name); | |||||
| OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), ASSIGN); | |||||
| if (op_desc == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "Create Assign op: MakeShared op_desc fail."); | |||||
| return nullptr; | |||||
| } | |||||
| GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); | |||||
| graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); | |||||
| return nullptr; | |||||
| } | |||||
| ret = op_desc->AddInputDesc("value", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Create Assign op: add value input desc fail."); | |||||
| return nullptr; | |||||
| } | |||||
| ret = op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail."); | |||||
| return nullptr; | |||||
| } | |||||
| NodePtr assign_node = graph->AddNode(op_desc); | |||||
| if (assign_node == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); | |||||
| return nullptr; | |||||
| } | |||||
| return assign_node; | |||||
| } | |||||
| /// | /// | ||||
| /// @brief Clear Status, used for subgraph pass | /// @brief Clear Status, used for subgraph pass | ||||
| /// @return SUCCESS | /// @return SUCCESS | ||||
| @@ -32,28 +32,11 @@ class HcclMemcpyPass : public GraphPass { | |||||
| private: | private: | ||||
| NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); | NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); | ||||
| NodePtr CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); | |||||
| std::string CheckDuplicateName(const std::string &node_name); | std::string CheckDuplicateName(const std::string &node_name); | ||||
| Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | ||||
| const InDataAnchorPtr &hccl_in_anchor); | const InDataAnchorPtr &hccl_in_anchor); | ||||
| Status InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor); | |||||
| Status InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, | |||||
| const OutDataAnchorPtr &src_out_anchor, | |||||
| const InDataAnchorPtr &hccl_in_anchor); | |||||
| Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node); | |||||
| Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node); | |||||
| Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node); | |||||
| bool IsDataNode(const std::string& node_type); | |||||
| std::unordered_map<std::string, uint32_t> node_num_map_; | std::unordered_map<std::string, uint32_t> node_num_map_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| namespace ge { | |||||
| namespace { | namespace { | ||||
| constexpr uint32_t kInplaceSupportOutputIndex = 0; | constexpr uint32_t kInplaceSupportOutputIndex = 0; | ||||
| constexpr uint32_t kInplaceSupportOutputNum = 1; | constexpr uint32_t kInplaceSupportOutputNum = 1; | ||||
| @@ -26,8 +27,6 @@ static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge: | |||||
| ge::CONSTANT, ge::CONSTANTOP, | ge::CONSTANT, ge::CONSTANTOP, | ||||
| ge::VARIABLE, ge::VARIABLEV2 }; | ge::VARIABLE, ge::VARIABLEV2 }; | ||||
| } | } | ||||
| namespace ge { | |||||
| Status InplaceSupportCheckPass::Run(NodePtr &node) { | Status InplaceSupportCheckPass::Run(NodePtr &node) { | ||||
| GELOGD("InplaceSupportCheckPass running"); | GELOGD("InplaceSupportCheckPass running"); | ||||
| if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) { | if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) { | ||||
| @@ -25,31 +25,65 @@ | |||||
| #include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| #include "register/op_registry.h" | #include "register/op_registry.h" | ||||
| #include "graph/common/omg_util.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| constexpr uint8_t kDataInIndex = 0; | constexpr uint8_t kDataInIndex = 0; | ||||
| constexpr uint8_t kDataOutIndex = 0; | constexpr uint8_t kDataOutIndex = 0; | ||||
| constexpr uint8_t kCaseArgIndex = 1; | constexpr uint8_t kCaseArgIndex = 1; | ||||
| const int kDivisionConst = 2; | |||||
| const size_t kNumOfGetnextNode = 1; | |||||
| const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case"; | const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case"; | ||||
| const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data"; | const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data"; | ||||
| const std::string kMultiBatchGetDynamicDimsNode = "ascend_mbatch_get_dynamic_dims_node"; | |||||
| const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; | const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; | ||||
| const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; | const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; | ||||
| const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; | const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; | ||||
| const char *const kGetNextName = "IteratorV2"; | |||||
| } // namespace | } // namespace | ||||
| inline bool IsGetNextType(const NodePtr &node) { | |||||
| std::string original_type; | |||||
| GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, | |||||
| GELOGW("Get original type failed."); return false); | |||||
| return (original_type == kGetNextName); | |||||
| } | |||||
| Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { | Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { | ||||
| GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(FAILED, "Original graph is nullptr"); return FAILED); | |||||
| if (graph->GetParentGraph() != nullptr) { | if (graph->GetParentGraph() != nullptr) { | ||||
| GELOGD("Subgraph %s skip the MultiBatchClonePass", graph->GetName().c_str()); | GELOGD("Subgraph %s skip the MultiBatchClonePass", graph->GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (!GetLocalOmgContext().need_multi_batch) { | |||||
| GELOGI("No need to process_multi for no_train graph."); | |||||
| return SUCCESS; | |||||
| } | |||||
| std::vector<NodePtr> data_nodes; | |||||
| std::vector<NodePtr> getnext_nosink_nodes; | |||||
| std::vector<NodePtr> getnext_sink_nodes; | |||||
| if (multibatch::CheckSequenceOfOptions(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "[Train_Dynamic] CheckSequenceOfOptions failed."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (multibatch::UpdateNameOfInputShape(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "[Train_Dynamic] UpdateNameForInputShapeOfOption failed."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (multibatch::DeleteIdentityInsertByAdapter(graph) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "[Train_Dynamic] DeleteIdentityInsertByAdapter failed."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (!multibatch::InitDynamicParams(batch_shapes_)) { | if (!multibatch::InitDynamicParams(batch_shapes_)) { | ||||
| GELOGD("There is no multi-batch options, no need clone multi-batch graph"); | GELOGD("There is no multi-batch options, no need clone multi-batch graph"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if (multibatch::CheckNegativeCountOfOptions(batch_shapes_) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "[Train_Dynamic] Input_shape and dynamic_dims should set correct params."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGD("Begin to run Multi-batch clone on graph: %s", graph->GetName().c_str()); | GELOGD("Begin to run Multi-batch clone on graph: %s", graph->GetName().c_str()); | ||||
| GE_CHK_STATUS_RET(multibatch::CheckDynamicParams(batch_shapes_), "Invalid multi-batch param"); | GE_CHK_STATUS_RET(multibatch::CheckDynamicParams(batch_shapes_), "Invalid multi-batch param"); | ||||
| if (CollectIoNodes(graph) != SUCCESS) { | if (CollectIoNodes(graph) != SUCCESS) { | ||||
| @@ -66,21 +100,14 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { | |||||
| (void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); | (void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); | ||||
| ComputeGraphPtr branch = MakeShared<ComputeGraph>(graph->GetName()); | ComputeGraphPtr branch = MakeShared<ComputeGraph>(graph->GetName()); | ||||
| if (branch == nullptr) { | |||||
| GELOGE(OUT_OF_MEMORY, "Create multi-batch graph failed"); | |||||
| return OUT_OF_MEMORY; | |||||
| } | |||||
| GE_IF_BOOL_EXEC(branch == nullptr, GELOGE(OUT_OF_MEMORY, "Create multi batch graph failed"); return OUT_OF_MEMORY); | |||||
| (void)AttrUtils::SetStr(branch, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); | (void)AttrUtils::SetStr(branch, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); | ||||
| graph->InValid(); // Will modify, need topological again. | graph->InValid(); // Will modify, need topological again. | ||||
| graph->Swap(*branch); | graph->Swap(*branch); | ||||
| if (CreateRootGraph(graph) != SUCCESS) { | |||||
| return FAILED; | |||||
| } | |||||
| if (CreateSubgraphs(graph, branch) != SUCCESS) { | |||||
| return FAILED; | |||||
| } | |||||
| GE_CHK_STATUS_RET(CreateRootGraph(graph), "Construct root graph failed."); | |||||
| GE_CHK_STATUS_RET(CreateOriGraph(branch), "Construct original graph failed.") | |||||
| GE_CHK_STATUS_RET(CreateSubgraphs(graph, branch), "Construct subgraph failed."); | |||||
| GE_CHK_STATUS_RET(PruneDirectOutput(graph), "Prune direct output failed"); | GE_CHK_STATUS_RET(PruneDirectOutput(graph), "Prune direct output failed"); | ||||
| GELOGD("MultiBatchClonePass Leave"); | GELOGD("MultiBatchClonePass Leave"); | ||||
| @@ -95,9 +122,13 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { | |||||
| /// | /// | ||||
| Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | ||||
| for (const auto &node : graph->GetDirectNode()) { | for (const auto &node : graph->GetDirectNode()) { | ||||
| if (!GetLocalOmgContext().dynamic_node_type.empty() && IsGetNextType(node)) { | |||||
| all_data_nodes_.emplace_back(node); | |||||
| GE_CHK_STATUS_RET(InitParamsOfGetNext(node), "Init params of %s failed.", node->GetName().c_str()); | |||||
| } | |||||
| if (node->GetType() == DATA) { | if (node->GetType() == DATA) { | ||||
| all_data_nodes_.emplace_back(node); | all_data_nodes_.emplace_back(node); | ||||
| } else if (node->GetType() == CONSTANT) { | |||||
| } else if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) { | |||||
| all_const_nodes_.emplace_back(node); | all_const_nodes_.emplace_back(node); | ||||
| } else if (node->GetType() == NETOUTPUT) { | } else if (node->GetType() == NETOUTPUT) { | ||||
| all_output_nodes_.emplace_back(node); | all_output_nodes_.emplace_back(node); | ||||
| @@ -114,10 +145,16 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | |||||
| } | } | ||||
| int64_t data_index = 0; | int64_t data_index = 0; | ||||
| size_t getnext_node_count = 0; | |||||
| for (size_t i = 0; i < all_data_nodes_.size(); ++i) { | for (size_t i = 0; i < all_data_nodes_.size(); ++i) { | ||||
| if (IsGetNextType(all_data_nodes_[i])) { | |||||
| // just one getnext node in graph | |||||
| getnext_node_count++; | |||||
| continue; | |||||
| } | |||||
| const auto &op_desc = all_data_nodes_[i]->GetOpDesc(); | const auto &op_desc = all_data_nodes_[i]->GetOpDesc(); | ||||
| if (!AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | if (!AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | ||||
| (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i); | |||||
| (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i - getnext_node_count); | |||||
| } | } | ||||
| } | } | ||||
| @@ -133,7 +170,43 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | |||||
| "Remove edge failed"); | "Remove edge failed"); | ||||
| } | } | ||||
| } | } | ||||
| GELOGD("Data count is %zu, const count is %zu, getnext count is %zu, output count is %zu, direct out count is %zu.", | |||||
| all_data_nodes_.size(), all_const_nodes_.size(), getnext_node_count, all_output_nodes_.size(), | |||||
| direct_output_.size()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { | |||||
| data_count_from_getnext_ = 0; | |||||
| getnext_sink_dynamic_dims_ = false; | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| data_count_from_getnext_ = node->GetOpDesc()->GetOutputsSize(); | |||||
| if (GetLocalOmgContext().dynamic_node_type == GETNEXT) { | |||||
| data_count_from_getnext_ = data_count_from_getnext_ / kDivisionConst; | |||||
| for (size_t i = 0; i < data_count_from_getnext_; ++i) { | |||||
| GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(i); | |||||
| GELOGD("The %zu data shape from getnext sink is %s.", i, | |||||
| formats::JoinToString(output_desc.GetShape().GetDims()).c_str()); | |||||
| const auto &dims = output_desc.GetShape().GetDims(); | |||||
| if (std::all_of(dims.begin(), dims.end(), [](int64_t val) {return val >= 0; })) { | |||||
| GELOGD("The %zu data from %s is static.", i, node->GetName().c_str()); | |||||
| } else { | |||||
| getnext_sink_dynamic_dims_ = true; | |||||
| GELOGD("Dynamic dims in the pattern of getnext sink."); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (node->GetOutControlAnchor() != nullptr) { | |||||
| for (const auto &peer_in_control_anchor : node->GetOutControlAnchor()->GetPeerInControlAnchors()) { | |||||
| NodePtr next_node = peer_in_control_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(next_node); | |||||
| if (next_node->GetType() == CONSTANTOP) { | |||||
| out_control_nodes_.insert(next_node); | |||||
| GELOGD("Control edge: %s connect with %s.", node->GetName().c_str(), next_node->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -144,7 +217,11 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { | Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { | ||||
| GELOGD("Start create root graph of %s.", graph->GetName().c_str()); | |||||
| uint32_t input_num = all_data_nodes_.size() + all_const_nodes_.size(); | uint32_t input_num = all_data_nodes_.size() + all_const_nodes_.size(); | ||||
| if (data_count_from_getnext_ != 0) { | |||||
| input_num = input_num + data_count_from_getnext_ - kNumOfGetnextNode; | |||||
| } | |||||
| uint32_t output_num = all_output_nodes_[0]->GetAllInDataAnchorsSize(); | uint32_t output_num = all_output_nodes_[0]->GetAllInDataAnchorsSize(); | ||||
| OpDescBuilder op_builder(kMultiBatchCaseNode, CASE); | OpDescBuilder op_builder(kMultiBatchCaseNode, CASE); | ||||
| @@ -185,6 +262,10 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { | |||||
| op_desc->GetName().c_str()); | op_desc->GetName().c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if (!AttrUtils::SetBool(op_desc, ATTR_INSERT_BY_MBATCH, true)) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to add insert attr on case node %s", op_desc->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed"); | GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed"); | ||||
| GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed"); | GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed"); | ||||
| @@ -202,7 +283,7 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { | |||||
| /// @param [in] NodePtr node: index data node. | /// @param [in] NodePtr node: index data node. | ||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node) { | |||||
| Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { | |||||
| const OpDescPtr data_desc = MakeShared<OpDesc>(kMultiBatchDataNode, DATA); | const OpDescPtr data_desc = MakeShared<OpDesc>(kMultiBatchDataNode, DATA); | ||||
| if (data_desc == nullptr) { | if (data_desc == nullptr) { | ||||
| GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); | GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); | ||||
| @@ -220,11 +301,12 @@ Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, No | |||||
| } | } | ||||
| size_t data_index = all_data_nodes_.size(); | size_t data_index = all_data_nodes_.size(); | ||||
| data_index = data_count_from_getnext_ != 0 ? data_index - kNumOfGetnextNode : data_index; | |||||
| (void)AttrUtils::SetInt(data_desc, ATTR_NAME_INDEX, data_index); | (void)AttrUtils::SetInt(data_desc, ATTR_NAME_INDEX, data_index); | ||||
| (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); | (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); | ||||
| node = graph->AddNode(data_desc); | |||||
| if (node == nullptr) { | |||||
| shape_node = graph->AddNode(data_desc); | |||||
| if (shape_node == nullptr) { | |||||
| GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); | GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); | ||||
| return OUT_OF_MEMORY; | return OUT_OF_MEMORY; | ||||
| } | } | ||||
| @@ -286,15 +368,19 @@ Status MultiBatchClonePass::CreateIndexConstNode(const ComputeGraphPtr &graph, N | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { | Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { | ||||
| // Data --> MapIndex --> Case | |||||
| NodePtr data_node; | |||||
| GE_CHK_STATUS_RET(CreateIndexDataNode(graph, data_node), "Create data node failed"); | |||||
| // Data/GetDynamicDims --> MapIndex --> Case | |||||
| if (!getnext_sink_dynamic_dims_) { | |||||
| GE_CHK_STATUS_RET(CreateIndexDataNode(graph, shape_node_), "Create data node failed"); | |||||
| } else { | |||||
| GE_CHK_STATUS_RET(CreateGetDynamicDimsNode(graph, shape_node_), "Create get dynamic dims node failed"); | |||||
| } | |||||
| NodePtr const_node; | NodePtr const_node; | ||||
| GE_CHK_STATUS_RET(CreateIndexConstNode(graph, const_node), "Create const node failed"); | GE_CHK_STATUS_RET(CreateIndexConstNode(graph, const_node), "Create const node failed"); | ||||
| GELOGD("Shape node name is %s, type is %s, const node name is %s.", shape_node_->GetName().c_str(), | |||||
| shape_node_->GetType().c_str(), const_node->GetName().c_str()); | |||||
| OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex"); | OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex"); | ||||
| op_builder.AddInput("x", data_node->GetOpDesc()->GetOutputDesc(0)) | |||||
| op_builder.AddInput("x", shape_node_->GetOpDesc()->GetOutputDesc(0)) | |||||
| .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) | .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) | ||||
| .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); | .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); | ||||
| @@ -309,8 +395,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { | |||||
| return OUT_OF_MEMORY; | return OUT_OF_MEMORY; | ||||
| } | } | ||||
| if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", data_node->GetName().c_str(), | |||||
| GE_CHK_STATUS_RET(AddAttrForGetDynamicDims(shape_node_), "Failed to add attr for %s.", | |||||
| shape_node_->GetName().c_str()); | |||||
| if (GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", shape_node_->GetName().c_str(), | |||||
| index_node->GetName().c_str()); | index_node->GetName().c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -328,6 +416,120 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status MultiBatchClonePass::CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { | |||||
| const OpDescPtr data_desc = MakeShared<OpDesc>(kMultiBatchGetDynamicDimsNode, GETDYNAMICDIMS); | |||||
| if (data_desc == nullptr) { | |||||
| GELOGE(OUT_OF_MEMORY, "Create multi-batch get dynamic dims node failed"); | |||||
| return OUT_OF_MEMORY; | |||||
| } | |||||
| // input of GetDynamicDims is shape_of_each_data, output is gear_info | |||||
| for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { | |||||
| size_t input_shape_dims = GetLocalOmgContext().user_input_dims.at(i).second.size(); | |||||
| // add input desc without GeShape for const input, value of input_shape is 1 transferred by adapter | |||||
| if (input_shape_dims == 1 && GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { | |||||
| GeTensorDesc tensor_desc; | |||||
| tensor_desc.SetFormat(FORMAT_ND); | |||||
| tensor_desc.SetDataType(DT_INT32); | |||||
| auto ret = data_desc->AddInputDesc(tensor_desc); | |||||
| GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); | |||||
| return FAILED); | |||||
| continue; | |||||
| } | |||||
| GeTensorDesc tensor_desc(GeShape({static_cast<int32_t>(input_shape_dims)}), FORMAT_ND, DT_INT32); | |||||
| auto ret = data_desc->AddInputDesc(tensor_desc); | |||||
| GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); | |||||
| return FAILED); | |||||
| } | |||||
| GeTensorDesc tensor_desc(GeShape({static_cast<int32_t>(batch_shapes_.at(0).size())}), FORMAT_ND, DT_INT32); | |||||
| auto ret = data_desc->AddOutputDesc(tensor_desc); | |||||
| GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data"); | |||||
| return FAILED); | |||||
| (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); | |||||
| shape_node = graph->AddNode(data_desc); | |||||
| if (shape_node == nullptr) { | |||||
| GELOGE(OUT_OF_MEMORY, "Create multi-batch dynamic dims node failed"); | |||||
| return OUT_OF_MEMORY; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MultiBatchClonePass::AddAttrForGetDynamicDims(const NodePtr &shape_node) { | |||||
| if (!getnext_sink_dynamic_dims_) { | |||||
| GELOGD("No need to add attr when not insert get dynamic dims node."); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGD("Add attr for :%s, type is %s:", shape_node->GetName().c_str(), shape_node->GetType().c_str()); | |||||
| if (!AttrUtils::SetInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_DATA_COUNT, data_count_from_getnext_)) { | |||||
| GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_DATA_COUNT failed"); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| vector<int64_t> shape_info; | |||||
| for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { | |||||
| if (GetLocalOmgContext().user_input_dims.at(i).second.size() == 1 && | |||||
| GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { | |||||
| shape_info.emplace_back(0); | |||||
| continue; | |||||
| } | |||||
| shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.size()); | |||||
| for (size_t j = 0; j < GetLocalOmgContext().user_input_dims.at(i).second.size(); ++j) { | |||||
| shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.at(j)); | |||||
| } | |||||
| } | |||||
| if (!AttrUtils::SetListInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_SHAPE_INFO, shape_info)) { | |||||
| GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_SHAPE_INFO failed"); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MultiBatchClonePass::LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node) { | |||||
| GELOGD("Start relink shape anchor of %s to %s.", getnext_node->GetName().c_str(), shape_node->GetName().c_str()); | |||||
| size_t input_index = 0; | |||||
| size_t data_count = getnext_node->GetAllOutDataAnchors().size() / kDivisionConst; | |||||
| for (size_t out_index = data_count; out_index < getnext_node->GetAllOutDataAnchors().size(); ++out_index, | |||||
| ++input_index) { | |||||
| GELOGD("Start add %s of %zu out_anchor to %s of %zu in_anchor.", getnext_node->GetName().c_str(), out_index, | |||||
| shape_node->GetName().c_str(), input_index); | |||||
| auto out_data_anchor = getnext_node->GetOutDataAnchor(out_index); | |||||
| auto ret = GraphUtils::AddEdge(out_data_anchor, shape_node->GetInDataAnchor(input_index)); | |||||
| GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link getnext %s to getdynamicdims %s", | |||||
| getnext_node->GetName().c_str(), shape_node->GetName().c_str()); | |||||
| return INTERNAL_ERROR); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MultiBatchClonePass::LinkGetDynamicDimsToNetOutput(const NodePtr &output_node) { | |||||
| if (!GetLocalOmgContext().dynamic_node_type.empty()) { | |||||
| if (!AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, GetLocalOmgContext().dynamic_dims)) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to set all gears info attr on netoutput %s.", output_node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| } | |||||
| if (getnext_sink_dynamic_dims_) { | |||||
| GELOGD("Start link %s to %s.", shape_node_->GetName().c_str(), output_node->GetName().c_str()); | |||||
| size_t input_index = output_node->GetAllInDataAnchors().size(); | |||||
| if (NodeUtils::AppendInputAnchor(output_node, input_index + 1) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Append input anchor of %s of %zu failed.", output_node->GetName().c_str(), input_index); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto ret = GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(kDataOutIndex), | |||||
| output_node->GetInDataAnchor(input_index)); | |||||
| GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link netoutput %s to getdynamicdims %s", | |||||
| output_node->GetName().c_str(), shape_node_->GetName().c_str()); | |||||
| return INTERNAL_ERROR); | |||||
| if (!AttrUtils::SetBool(output_node->GetOpDesc(), ATTR_GETNEXT_SINK_DYNMAIC, true)) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to set getnext sink dynamic attr on netoutput %s.", | |||||
| output_node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Create input node for root graph. | /// @brief Create input node for root graph. | ||||
| @@ -337,8 +539,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { | |||||
| Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { | Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { | ||||
| // Data --> Case | // Data --> Case | ||||
| std::vector<NodePtr> all_data_nodes; | std::vector<NodePtr> all_data_nodes; | ||||
| const size_t arg_index = kCaseArgIndex; | |||||
| for (size_t i = 0; i < all_data_nodes_.size(); ++i) { | |||||
| size_t case_input_index = kCaseArgIndex; | |||||
| NodePtr getnext_node = nullptr; | |||||
| size_t input_index_of_getnext = 0; | |||||
| for (size_t i = 0; i < all_data_nodes_.size(); ++i, ++case_input_index) { | |||||
| const auto &node = all_data_nodes_[i]; | const auto &node = all_data_nodes_[i]; | ||||
| const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc()); | const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc()); | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| @@ -353,22 +557,60 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { | |||||
| op_desc->SetName(node->GetName()); | op_desc->SetName(node->GetName()); | ||||
| const NodePtr &data = graph->AddNode(op_desc); | const NodePtr &data = graph->AddNode(op_desc); | ||||
| GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); | GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); | ||||
| if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", | |||||
| data->GetName().c_str(), case_node_->GetName().c_str()); | |||||
| return FAILED; | |||||
| if (IsGetNextType(node)) { | |||||
| getnext_node = data; | |||||
| input_index_of_getnext = case_input_index; | |||||
| case_input_index = case_input_index + data_count_from_getnext_; | |||||
| continue; | |||||
| } else { | |||||
| if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(case_input_index)) != | |||||
| GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", data->GetName().c_str(), | |||||
| case_node_->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | } | ||||
| if (SetMaxShapeToData(data) != SUCCESS) { | |||||
| if (SetMaxShape(data) != SUCCESS) { | |||||
| GELOGE(FAILED, "Set max shape of %s failed.", data->GetName().c_str()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| all_data_nodes.emplace_back(data); | all_data_nodes.emplace_back(data); | ||||
| } | } | ||||
| if (getnext_node != nullptr) { | |||||
| if (LinkEdgeForGetNext(getnext_node, input_index_of_getnext) != SUCCESS) { | |||||
| GELOGE(FAILED, "Failed to link edge for %s.", getnext_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (SetMaxShape(getnext_node) != SUCCESS) { | |||||
| GELOGE(FAILED, "Set max shape of %s failed.", getnext_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| all_data_nodes.emplace_back(getnext_node); | |||||
| } | |||||
| all_data_nodes_.swap(all_data_nodes); | all_data_nodes_.swap(all_data_nodes); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status MultiBatchClonePass::LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index) { | |||||
| GELOGD("Start link edge for %s, which is the %zu input of %s.", getnext_node->GetName().c_str(), | |||||
| case_input_index, case_node_->GetName().c_str()); | |||||
| for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++case_input_index) { | |||||
| if (GraphUtils::AddEdge(getnext_node->GetOutDataAnchor(out_index), | |||||
| case_node_->GetInDataAnchor(case_input_index)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Failed to add data edge between %zu Data:%s to %zu Case:%s", out_index, | |||||
| getnext_node->GetName().c_str(), case_input_index, case_node_->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| if (getnext_sink_dynamic_dims_) { | |||||
| GE_CHK_STATUS_RET(LinkGetNextToGetDynamicDims(getnext_node, shape_node_), "Failed to add link for %s.", | |||||
| shape_node_->GetName().c_str()); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Create Const node for root graph. | /// @brief Create Const node for root graph. | ||||
| @@ -378,7 +620,11 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { | |||||
| Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { | Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { | ||||
| // Const --> Case | // Const --> Case | ||||
| std::vector<NodePtr> all_const_nodes; | std::vector<NodePtr> all_const_nodes; | ||||
| const size_t arg_index = kCaseArgIndex + all_data_nodes_.size(); | |||||
| size_t arg_index = kCaseArgIndex + all_data_nodes_.size(); | |||||
| if (data_count_from_getnext_ != 0) { | |||||
| arg_index = arg_index + data_count_from_getnext_ - kNumOfGetnextNode; | |||||
| } | |||||
| for (size_t i = 0; i < all_const_nodes_.size(); ++i) { | for (size_t i = 0; i < all_const_nodes_.size(); ++i) { | ||||
| const auto &node = all_const_nodes_[i]; | const auto &node = all_const_nodes_[i]; | ||||
| const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc()); | const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc()); | ||||
| @@ -395,15 +641,33 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { | |||||
| const NodePtr &data = graph->AddNode(op_desc); | const NodePtr &data = graph->AddNode(op_desc); | ||||
| GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); | GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); | ||||
| if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { | if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { | ||||
| GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s", | |||||
| data->GetName().c_str(), case_node_->GetName().c_str()); | |||||
| GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s", data->GetName().c_str(), | |||||
| case_node_->GetName().c_str()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| all_const_nodes.emplace_back(data); | all_const_nodes.emplace_back(data); | ||||
| } | } | ||||
| ChangeConstToData(); | |||||
| all_const_nodes_.swap(all_const_nodes); | |||||
| return SUCCESS; | |||||
| } | |||||
| void MultiBatchClonePass::ChangeConstToData() { | |||||
| size_t data_index = all_data_nodes_.size(); | size_t data_index = all_data_nodes_.size(); | ||||
| if (data_count_from_getnext_ != 0) { | |||||
| data_index = data_index + data_count_from_getnext_ - kNumOfGetnextNode; | |||||
| } | |||||
| for (size_t i = 0; i < all_const_nodes_.size(); ++i, ++data_index) { // Trans subgraph Const to Data. | for (size_t i = 0; i < all_const_nodes_.size(); ++i, ++data_index) { // Trans subgraph Const to Data. | ||||
| auto &const_node = all_const_nodes_[i]; | |||||
| bool need_change_type = true; | |||||
| if (out_control_nodes_.find(const_node) != out_control_nodes_.end()) { | |||||
| GELOGD("No need to change %s to data type.", const_node->GetName().c_str()); | |||||
| need_change_type = false; | |||||
| break; | |||||
| } | |||||
| if (!need_change_type) { | |||||
| continue; | |||||
| } | |||||
| const OpDescPtr &op_desc = all_const_nodes_[i]->GetOpDesc(); | const OpDescPtr &op_desc = all_const_nodes_[i]->GetOpDesc(); | ||||
| op_desc->SetType(DATA); | op_desc->SetType(DATA); | ||||
| (void)op_desc->DelAttr(ATTR_NAME_WEIGHTS); // Delete weight. | (void)op_desc->DelAttr(ATTR_NAME_WEIGHTS); // Delete weight. | ||||
| @@ -413,9 +677,6 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { | |||||
| (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index); | (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index); | ||||
| (void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1); | (void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1); | ||||
| } | } | ||||
| all_const_nodes_.swap(all_const_nodes); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| /// | /// | ||||
| @@ -461,7 +722,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(LinkGetDynamicDimsToNetOutput(node), "Failed to add edge between %s to netoutput: %s.", | |||||
| shape_node_->GetName().c_str(), output->GetName().c_str()); | |||||
| all_output_nodes_.clear(); | all_output_nodes_.clear(); | ||||
| all_output_nodes_.emplace_back(node); | all_output_nodes_.emplace_back(node); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -473,34 +735,69 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { | |||||
| /// @param [in] const NodePtr &data: data in Root/Case graph. | /// @param [in] const NodePtr &data: data in Root/Case graph. | ||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { | |||||
| auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | |||||
| auto data_name = data->GetName(); | |||||
| Status MultiBatchClonePass::SetMaxShape(const NodePtr &data) { | |||||
| GELOGD("Start set max shape for %s.", data->GetName().c_str()); | |||||
| if (!IsGetNextType(data)) { | |||||
| if (SetMaxShapeToData(data, kDataOutIndex) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| } else { | |||||
| for (size_t out_anchor_index = 0; out_anchor_index < data_count_from_getnext_; ++out_anchor_index) { | |||||
| if (SetMaxShapeToData(data, out_anchor_index) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index) { | |||||
| GELOGD("Start update max shape of %s, %zu output.", node->GetName().c_str(), out_anchor_index); | |||||
| auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape(); | |||||
| string data_name = node->GetName(); | |||||
| if (IsGetNextType(node)) { | |||||
| data_name.append("_").append(std::to_string(out_anchor_index)); | |||||
| } | |||||
| GELOGD("Update max shape of %s, shape dims is %s.", data_name.c_str(), | |||||
| formats::JoinToString(data_shape.GetDims()).c_str()); | |||||
| const auto &dims = data_shape.GetDims(); | const auto &dims = data_shape.GetDims(); | ||||
| if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { | |||||
| return SUCCESS; | |||||
| if (!IsGetNextType(node)) { | |||||
| if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { | |||||
| GELOGD("No need to do anything for static data."); | |||||
| return SUCCESS; | |||||
| } | |||||
| } else { | |||||
| if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { | |||||
| if (getnext_sink_dynamic_dims_) { | |||||
| // need to update shape of Shape_node when getnext node has dynamic data | |||||
| GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(node, out_anchor_index), "Failed to update shape of shape node"); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } | } | ||||
| (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); | |||||
| (void)AttrUtils::SetListInt(node->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); | |||||
| GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex)); | |||||
| GeTensorDesc tensor(NodeUtils::GetOutputDesc(*node, kDataOutIndex)); | |||||
| std::vector<std::string> input_dims_str; | std::vector<std::string> input_dims_str; | ||||
| for (size_t i = 0; i < batch_shapes_.size(); ++i) { | for (size_t i = 0; i < batch_shapes_.size(); ++i) { | ||||
| auto shape = data_shape; | auto shape = data_shape; | ||||
| auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape); | auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str()); | |||||
| GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", node->GetName().c_str()); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| tensor.SetShape(shape); | tensor.SetShape(shape); | ||||
| int64_t tensor_size = 0; | int64_t tensor_size = 0; | ||||
| (void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size); | (void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size); | ||||
| string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" + | string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" + | ||||
| TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" + | |||||
| TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + node->GetName() + ":" + | |||||
| std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" + | std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" + | ||||
| formats::JoinToString(tensor.GetShape().GetDims()); | formats::JoinToString(tensor.GetShape().GetDims()); | ||||
| input_dims_str.emplace_back(input_str); | input_dims_str.emplace_back(input_str); | ||||
| } | } | ||||
| (void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); | |||||
| (void)AttrUtils::SetListStr(node->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); | |||||
| size_t max_shape_index = 0; | size_t max_shape_index = 0; | ||||
| int64_t max_size = 0; | int64_t max_size = 0; | ||||
| @@ -519,18 +816,72 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { | |||||
| max_shape_index = i; | max_shape_index = i; | ||||
| } | } | ||||
| } | } | ||||
| return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), node, data_shape, out_anchor_index); | |||||
| } | |||||
| return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), data, data_shape); | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Set max shape to Data/GetNext node in root graph. | |||||
| /// @param [in] const std::vector<int64_t> &shapes: dims of shape. | |||||
| /// @param [in] const NodePtr &data: data in Root/Case graph. | |||||
| /// @param [in] GeShape &data_shape: dims of data node. | |||||
| /// @param [in] size_t out_anchor_index: out anchor index of data node. | |||||
| /// @return 0: SUCCESS / others: FAILED | |||||
| /// | |||||
| Status MultiBatchClonePass::SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape, | |||||
| size_t out_anchor_index) { | |||||
| GELOGD("Start set shape to %zu out of %s.", out_anchor_index, data->GetName().c_str()); | |||||
| if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to calculate the batched shape for data node %s, the shapes may not match", | |||||
| data->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (NodeUtils::UpdateOutputShape(*data, out_anchor_index, data_shape) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (!IsGetNextType(data)) { | |||||
| if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| } else { | |||||
| if (getnext_sink_dynamic_dims_) { | |||||
| // need to update shape of Shape_node when getnext_sink_dynamic | |||||
| GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(data, out_anchor_index), "Failed to update shape of shape node"); | |||||
| } | |||||
| } | |||||
| GELOGI("Update the data %s input/output shape to the max %s", data->GetName().c_str(), | |||||
| formats::ShapeToString(data_shape).c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MultiBatchClonePass::UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index) { | |||||
| GELOGD("Start update output shape of shape node insert by adapter, which is the %zu out of %s.", out_anchor_index, | |||||
| node->GetName().c_str()); | |||||
| auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape(); | |||||
| size_t shape_index = out_anchor_index + (node->GetAllOutDataAnchors().size() / kDivisionConst); | |||||
| GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(shape_index); | |||||
| std::vector<int64_t> output_dims = {static_cast<int64_t>(data_shape.GetDims().size())}; | |||||
| GeShape output_shape(output_dims); | |||||
| output_desc.SetShape(output_shape); | |||||
| if (node->GetOpDesc()->UpdateOutputDesc(shape_index, output_desc) != SUCCESS) { | |||||
| GELOGE(FAILED, "Update output desc fail."); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | } | ||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Update Data node in Subgraph. | /// @brief Update Data node in Subgraph. | ||||
| /// @param [in] const NodePtr &data: data in Subgraph. | /// @param [in] const NodePtr &data: data in Subgraph. | ||||
| /// @param [in] size_t index: The batch index. | |||||
| /// @param [in] size_t batch_index: The batch index. | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) { | |||||
| Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t batch_index) { | |||||
| int node_index = -1; | int node_index = -1; | ||||
| if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) { | if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) { | ||||
| GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str()); | GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str()); | ||||
| @@ -545,6 +896,8 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index | |||||
| auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | ||||
| const auto &dims = data_shape.GetDims(); | const auto &dims = data_shape.GetDims(); | ||||
| GELOGD("Start update shape of %s , batch index is %zu, dims is %s.", data->GetName().c_str(), batch_index, | |||||
| formats::JoinToString(dims).c_str()); | |||||
| if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { | if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -559,35 +912,77 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index | |||||
| } | } | ||||
| auto parent_name = data_name.substr(0, pos); | auto parent_name = data_name.substr(0, pos); | ||||
| return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(index), data, data_shape); | |||||
| return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(batch_index), data, data_shape, kDataOutIndex); | |||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Set max shape to Data node in root graph. | |||||
| /// @param [in] const std::vector<int64_t> &shapes: dims of shape. | |||||
| /// @param [in] const NodePtr &data: data in Root/Case graph. | |||||
| /// @param [in] GeShape &data_shape: dims of data node. | |||||
| /// @return 0: SUCCESS / others: FAILED | |||||
| /// | |||||
| Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape) { | |||||
| // must not be error, the calc result has been checked in function InsertSwitchNForData | |||||
| if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) { | |||||
| return INTERNAL_ERROR; | |||||
| Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) { | |||||
| if (data_count_from_getnext_ == 0) { | |||||
| GELOGD("No need to change original graph without getnext node."); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| GELOGD("Start change original graph: %s when exit getnext node.", graph->GetName().c_str()); | |||||
| size_t data_index = all_data_nodes_.size() - kNumOfGetnextNode; | |||||
| for (const auto &node : graph->GetDirectNode()) { | |||||
| if (IsGetNextType(node)) { | |||||
| for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++data_index) { | |||||
| auto out_data_anchor = node->GetOutDataAnchor(out_index); | |||||
| GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue); | |||||
| NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index); | |||||
| GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create %zu data node failed.", | |||||
| out_data_anchor->GetIdx()); return INTERNAL_ERROR); | |||||
| for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(in_anchor == nullptr, continue); | |||||
| NodePtr dst_node = in_anchor->GetOwnerNode(); | |||||
| if (GraphUtils::RemoveEdge(out_data_anchor, in_anchor) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to remove edge between %s to %s", node->GetName().c_str(), | |||||
| dst_node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), dst_node->GetInDataAnchor(in_anchor->GetIdx())) != | |||||
| GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to add edge between %s to %s", data_node->GetName().c_str(), | |||||
| dst_node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (graph->RemoveNode(node) != GRAPH_SUCCESS) { | |||||
| GELOGE(GRAPH_FAILED, "Remove node %s failed!", node->GetName().c_str()); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| if (NodeUtils::UpdateOutputShape(*data, kDataOutIndex, data_shape) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| NodePtr MultiBatchClonePass::CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, | |||||
| size_t data_index) { | |||||
| size_t out_anchor_index = out_data_anchor->GetIdx(); | |||||
| std::string node_name = out_data_anchor->GetOwnerNode()->GetName() + "_" + std::to_string(out_anchor_index); | |||||
| OpDescPtr op_desc = MakeShared<OpDesc>(node_name, DATA); | |||||
| if (op_desc == nullptr) { | |||||
| GELOGE(OUT_OF_MEMORY, "Create data node failed."); | |||||
| return nullptr; | |||||
| } | } | ||||
| (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index); | |||||
| GELOGI("Update %s input/output shape to %s", data->GetName().c_str(), formats::ShapeToString(data_shape).c_str()); | |||||
| return SUCCESS; | |||||
| OpDescPtr getnext_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| if (getnext_op_desc == nullptr) { | |||||
| GELOGE(OUT_OF_MEMORY, "Op desc of %s is nullptr.", out_data_anchor->GetOwnerNode()->GetName().c_str()); | |||||
| return nullptr; | |||||
| } | |||||
| if (op_desc->AddInputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Add %s input desc failed.", op_desc->GetName().c_str()); | |||||
| return nullptr; | |||||
| } | |||||
| if (op_desc->AddOutputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Add %s output desc failed.", op_desc->GetName().c_str()); | |||||
| return nullptr; | |||||
| } | |||||
| NodePtr data_node = graph->AddNode(op_desc); | |||||
| GELOGD("Success create %s node.", data_node->GetName().c_str()); | |||||
| return data_node; | |||||
| } | } | ||||
| /// | /// | ||||
| @@ -598,17 +993,14 @@ Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) { | Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) { | ||||
| GELOGD("Start create subgraphs for %s.", graph->GetName().c_str()); | |||||
| const auto &op_desc = case_node_->GetOpDesc(); | const auto &op_desc = case_node_->GetOpDesc(); | ||||
| for (size_t i = 0; i < batch_shapes_.size(); ++i) { | for (size_t i = 0; i < batch_shapes_.size(); ++i) { | ||||
| std::vector<NodePtr> input_nodes; | std::vector<NodePtr> input_nodes; | ||||
| std::vector<NodePtr> output_nodes; | std::vector<NodePtr> output_nodes; | ||||
| const std::string postfix = kMultiBatchNodePostfix + std::to_string(i); | const std::string postfix = kMultiBatchNodePostfix + std::to_string(i); | ||||
| ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes); | ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes); | ||||
| if (subgraph == nullptr) { | |||||
| GELOGE(FAILED, "Create multi-batch case node failed"); | |||||
| return FAILED; | |||||
| } | |||||
| GE_IF_BOOL_EXEC(subgraph == nullptr, GELOGE(FAILED, "Create multi-batch case node failed"); return FAILED); | |||||
| subgraph->SetName("Batch_" + std::to_string(i)); | subgraph->SetName("Batch_" + std::to_string(i)); | ||||
| subgraph->SetParentNode(case_node_); | subgraph->SetParentNode(case_node_); | ||||
| subgraph->SetParentGraph(graph); | subgraph->SetParentGraph(graph); | ||||
| @@ -621,6 +1013,7 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const | |||||
| op_desc->AddSubgraphName(key_name); | op_desc->AddSubgraphName(key_name); | ||||
| op_desc->SetSubgraphInstanceName(i, subgraph->GetName()); | op_desc->SetSubgraphInstanceName(i, subgraph->GetName()); | ||||
| GELOGD("The %s has %zu input, %zu output.", subgraph->GetName().c_str(), input_nodes.size(), output_nodes.size()); | |||||
| for (const auto &data : input_nodes) { | for (const auto &data : input_nodes) { | ||||
| GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str()); | GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str()); | ||||
| } | } | ||||
| @@ -666,6 +1059,7 @@ Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) { | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) { | Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) { | ||||
| GELOGD("Start prune direct output."); | |||||
| const auto &func_desc = case_node_->GetOpDesc(); | const auto &func_desc = case_node_->GetOpDesc(); | ||||
| uint32_t unused_num = 0; | uint32_t unused_num = 0; | ||||
| uint32_t output_num = func_desc->GetOutputsSize(); | uint32_t output_num = func_desc->GetOutputsSize(); | ||||
| @@ -710,6 +1104,7 @@ Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) { | |||||
| /// | /// | ||||
| Status MultiBatchClonePass::UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num) { | Status MultiBatchClonePass::UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num) { | ||||
| if (unused_num == 0) { | if (unused_num == 0) { | ||||
| GELOGD("No need to update output tensor."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -36,6 +36,7 @@ class MultiBatchClonePass : public GraphPass { | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status CollectIoNodes(const ComputeGraphPtr &graph); | Status CollectIoNodes(const ComputeGraphPtr &graph); | ||||
| Status InitParamsOfGetNext(const NodePtr &node); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -49,10 +50,12 @@ class MultiBatchClonePass : public GraphPass { | |||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Create index data node for root graph. | /// @brief Create index data node for root graph. | ||||
| /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. | /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. | ||||
| /// @param [in] NodePtr node: index data node. | |||||
| /// @param [in] NodePtr shape_node: index data node, DATA or GETDYNAMICDIMS type. | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node); | |||||
| Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node); | |||||
| Status CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -70,6 +73,9 @@ class MultiBatchClonePass : public GraphPass { | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status CreateIndexNode(const ComputeGraphPtr &graph); | Status CreateIndexNode(const ComputeGraphPtr &graph); | ||||
| Status AddAttrForGetDynamicDims(const NodePtr &shape_node); | |||||
| Status LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node); | |||||
| Status LinkGetDynamicDimsToNetOutput(const NodePtr &output_node); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -78,39 +84,54 @@ class MultiBatchClonePass : public GraphPass { | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status CreateInputNode(const ComputeGraphPtr &graph); | Status CreateInputNode(const ComputeGraphPtr &graph); | ||||
| Status LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Create Const node for root graph. | |||||
| /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. | |||||
| /// @brief Set max shape to Data node in root graph. | |||||
| /// @param [in] const NodePtr &data: data in Root/Case graph. | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status CreateConstNode(const ComputeGraphPtr &graph); | |||||
| Status SetMaxShape(const NodePtr &data); | |||||
| Status SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index); | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Set max shape to Data/GetNext node in root graph. | |||||
| /// @param [in] const std::vector<int64_t> &shapes: dims of shape. | |||||
| /// @param [in] const NodePtr &data: data in Root/Case graph. | |||||
| /// @param [in] GeShape &data_shape: dims of data node. | |||||
| /// @param [in] size_t out_anchor_index: out anchor index of data node. | |||||
| /// @return 0: SUCCESS / others: FAILED | |||||
| /// | |||||
| Status SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape, | |||||
| size_t out_anchor_index); | |||||
| Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Create output node for root graph. | |||||
| /// @brief Create Const node for root graph. | |||||
| /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. | /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. | ||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status CreateOutputNode(const ComputeGraphPtr &graph); | |||||
| Status CreateConstNode(const ComputeGraphPtr &graph); | |||||
| void ChangeConstToData(); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Set max shape to Data node in root graph. | |||||
| /// @param [in] const NodePtr &data: data in Root/Case graph. | |||||
| /// @brief Create output node for root graph. | |||||
| /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status SetMaxShapeToData(const NodePtr &data); | |||||
| Status CreateOutputNode(const ComputeGraphPtr &graph); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Update Data node in Subgraph. | /// @brief Update Data node in Subgraph. | ||||
| /// @param [in] const NodePtr &data: data in Subgraph. | /// @param [in] const NodePtr &data: data in Subgraph. | ||||
| /// @param [in] size_t index: The batch index. | |||||
| /// @param [in] size_t batch_index: The batch index. | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status UpdateSubgraphData(const NodePtr &data, size_t index); | |||||
| Status UpdateSubgraphData(const NodePtr &data, size_t batch_index); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -122,13 +143,12 @@ class MultiBatchClonePass : public GraphPass { | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Set max shape to Data node in root graph. | |||||
| /// @param [in] const std::vector<int64_t> &shapes: dims of shape. | |||||
| /// @param [in] const NodePtr &data: data in Root/Case graph. | |||||
| /// @param [in] GeShape &data_shape: dims of data node. | |||||
| /// @brief Create nodes for root graph. | |||||
| /// @param [in] const ComputeGraphPtr &graph: Original graph. | |||||
| /// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
| /// | /// | ||||
| Status SetShapeToData(const std::vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape); | |||||
| Status CreateOriGraph(const ComputeGraphPtr &graph); | |||||
| NodePtr CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, size_t data_index); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -168,6 +188,10 @@ class MultiBatchClonePass : public GraphPass { | |||||
| std::map<string, vector<vector<int64_t>>> data_to_dynamic_info_; | std::map<string, vector<vector<int64_t>>> data_to_dynamic_info_; | ||||
| NodePtr case_node_; | NodePtr case_node_; | ||||
| size_t data_count_from_getnext_ = 0; | |||||
| bool getnext_sink_dynamic_dims_ = false; | |||||
| NodePtr shape_node_; | |||||
| std::set<NodePtr> out_control_nodes_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_PASSES_MULTI_BATCH_CLONE_PASS_H_ | #endif // GE_GRAPH_PASSES_MULTI_BATCH_CLONE_PASS_H_ | ||||
| @@ -204,6 +204,10 @@ Status UnusedArgsCleanPass::RemoveInputTensor(const map<ComputeGraphPtr, map<uin | |||||
| GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, old_anchor), "Remove edge failed"); | GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, old_anchor), "Remove edge failed"); | ||||
| GELOGI("Remove edge: %s %s", out_node->GetName().c_str(), func_node->GetName().c_str()); | GELOGI("Remove edge: %s %s", out_node->GetName().c_str(), func_node->GetName().c_str()); | ||||
| if (out_node->GetInDataNodes().size() == 0 && out_node->GetOutAllNodes().size() == 0) { | |||||
| GE_CHK_GRAPH_STATUS_RET(out_node->GetOwnerComputeGraph()->RemoveNode(out_node), "Remove node failed: %s", | |||||
| out_node->GetName().c_str()); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -37,9 +37,7 @@ | |||||
| #include "graph/passes/addn_pass.h" | #include "graph/passes/addn_pass.h" | ||||
| #include "graph/passes/aicpu_constant_folding_pass.h" | #include "graph/passes/aicpu_constant_folding_pass.h" | ||||
| #include "graph/passes/assert_pass.h" | #include "graph/passes/assert_pass.h" | ||||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/passes/assign_remove_pass.h" | |||||
| #endif | |||||
| #include "ge/ge_api_types.h" | |||||
| #include "graph/passes/common_subexpression_elimination_pass.h" | #include "graph/passes/common_subexpression_elimination_pass.h" | ||||
| #include "graph/passes/cond_pass.h" | #include "graph/passes/cond_pass.h" | ||||
| #include "graph/passes/cond_remove_pass.h" | #include "graph/passes/cond_remove_pass.h" | ||||
| @@ -51,6 +49,7 @@ | |||||
| #include "graph/passes/for_pass.h" | #include "graph/passes/for_pass.h" | ||||
| #include "graph/passes/guarantee_const_pass.h" | #include "graph/passes/guarantee_const_pass.h" | ||||
| #include "graph/passes/hccl_group_pass.h" | #include "graph/passes/hccl_group_pass.h" | ||||
| #include "graph/passes/hccl_memcpy_pass.h" | |||||
| #include "graph/passes/identity_pass.h" | #include "graph/passes/identity_pass.h" | ||||
| #include "graph/passes/infershape_pass.h" | #include "graph/passes/infershape_pass.h" | ||||
| #include "graph/passes/merge_pass.h" | #include "graph/passes/merge_pass.h" | ||||
| @@ -900,6 +899,160 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| long StringToLongNoThrow(const string &str) { | |||||
| try { | |||||
| return std::stol(str); | |||||
| } catch (const std::invalid_argument) { | |||||
| GELOGE(PARAM_INVALID, | |||||
| "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: " | |||||
| "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", | |||||
| str.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } catch (const std::out_of_range) { | |||||
| GELOGE(PARAM_INVALID, | |||||
| "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: " | |||||
| "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", | |||||
| str.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Parser shape_range from string to vector | |||||
| * shape_range from option normally is "[1~20,3,3~6,-1],[1~20,3,3~6,-1]" | |||||
| * @param shape_range | |||||
| */ | |||||
| Status ParseDynamicInputShapeRange(const std::string &shape_range, | |||||
| std::vector<std::vector<std::pair<int64_t, int64_t>>> &range) { | |||||
| if (shape_range.size() < 2) { | |||||
| GELOGE(PARAM_INVALID, "Shape range %s is invalid.", shape_range.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| // different shape_range of single input are split by ']' | |||||
| vector<string> shape_range_set = ge::StringUtils::Split(shape_range, ']'); | |||||
| if (shape_range_set.empty()) { | |||||
| GELOGE(PARAM_INVALID, "Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", | |||||
| shape_range.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| for (auto &shape_range_str : shape_range_set) { | |||||
| if (shape_range_str.empty()) { | |||||
| continue; | |||||
| } | |||||
| // trim start bytes, after that, single input should be "1~20,3,3~6,-1" | |||||
| if (ge::StringUtils::StartWith(shape_range_str, "[")) { | |||||
| shape_range_str = shape_range_str.substr(1, shape_range_str.size()); | |||||
| } | |||||
| if (ge::StringUtils::StartWith(shape_range_str, ",")) { | |||||
| shape_range_str = shape_range_str.substr(2, shape_range_str.size()); | |||||
| } | |||||
| // parse shape_range of single input. eg. "1~20,3,3~6,-1" | |||||
| std::vector<std::pair<int64_t, int64_t>> range_of_single_input; | |||||
| vector<string> dim_range_set = ge::StringUtils::Split(shape_range_str, ','); | |||||
| for (const auto &range_pair_str : dim_range_set) { | |||||
| vector<string> range_pair_set = ge::StringUtils::Split(range_pair_str, '~'); | |||||
| pair<int64_t, int64_t> range_pair; | |||||
| if (range_pair_set.size() == 1) { | |||||
| // fix dim | |||||
| auto range_value = StringToLongNoThrow(range_pair_set.at(0).c_str()); | |||||
| if (range_value < 0) { | |||||
| range_pair = std::make_pair(0, range_value); | |||||
| } else { | |||||
| range_pair = std::make_pair(range_value, range_value); | |||||
| } | |||||
| } else if (range_pair_set.size() == 2) { | |||||
| // unknown dim, should get range. | |||||
| auto range_left = StringToLongNoThrow(range_pair_set.at(0).c_str()); | |||||
| auto range_right = StringToLongNoThrow(range_pair_set.at(1).c_str()); | |||||
| range_pair = std::make_pair(range_left, range_right); | |||||
| } else { | |||||
| GELOGE(PARAM_INVALID, | |||||
| "Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", | |||||
| shape_range.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| range_of_single_input.emplace_back(range_pair); | |||||
| } | |||||
| range.emplace_back(range_of_single_input); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option, | |||||
| vector<vector<std::pair<int64_t, int64_t>>> &range_vec) { | |||||
| auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | |||||
| if (mode_iter == graph_option.end()) { | |||||
| GELOGD("Graph Option: Can not find %s option in graph options.", OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGD("Graph Option: dynamic_input_mode value is %s.", mode_iter->second.c_str()); | |||||
| if (mode_iter->second != "dynamic_execute") { | |||||
| return SUCCESS; | |||||
| } | |||||
| auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); | |||||
| if (iter == graph_option.end()) { | |||||
| GELOGE(PARAM_INVALID, "Graph option %s is required when %s is dynamic_execute", OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE, | |||||
| OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGD("GraphOption: dynamic_inputs_shape_range value is %s.", iter->second.c_str()); | |||||
| auto ret = ParseDynamicInputShapeRange(iter->second, range_vec); | |||||
| GE_CHK_STATUS_RET(ret, "Parse dynamic input shape range failed."); | |||||
| if (range_vec.size() != user_input.size()) { | |||||
| GELOGE(PARAM_INVALID, "Dynamic input shape range size is %zu, inputs size is %zu. Not match.", range_vec.size(), | |||||
| user_input.size()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, | |||||
| const vector<vector<std::pair<int64_t, int64_t>>> &range_vec, OpDescPtr &op, | |||||
| GeTensorDesc &desc) { | |||||
| auto origin_shape = desc.GetShape(); | |||||
| auto current_shape_range_vec = range_vec.at(index); | |||||
| if (current_shape_range_vec.size() != origin_shape.GetDimNum()) { | |||||
| GELOGE(PARAM_INVALID, "Given shape_range dim num is %zu, current dim num is %zu, not match.Pleace Check.", | |||||
| current_shape_range_vec.size(), origin_shape.GetDimNum()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| for (size_t i = 0; i < origin_shape.GetDimNum(); ++i) { | |||||
| if (current_shape_range_vec.at(i).first == current_shape_range_vec.at(i).second) { | |||||
| // given shape_range is known dim, check is same as origin or not | |||||
| if (origin_shape.GetDim(i) != current_shape_range_vec.at(i).first) { | |||||
| GELOGE(PARAM_INVALID, "Given shape range is %ld, current dim shape is %ld, not match.Pleace Check.", | |||||
| current_shape_range_vec.at(i).first, origin_shape.GetDim(i)); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| origin_shape.SetDim(i, current_shape_range_vec.at(i).first); | |||||
| } else { | |||||
| origin_shape.SetDim(i, -1); | |||||
| } | |||||
| } | |||||
| desc.SetShape(origin_shape); | |||||
| desc.SetShapeRange(current_shape_range_vec); | |||||
| int64_t dynamic_shape_size = 1; | |||||
| for (const auto range_pair : range_vec.at(index)) { | |||||
| FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); | |||||
| dynamic_shape_size *= range_pair.second; | |||||
| } | |||||
| auto data_type_size = GetSizeByDataType(desc.GetDataType()); | |||||
| if (data_type_size < 0) { | |||||
| GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.", | |||||
| TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); | |||||
| dynamic_shape_size *= data_type_size; | |||||
| GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); | |||||
| ge::TensorUtils::SetSize(desc, dynamic_shape_size); | |||||
| graphStatus graph_ret = op->UpdateInputDesc(0, desc); | |||||
| GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); | |||||
| graph_ret = op->UpdateOutputDesc(0, desc); | |||||
| GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| GraphPrepare::GraphPrepare() : compute_graph_(nullptr) {} | GraphPrepare::GraphPrepare() : compute_graph_(nullptr) {} | ||||
| @@ -1104,7 +1257,11 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) { | |||||
| Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option) { | |||||
| // Get shape range of input in dynamic_execute mode | |||||
| vector<vector<std::pair<int64_t,int64_t>>> dynamic_shape_range_vec; | |||||
| auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); | |||||
| GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); | |||||
| compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); | compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); | ||||
| for (NodePtr &input_node : compute_graph_->GetDirectNode()) { | for (NodePtr &input_node : compute_graph_->GetDirectNode()) { | ||||
| GE_CHECK_NOTNULL(input_node); | GE_CHECK_NOTNULL(input_node); | ||||
| @@ -1187,6 +1344,12 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) { | |||||
| return graph_ret; | return graph_ret; | ||||
| } | } | ||||
| if (!dynamic_shape_range_vec.empty()) { | |||||
| ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc); | |||||
| GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str()); | |||||
| continue; | |||||
| } | |||||
| if (!options_.train_graph_flag) { | if (!options_.train_graph_flag) { | ||||
| Status ret = AdjustDataOpOutput(input_node); | Status ret = AdjustDataOpOutput(input_node); | ||||
| GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "AdjustDataOpOutput fail, ret:%u", ret); return ret); | GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "AdjustDataOpOutput fail, ret:%u", ret); return ret); | ||||
| @@ -1360,17 +1523,17 @@ Status GraphPrepare::SaveOriginalGraphToOmModel() { | |||||
| GELOGI("Prepare %s on graph %s success.", name, compute_graph->GetName().c_str()); \ | GELOGI("Prepare %s on graph %s success.", name, compute_graph->GetName().c_str()); \ | ||||
| } while (0) | } while (0) | ||||
| Status GraphPrepare::PrepareDynShape(ConstGraphPtr graph, const std::vector<GeTensor> &user_input, | |||||
| Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std::vector<GeTensor> &user_input, | |||||
| ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { | ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { | ||||
| GE_CHECK_NOTNULL(graph); | |||||
| GE_CHECK_NOTNULL(graph_node->GetGraph()); | |||||
| GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
| GetLocalOmgContext().type = static_cast<domi::FrameworkType>(options_.framework_type); | GetLocalOmgContext().type = static_cast<domi::FrameworkType>(options_.framework_type); | ||||
| const Graph &const_graph = *graph; | |||||
| const Graph &const_graph = *graph_node->GetGraph(); | |||||
| PP_RUN("Init", Init, const_graph, session_id); | PP_RUN("Init", Init, const_graph, session_id); | ||||
| PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE); | PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE); | ||||
| PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input); | |||||
| PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetOptions()); | |||||
| PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation); | PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation); | ||||
| PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput); | PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput); | ||||
| PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_); | PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_); | ||||
| @@ -1705,9 +1868,6 @@ Status GraphPrepare::PrepareOptimize() { | |||||
| VarIsInitializedOpPass var_is_initialized_pass; | VarIsInitializedOpPass var_is_initialized_pass; | ||||
| ParallelConcatStartOpPass parallel_concat_start_op_pass; | ParallelConcatStartOpPass parallel_concat_start_op_pass; | ||||
| IdentityPass identity_pass(false); | IdentityPass identity_pass(false); | ||||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||||
| AssignRemovePass assign_remove_pass; | |||||
| #endif | |||||
| SnapshotPass snapshot_pass; | SnapshotPass snapshot_pass; | ||||
| if (!options_.train_graph_flag) { | if (!options_.train_graph_flag) { | ||||
| names_to_passes.emplace_back("DropOutPass", &dropout_pass); | names_to_passes.emplace_back("DropOutPass", &dropout_pass); | ||||
| @@ -1722,11 +1882,6 @@ Status GraphPrepare::PrepareOptimize() { | |||||
| names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); | names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); | ||||
| names_to_passes.emplace_back("ParallelConcatStartOpPass", &parallel_concat_start_op_pass); | names_to_passes.emplace_back("ParallelConcatStartOpPass", &parallel_concat_start_op_pass); | ||||
| names_to_passes.emplace_back("IdentityPass", &identity_pass); | names_to_passes.emplace_back("IdentityPass", &identity_pass); | ||||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||||
| if (GetContext().GetHostExecFlag()) { | |||||
| names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); | |||||
| } | |||||
| #endif | |||||
| GE_TIMESTAMP_START(names_to_passes); | GE_TIMESTAMP_START(names_to_passes); | ||||
| ret = ge_passes.Run(names_to_passes); | ret = ge_passes.Run(names_to_passes); | ||||
| GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); | GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); | ||||
| @@ -1738,6 +1893,8 @@ Status GraphPrepare::PrepareOptimize() { | |||||
| PassManager graph_pass; | PassManager graph_pass; | ||||
| try { | try { | ||||
| (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass); | (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass); | ||||
| // todo 临时把hccl的memcpy插入放到图准备,为了防止其多插memcpy | |||||
| (void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass); | |||||
| } catch (std::bad_alloc &e) { | } catch (std::bad_alloc &e) { | ||||
| GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); | GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -1837,7 +1994,7 @@ Status GraphPrepare::ProcessNetOutput() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input) { | |||||
| Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,const std::map<string,string> &graph_option) { | |||||
| compute_graph_->SetInputSize(user_input.size()); | compute_graph_->SetInputSize(user_input.size()); | ||||
| if (user_input.empty()) { | if (user_input.empty()) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -1849,7 +2006,7 @@ Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = UpdateInput(user_input); | |||||
| ret = UpdateInput(user_input, graph_option); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "UpdateInput fail, ret:%u", ret); | GELOGE(ret, "UpdateInput fail, ret:%u", ret); | ||||
| return ret; | return ret; | ||||
| @@ -45,7 +45,7 @@ class GraphPrepare { | |||||
| virtual ~GraphPrepare(); | virtual ~GraphPrepare(); | ||||
| GraphPrepare(const GraphPrepare &in) = delete; | GraphPrepare(const GraphPrepare &in) = delete; | ||||
| GraphPrepare &operator=(const GraphPrepare &in) = delete; | GraphPrepare &operator=(const GraphPrepare &in) = delete; | ||||
| Status PrepareDynShape(ConstGraphPtr graph, | |||||
| Status PrepareDynShape(const GraphNodePtr &graph_node, | |||||
| const std::vector<GeTensor> &user_input, | const std::vector<GeTensor> &user_input, | ||||
| ge::ComputeGraphPtr &compute_graph, | ge::ComputeGraphPtr &compute_graph, | ||||
| uint64_t session_id = 0); | uint64_t session_id = 0); | ||||
| @@ -63,8 +63,8 @@ class GraphPrepare { | |||||
| Status CheckRefOp(); | Status CheckRefOp(); | ||||
| Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); | Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); | ||||
| Status AdjustDataOpOutput(const NodePtr &node); | Status AdjustDataOpOutput(const NodePtr &node); | ||||
| Status UpdateInput(const std::vector<GeTensor> &user_input); | |||||
| Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input); | |||||
| Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||||
| Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||||
| Status CheckConstOp(); | Status CheckConstOp(); | ||||
| Status VerifyConstOp(const NodePtr &node); | Status VerifyConstOp(const NodePtr &node); | ||||
| Status CheckUserInput(const std::vector<GeTensor> &user_input); | Status CheckUserInput(const std::vector<GeTensor> &user_input); | ||||
| @@ -1692,13 +1692,11 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { | |||||
| } | } | ||||
| Status ProcessMultiBatch(ComputeGraphPtr &graph) { | Status ProcessMultiBatch(ComputeGraphPtr &graph) { | ||||
| if (GetLocalOmgContext().dynamic_node_type.empty()) { | |||||
| const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); | |||||
| if (multi_batch_with_switchn == nullptr) { | |||||
| PassManager pass_manager; | |||||
| GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); | |||||
| return pass_manager.Run(graph); | |||||
| } | |||||
| const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); | |||||
| if (multi_batch_with_switchn == nullptr) { | |||||
| PassManager pass_manager; | |||||
| GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); | |||||
| return pass_manager.Run(graph); | |||||
| } | } | ||||
| if (!GetLocalOmgContext().need_multi_batch) { | if (!GetLocalOmgContext().need_multi_batch) { | ||||
| GELOGI("No need to process_multi for no_train graph."); | GELOGI("No need to process_multi for no_train graph."); | ||||
| @@ -99,9 +99,8 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector<NodePtr> &data_n | |||||
| } | } | ||||
| GELOGI("Data count is %zu, getnext nosink count is %zu, getnext sink count is %zu.", data_nodes.size(), | GELOGI("Data count is %zu, getnext nosink count is %zu, getnext sink count is %zu.", data_nodes.size(), | ||||
| getnext_nosink_nodes.size(), getnext_sink_nodes.size()); | getnext_nosink_nodes.size(), getnext_sink_nodes.size()); | ||||
| GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrDataNodes, data_nodes), GELOGW("Set data nodes attr failed.");) | |||||
| GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes), | |||||
| GELOGW("Set getnext nosink nodes attr failed.");) | |||||
| GetLocalOmgContext().data_nodes = data_nodes; | |||||
| GetLocalOmgContext().getnext_nosink_nodes = getnext_nosink_nodes; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -25,7 +25,6 @@ target_compile_options(host_cpu_engine PRIVATE | |||||
| target_compile_definitions(host_cpu_engine PRIVATE | target_compile_definitions(host_cpu_engine PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(host_cpu_engine PRIVATE | target_include_directories(host_cpu_engine PRIVATE | ||||
| @@ -66,7 +65,6 @@ target_compile_options(atc_host_cpu_engine PRIVATE | |||||
| target_compile_definitions(atc_host_cpu_engine PRIVATE | target_compile_definitions(atc_host_cpu_engine PRIVATE | ||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(atc_host_cpu_engine PRIVATE | target_include_directories(atc_host_cpu_engine PRIVATE | ||||
| @@ -111,7 +109,6 @@ target_compile_options(host_cpu_opskernel_builder PRIVATE | |||||
| target_compile_definitions(host_cpu_opskernel_builder PRIVATE | target_compile_definitions(host_cpu_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(host_cpu_opskernel_builder PRIVATE | target_include_directories(host_cpu_opskernel_builder PRIVATE | ||||
| @@ -152,7 +149,6 @@ target_compile_options(atc_host_cpu_opskernel_builder PRIVATE | |||||
| target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | ||||
| @@ -199,7 +195,6 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE | |||||
| target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(host_cpu_opskernel_builder_static PRIVATE | target_include_directories(host_cpu_opskernel_builder_static PRIVATE | ||||
| @@ -20,9 +20,7 @@ | |||||
| #include "graph/manager/graph_caching_allocator.h" | #include "graph/manager/graph_caching_allocator.h" | ||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| #include "graph/manager/rdma_pool_allocator.h" | #include "graph/manager/rdma_pool_allocator.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/host_mem_allocator.h" | #include "graph/manager/host_mem_allocator.h" | ||||
| #endif | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -67,11 +65,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { | |||||
| if (mem_type == RDMA_HBM) { | if (mem_type == RDMA_HBM) { | ||||
| buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); | buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); | ||||
| } else if (mem_type == HOST_DDR) { | } else if (mem_type == HOST_DDR) { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); | buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); | ||||
| #else | |||||
| buffer = malloc(allocate_size); | |||||
| #endif | |||||
| } else { | } else { | ||||
| if (allocate_size > kMaxHbmMemorySize) { | if (allocate_size > kMaxHbmMemorySize) { | ||||
| GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); | GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); | ||||
| @@ -108,11 +102,7 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) { | |||||
| if (mem_type == RDMA_HBM) { | if (mem_type == RDMA_HBM) { | ||||
| MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | ||||
| } else if (mem_type == HOST_DDR) { | } else if (mem_type == HOST_DDR) { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); | MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); | ||||
| #else | |||||
| free(data); | |||||
| #endif | |||||
| } else { | } else { | ||||
| MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | ||||
| } | } | ||||
| @@ -98,10 +98,10 @@ Status HybridModelAsyncExecutor::Init() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelAsyncExecutor::PreRun(InputData &current_data) { | |||||
| Status HybridModelAsyncExecutor::PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args) { | |||||
| GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data"); | GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data"); | ||||
| RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End"); | RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End"); | ||||
| GE_CHK_STATUS_RET(CopyInputData(current_data), "Failed to copy input data to model"); | |||||
| GE_CHK_STATUS_RET(PrepareInputs(current_data, args), "Failed to copy input data to model"); | |||||
| RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End"); | RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -126,14 +126,9 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||||
| InputData current_data = data_wrapper->GetInput(); | InputData current_data = data_wrapper->GetInput(); | ||||
| GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id_, current_data.index); | GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id_, current_data.index); | ||||
| HybridModelExecutor::ExecuteArgs args; | |||||
| args.inputs.resize(input_tensors_.size()); | |||||
| for (auto &it : input_tensors_) { | |||||
| args.inputs[it.first] = it.second; | |||||
| } | |||||
| RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_); | RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_); | ||||
| ret = PreRun(current_data); | |||||
| HybridModelExecutor::ExecuteArgs args; | |||||
| ret = PreRun(current_data, args); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | ||||
| @@ -202,7 +197,9 @@ Status HybridModelAsyncExecutor::SyncVarData() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelAsyncExecutor::CopyInputData(const InputData &current_data) { | |||||
| Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) { | |||||
| args.inputs.resize(input_tensors_.size()); | |||||
| args.input_desc.resize(input_tensor_desc_.size()); | |||||
| const std::vector<DataBuffer> &blobs = current_data.blobs; | const std::vector<DataBuffer> &blobs = current_data.blobs; | ||||
| for (const auto &it : input_tensors_) { | for (const auto &it : input_tensors_) { | ||||
| auto input_index = it.first; | auto input_index = it.first; | ||||
| @@ -230,6 +227,13 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData &current_data) { | |||||
| data_buf.data, | data_buf.data, | ||||
| data_buf.length, | data_buf.length, | ||||
| RT_MEMCPY_HOST_TO_DEVICE)); | RT_MEMCPY_HOST_TO_DEVICE)); | ||||
| args.inputs[input_index] = input_tensor; | |||||
| if (is_input_dynamic_[input_index]) { | |||||
| auto &tensor_desc = input_tensor_desc_[input_index]; | |||||
| tensor_desc->SetShape(GeShape(current_data.shapes[input_index])); | |||||
| args.input_desc[input_index] = tensor_desc; | |||||
| GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); | |||||
| } | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -240,7 +244,10 @@ Status HybridModelAsyncExecutor::InitInputTensors() { | |||||
| GE_CHECK_NOTNULL(allocator); | GE_CHECK_NOTNULL(allocator); | ||||
| int input_index = 0; | int input_index = 0; | ||||
| for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) { | for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) { | ||||
| GELOGD("Init input[%u], node = %s", input_index, input_node->NodeName().c_str()); | |||||
| GELOGD("Init input[%u], node = %s, is_dynamic = %d", | |||||
| input_index, | |||||
| input_node->NodeName().c_str(), | |||||
| input_node->is_dynamic); | |||||
| auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex); | auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex); | ||||
| GE_CHECK_NOTNULL(output_desc); | GE_CHECK_NOTNULL(output_desc); | ||||
| int64_t tensor_size = 0; | int64_t tensor_size = 0; | ||||
| @@ -258,6 +265,8 @@ Status HybridModelAsyncExecutor::InitInputTensors() { | |||||
| TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release())); | TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release())); | ||||
| tensor.SetName("Input_" + input_node->NodeName()); | tensor.SetName("Input_" + input_node->NodeName()); | ||||
| input_tensors_.emplace(input_index, tensor); | input_tensors_.emplace(input_index, tensor); | ||||
| input_tensor_desc_.emplace(input_index, output_desc); | |||||
| is_input_dynamic_.push_back(input_node->is_dynamic); | |||||
| input_index += 1; | input_index += 1; | ||||
| } | } | ||||
| @@ -402,18 +411,12 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector< | |||||
| buffer.data = const_cast<uint8_t *>(tensor.GetData().GetData()); | buffer.data = const_cast<uint8_t *>(tensor.GetData().GetData()); | ||||
| buffer.length = tensor.GetData().size(); | buffer.length = tensor.GetData().size(); | ||||
| input_data.blobs.emplace_back(buffer); | input_data.blobs.emplace_back(buffer); | ||||
| input_data.shapes.emplace_back(tensor.GetTensorDesc().GetShape().GetDims()); | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(CopyInputData(input_data), "Failed to copy input data to model"); | |||||
| GELOGD("Done copying input data successfully."); | |||||
| HybridModelExecutor::ExecuteArgs args; | HybridModelExecutor::ExecuteArgs args; | ||||
| args.inputs.resize(input_tensors_.size()); | |||||
| args.input_desc.resize(input_tensors_.size()); | |||||
| for (auto &it : input_tensors_) { | |||||
| args.inputs[it.first] = it.second; | |||||
| args.input_desc[it.first] = MakeShared<GeTensorDesc>(inputs[it.first].GetTensorDesc()); | |||||
| } | |||||
| GE_CHK_STATUS_RET(PrepareInputs(input_data, args), "Failed to copy input data to model"); | |||||
| GELOGD("Done copying input data successfully."); | |||||
| GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); | GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); | ||||
| std::vector<ge::OutputTensorInfo> output_tensor_info_list; | std::vector<ge::OutputTensorInfo> output_tensor_info_list; | ||||
| @@ -70,9 +70,9 @@ class HybridModelAsyncExecutor { | |||||
| Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::OutputTensorInfo> &outputs); | Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::OutputTensorInfo> &outputs); | ||||
| Status PreRun(InputData &current_data); | |||||
| Status PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args); | |||||
| Status CopyInputData(const InputData &current_data); | |||||
| Status PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args); | |||||
| std::mutex mu_; | std::mutex mu_; | ||||
| HybridModel *model_; | HybridModel *model_; | ||||
| @@ -86,6 +86,8 @@ class HybridModelAsyncExecutor { | |||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| std::map<uint32_t, TensorValue> input_tensors_; | std::map<uint32_t, TensorValue> input_tensors_; | ||||
| std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_; | |||||
| std::vector<bool> is_input_dynamic_; | |||||
| std::shared_ptr<ModelListener> listener_; | std::shared_ptr<ModelListener> listener_; | ||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| @@ -221,6 +221,8 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel | |||||
| tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | ||||
| tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | ||||
| } | } | ||||
| tmp_compute_graph_info.task_id = context_->GetTaskId(); | |||||
| tmp_compute_graph_info.stream_id = context_->GetStreamId(); | |||||
| compute_graph_info.emplace_back(tmp_compute_graph_info); | compute_graph_info.emplace_back(tmp_compute_graph_info); | ||||
| GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); | GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); | ||||
| } | } | ||||
| @@ -25,21 +25,30 @@ | |||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "graph/manager/host_mem_manager.h" | #include "graph/manager/host_mem_manager.h" | ||||
| #include "graph/manager/trans_var_data_utils.h" | #include "graph/manager/trans_var_data_utils.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| #include "graph/manager/host_mem_allocator.h" | #include "graph/manager/host_mem_allocator.h" | ||||
| #endif | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| using domi::LogTimeStampDef; | |||||
| using domi::TaskDef; | |||||
| namespace { | namespace { | ||||
| const uint32_t kSubgraphIndex = 0U; | const uint32_t kSubgraphIndex = 0U; | ||||
| const uint32_t kVarOutputIndex = 0U; | const uint32_t kVarOutputIndex = 0U; | ||||
| const uint64_t kProfilingFpStartLogid = 1U; | |||||
| const uint64_t kProfilingBpEndLogid = 2U; | |||||
| const uint64_t kProfilingIterEndLogid = 65535U; | |||||
| const int kBytes = 8; | const int kBytes = 8; | ||||
| const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | ||||
| const char *const kProfilingGraph = "ProfilingGraph"; | |||||
| const char *const kProfilingFpNode = "ProfilingFpNode"; | |||||
| const char *const kProfilingBpNode = "ProfilingBpNode"; | |||||
| const char *const kProfilingEndNode = "ProfilingEndNode"; | |||||
| const char *const kProfilingArNode = "ProfilingAllReduceNode"; | |||||
| const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | |||||
| Status SetOutputNameAttr(ComputeGraph &graph) { | Status SetOutputNameAttr(ComputeGraph &graph) { | ||||
| vector<string> output_names; | vector<string> output_names; | ||||
| @@ -854,7 +863,6 @@ Status HybridModelBuilder::InitConstantOps() { | |||||
| std::unique_ptr<TensorValue> var_tensor; | std::unique_ptr<TensorValue> var_tensor; | ||||
| if (GetContext().GetHostExecFlag()) { | if (GetContext().GetHostExecFlag()) { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| GE_CHECK_NOTNULL(ge_tensor); | GE_CHECK_NOTNULL(ge_tensor); | ||||
| // Address for eigen kernel should be aligned with 16 bytes | // Address for eigen kernel should be aligned with 16 bytes | ||||
| // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned | // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned | ||||
| @@ -867,11 +875,6 @@ Status HybridModelBuilder::InitConstantOps() { | |||||
| } | } | ||||
| var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), | var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), | ||||
| aligned_tensor.GetData().size())); | aligned_tensor.GetData().size())); | ||||
| #else | |||||
| auto buffer = ge_tensor->MutableData(); | |||||
| GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); | |||||
| var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); | |||||
| #endif | |||||
| } else { | } else { | ||||
| GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | ||||
| GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | ||||
| @@ -926,7 +929,6 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
| GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); | GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); | ||||
| return GE_GRAPH_MALLOC_FAILED; | return GE_GRAPH_MALLOC_FAILED; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, | if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, | ||||
| tensor_size) == nullptr) { | tensor_size) == nullptr) { | ||||
| GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); | GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); | ||||
| @@ -936,11 +938,6 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
| std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), | std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), | ||||
| tensor_size)); | tensor_size)); | ||||
| #else | |||||
| GELOGD("Host variable [%s] malloc success.", it.first.c_str()); | |||||
| std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); | |||||
| #endif | |||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); | hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); | ||||
| } | } | ||||
| @@ -1531,6 +1528,188 @@ Status HybridModelBuilder::RecoverGraphUnknownFlag() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list) { | |||||
| uint64_t jobid_log_id = ge::GetContext().TraceId(); | |||||
| GELOGD("The first FP operator is %s,, job_id %lu", op_desc->GetName().c_str(), jobid_log_id); | |||||
| TaskDef job_task_def; | |||||
| job_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||||
| job_task_def.set_stream_id(op_desc->GetStreamId()); | |||||
| LogTimeStampDef *job_log_def = job_task_def.mutable_log_timestamp(); | |||||
| if (job_log_def != nullptr) { | |||||
| job_log_def->set_logid(jobid_log_id); | |||||
| job_log_def->set_notify(false); | |||||
| } | |||||
| task_def_list.emplace_back(job_task_def); | |||||
| TaskDef fp_task_def; | |||||
| fp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||||
| fp_task_def.set_stream_id(op_desc->GetStreamId()); | |||||
| LogTimeStampDef *fp_log_def = fp_task_def.mutable_log_timestamp(); | |||||
| if (fp_log_def != nullptr) { | |||||
| fp_log_def->set_logid(kProfilingFpStartLogid); | |||||
| fp_log_def->set_notify(false); | |||||
| } | |||||
| task_def_list.emplace_back(fp_task_def); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, | |||||
| vector<domi::TaskDef> &task_def_list) { | |||||
| TaskDef ar_task_def; | |||||
| ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||||
| ar_task_def.set_stream_id(op_desc->GetStreamId()); | |||||
| LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | |||||
| if (ar_log_def != nullptr) { | |||||
| ar_log_def->set_logid(log_id); | |||||
| ar_log_def->set_notify(false); | |||||
| } | |||||
| task_def_list.emplace_back(ar_task_def); | |||||
| return SUCCESS; | |||||
| } | |||||
| // Appends the profiler-trace task tagged with kProfilingBpEndLogid to task_def_list. | |||||
| // The task uses the stream of op_desc and is created with notify disabled. | |||||
| Status HybridModelBuilder::GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list) { | |||||
| TaskDef trace_task; | |||||
| trace_task.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||||
| trace_task.set_stream_id(op_desc->GetStreamId()); | |||||
| // The pointer keeps its original name because GE_CHECK_NOTNULL may report the checked expression. | |||||
| LogTimeStampDef *bp_log_def = trace_task.mutable_log_timestamp(); | |||||
| GE_CHECK_NOTNULL(bp_log_def); | |||||
| bp_log_def->set_notify(false); | |||||
| bp_log_def->set_logid(kProfilingBpEndLogid); | |||||
| task_def_list.push_back(trace_task); | |||||
| return SUCCESS; | |||||
| } | |||||
| // Appends the profiler-trace task tagged with kProfilingIterEndLogid to task_def_list. | |||||
| // Unlike the other trace generators in this file, this task is created with notify enabled. | |||||
| Status HybridModelBuilder::GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list) { | |||||
| TaskDef trace_task; | |||||
| trace_task.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||||
| trace_task.set_stream_id(op_desc->GetStreamId()); | |||||
| // The pointer keeps its original name because GE_CHECK_NOTNULL may report the checked expression. | |||||
| LogTimeStampDef *end_log_def = trace_task.mutable_log_timestamp(); | |||||
| GE_CHECK_NOTNULL(end_log_def); | |||||
| end_log_def->set_notify(true); | |||||
| end_log_def->set_logid(kProfilingIterEndLogid); | |||||
| task_def_list.push_back(trace_task); | |||||
| return SUCCESS; | |||||
| } | |||||
| // Creates the profiling trace node that is queued in graph_item ahead of `node`, when requested. | |||||
| // - ATTR_NAME_INSERT_FP_PROFILILNG_TASK set: generates the FP-start trace tasks and an FP node. | |||||
| // - ATTR_NAME_INSERT_BP_PROFILILNG_TASK set on an all-reduce op: generates the all-reduce start | |||||
| //   trace (log id read from ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID) and its node. | |||||
| // All generated tasks are stored in hybrid_model_.task_defs_ under the node created last, and the | |||||
| // matching NodeItem is appended to graph_item.node_items_. Does nothing if no attribute is set. | |||||
| // Returns SUCCESS, or the first error from a null check, task generation or node-item creation. | |||||
| Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| const OpDescPtr &op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph); | |||||
| GE_CHECK_NOTNULL(compute_graph); | |||||
| NodePtr node_ptr = nullptr; | |||||
| vector<domi::TaskDef> task_def_list; | |||||
| // create fp node | |||||
| // The attribute is optional: when it is absent the flag keeps its default of false. | |||||
| bool is_insert_fp_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); | |||||
| if (is_insert_fp_profiling_task) { | |||||
| GE_CHK_STATUS_RET_NOLOG(GenerateFpProfilingTask(op_desc, task_def_list)); | |||||
| auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE); | |||||
| GE_CHECK_NOTNULL(fp_desc); | |||||
| fp_desc->SetOpKernelLibName(kEngineNameRts); | |||||
| node_ptr = compute_graph->AddNode(fp_desc); | |||||
| // Do not report success (or silently drop the generated tasks) if the node was not added. | |||||
| GE_CHECK_NOTNULL(node_ptr); | |||||
| GELOGD("Create fp profiling node success before."); | |||||
| } | |||||
| // create all reduce start node | |||||
| bool is_insert_bp_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | |||||
| bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | |||||
| if (is_all_reduce && is_insert_bp_profiling_task) { | |||||
| // A missing log-id attribute leaves log_id at 0. | |||||
| int64_t log_id = 0; | |||||
| (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | |||||
| GELOGD("All reduce node profiling task log id: %ld before", log_id); | |||||
| GE_CHK_STATUS_RET_NOLOG(GenerateArProfilingTask(op_desc, log_id, task_def_list)); | |||||
| string op_name = string(kProfilingArNode) + std::to_string(log_id); | |||||
| auto ar_desc_start = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE); | |||||
| GE_CHECK_NOTNULL(ar_desc_start); | |||||
| ar_desc_start->SetOpKernelLibName(kEngineNameRts); | |||||
| // NOTE(review): if an FP node was created above, node_ptr now refers to this node instead and | |||||
| // receives the FP tasks as well - confirm that a single carrier node is intended. | |||||
| node_ptr = compute_graph->AddNode(ar_desc_start); | |||||
| GE_CHECK_NOTNULL(node_ptr); | |||||
| GELOGD("Create all reduce start profiling node success before."); | |||||
| } | |||||
| if (node_ptr != nullptr) { | |||||
| for (const auto &task_def : task_def_list) { | |||||
| hybrid_model_.task_defs_[node_ptr].emplace_back(task_def); | |||||
| } | |||||
| NodeItem *node_item = nullptr; | |||||
| GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item)); | |||||
| GE_CHECK_NOTNULL(node_item); | |||||
| node_item->input_start = 0; | |||||
| node_item->output_start = 0; | |||||
| graph_item.node_items_.emplace_back(node_item); | |||||
| } else { | |||||
| GELOGD("No need to create profiling node before."); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| // Creates the profiling trace node that is queued in graph_item right after `node`, when requested. | |||||
| // - ATTR_NAME_INSERT_BP_PROFILILNG_TASK set on an all-reduce op: generates the all-reduce end trace | |||||
| //   using (log id attribute + 1) and its node. | |||||
| // - ATTR_NAME_INSERT_BP_PROFILILNG_TASK set on any other op: generates the BP-end trace and node. | |||||
| // - ATTR_NAME_INSERT_END_PROFILILNG_TASK set: generates the iteration-end trace and node. | |||||
| // All generated tasks are stored in hybrid_model_.task_defs_ under the node created last, and the | |||||
| // matching NodeItem is appended to graph_item.node_items_. Does nothing if no attribute is set. | |||||
| // Returns SUCCESS, or the first error from a null check, task generation or node-item creation. | |||||
| Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| const OpDescPtr &op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph); | |||||
| GE_CHECK_NOTNULL(compute_graph); | |||||
| NodePtr node_ptr = nullptr; | |||||
| vector<domi::TaskDef> task_def_list; | |||||
| // Create all reduce end node | |||||
| // The attribute is optional: when it is absent the flag keeps its default of false. | |||||
| bool is_insert_bp_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | |||||
| bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | |||||
| if (is_all_reduce && is_insert_bp_profiling_task) { | |||||
| // A missing log-id attribute leaves log_id at 0. | |||||
| int64_t log_id = 0; | |||||
| (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | |||||
| GELOGD("All reduce node profiling task log id: %ld after", log_id); | |||||
| GE_CHK_STATUS_RET_NOLOG(GenerateArProfilingTask(op_desc, log_id + 1, task_def_list)); | |||||
| string op_name = string(kProfilingArNode) + std::to_string(log_id + 1); | |||||
| auto ar_desc_end = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE); | |||||
| GE_CHECK_NOTNULL(ar_desc_end); | |||||
| ar_desc_end->SetOpKernelLibName(kEngineNameRts); | |||||
| node_ptr = compute_graph->AddNode(ar_desc_end); | |||||
| // Do not report success (or silently drop the generated tasks) if the node was not added. | |||||
| GE_CHECK_NOTNULL(node_ptr); | |||||
| GELOGD("Create all reduce end profiling node success after."); | |||||
| } | |||||
| // create bp node | |||||
| if (!is_all_reduce && is_insert_bp_profiling_task) { | |||||
| GE_CHK_STATUS_RET_NOLOG(GenerateBpProfilingTask(op_desc, task_def_list)); | |||||
| auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE); | |||||
| GE_CHECK_NOTNULL(bp_op_desc); | |||||
| bp_op_desc->SetOpKernelLibName(kEngineNameRts); | |||||
| node_ptr = compute_graph->AddNode(bp_op_desc); | |||||
| GE_CHECK_NOTNULL(node_ptr); | |||||
| GELOGD("Create bp profiling node success after."); | |||||
| } | |||||
| // create end node | |||||
| bool is_insert_end_profiling_task = false; | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task); | |||||
| if (is_insert_end_profiling_task) { | |||||
| GE_CHK_STATUS_RET_NOLOG(GenerateEndProfilingTask(op_desc, task_def_list)); | |||||
| auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE); | |||||
| GE_CHECK_NOTNULL(end_desc); | |||||
| end_desc->SetOpKernelLibName(kEngineNameRts); | |||||
| // NOTE(review): if a node was created above, node_ptr now refers to this node instead and | |||||
| // receives the earlier tasks as well - confirm that a single carrier node is intended. | |||||
| node_ptr = compute_graph->AddNode(end_desc); | |||||
| GE_CHECK_NOTNULL(node_ptr); | |||||
| GELOGD("Create end profiling node success after."); | |||||
| } | |||||
| if (node_ptr != nullptr) { | |||||
| for (const auto &task_def : task_def_list) { | |||||
| hybrid_model_.task_defs_[node_ptr].emplace_back(task_def); | |||||
| } | |||||
| NodeItem *node_item = nullptr; | |||||
| GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item)); | |||||
| GE_CHECK_NOTNULL(node_item); | |||||
| node_item->input_start = 0; | |||||
| node_item->output_start = 0; | |||||
| graph_item.node_items_.emplace_back(node_item); | |||||
| } else { | |||||
| GELOGD("No need to create profiling node after."); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) { | Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) { | ||||
| GELOGD("Start to load subgraph [%s]", graph.GetName().c_str()); | GELOGD("Start to load subgraph [%s]", graph.GetName().c_str()); | ||||
| // for known partitioned call, load all nodes | // for known partitioned call, load all nodes | ||||
| @@ -1567,8 +1746,9 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root | |||||
| graph_item->output_node_ = node_item; | graph_item->output_node_ = node_item; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildOutputMapping(*graph_item, *node_item, is_root_graph)); | GE_CHK_STATUS_RET_NOLOG(BuildOutputMapping(*graph_item, *node_item, is_root_graph)); | ||||
| } | } | ||||
| GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeBefore(*graph_item, node)); | |||||
| graph_item->node_items_.emplace_back(node_item); | graph_item->node_items_.emplace_back(node_item); | ||||
| GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeAfter(*graph_item, node)); | |||||
| // parse var outputs | // parse var outputs | ||||
| GE_CHK_STATUS_RET_NOLOG(ParseVarOutputs(*node_item)); | GE_CHK_STATUS_RET_NOLOG(ParseVarOutputs(*node_item)); | ||||
| GELOGD("NodeItem created: %s", node_item->DebugString().c_str()); | GELOGD("NodeItem created: %s", node_item->DebugString().c_str()); | ||||
| @@ -79,6 +79,12 @@ class HybridModelBuilder { | |||||
| Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); | Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); | ||||
| Status RecoverGraphUnknownFlag(); | Status RecoverGraphUnknownFlag(); | ||||
| Status CheckAicpuOpList(); | Status CheckAicpuOpList(); | ||||
| Status CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node); | |||||
| Status CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node); | |||||
| Status GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | |||||
| Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | |||||
| Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | |||||
| Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list); | |||||
| const char* GetGraphName() const { | const char* GetGraphName() const { | ||||
| return hybrid_model_.model_name_.c_str(); | return hybrid_model_.model_name_.c_str(); | ||||
| @@ -18,10 +18,8 @@ | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | #include "hybrid/node_executor/host_cpu/kernel_factory.h" | ||||
| #include "graph/passes/folding_pass.h" | #include "graph/passes/folding_pass.h" | ||||
| #include "hybrid/model/hybrid_model.h" | #include "hybrid/model/hybrid_model.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| #include "graph/manager/host_mem_allocator.h" | #include "graph/manager/host_mem_allocator.h" | ||||
| #endif | |||||
| #include "ge_local_engine/engine/host_cpu_engine.h" | #include "ge_local_engine/engine/host_cpu_engine.h" | ||||
| namespace ge { | namespace ge { | ||||
| @@ -54,18 +52,11 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||||
| auto input_desc_ptr = context.GetInputDesc(i); | auto input_desc_ptr = context.GetInputDesc(i); | ||||
| GE_CHECK_NOTNULL(input_desc_ptr); | GE_CHECK_NOTNULL(input_desc_ptr); | ||||
| const auto &input_desc = *input_desc_ptr; | const auto &input_desc = *input_desc_ptr; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| auto tensor = context.GetInput(i); | auto tensor = context.GetInput(i); | ||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | ||||
| GE_CHECK_NOTNULL(item.second); | GE_CHECK_NOTNULL(item.second); | ||||
| auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first); | auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first); | ||||
| #else | |||||
| GE_CHECK_NOTNULL(context.GetInput(i)); | |||||
| auto in_tensor = MakeShared<GeTensor>(input_desc, | |||||
| reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()), | |||||
| context.GetInput(i)->GetSize()); | |||||
| #endif | |||||
| GE_CHECK_NOTNULL(in_tensor); | GE_CHECK_NOTNULL(in_tensor); | ||||
| in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); | in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); | ||||
| in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); | in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); | ||||
| @@ -84,15 +75,9 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||||
| } | } | ||||
| auto tensor = context.GetOutput(i); | auto tensor = context.GetOutput(i); | ||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | ||||
| GE_CHECK_NOTNULL(item.second); | GE_CHECK_NOTNULL(item.second); | ||||
| auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first); | auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first); | ||||
| #else | |||||
| auto out_tensor = MakeShared<GeTensor>(output_desc, | |||||
| reinterpret_cast<const uint8_t *>(tensor->GetData()), | |||||
| tensor->GetSize()); | |||||
| #endif | |||||
| GE_CHECK_NOTNULL(out_tensor); | GE_CHECK_NOTNULL(out_tensor); | ||||
| out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); | out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); | ||||
| out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); | out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include "common/debug/log.h" | #include "common/debug/log.h" | ||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
| #include "hybrid/model/hybrid_model.h" | |||||
| #include "runtime/rt.h" | #include "runtime/rt.h" | ||||
| namespace ge { | namespace ge { | ||||
| @@ -79,12 +80,44 @@ Status IdentityNNodeTask::ExecuteAsync(TaskContext &context, std::function<void( | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /* Does nothing with the context and always returns SUCCESS. */ Status ProfilingTraceNodeTask::UpdateArgs(TaskContext &context) { | |||||
| return SUCCESS; | |||||
| } | |||||
| // Issues one rtProfilerTrace call on the context stream for every stored task def, in order. | |||||
| // Stops at the first failing runtime call and returns its status converted with | |||||
| // RT_ERROR_TO_GE_STATUS; returns SUCCESS when every call succeeds. | |||||
| // NOTE(review): done_callback is neither invoked nor registered here - confirm the executor | |||||
| // does not depend on it for this task type. | |||||
| Status ProfilingTraceNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) { | |||||
| for (const auto &task_def : task_defs_) { | |||||
| // Bind by reference: the timestamp message is only read, so copying it per task is unnecessary. | |||||
| const auto &log_time_stamp_def = task_def.log_timestamp(); | |||||
| uint64_t log_id = log_time_stamp_def.logid(); | |||||
| bool notify = log_time_stamp_def.notify(); | |||||
| uint32_t flat = log_time_stamp_def.flat(); | |||||
| GELOGD("ProfilingTraceTask execute async start. logid = %lu, notify = %d.", log_id, notify); | |||||
| rtError_t rt_ret = rtProfilerTrace(log_id, notify, flat, context.GetStream()); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GELOGD("[%s] ProfilingTraceTask[%lu] execute success.", context.GetNodeName(), log_id); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | ||||
| GE_CHECK_NOTNULL(node); | |||||
| auto op_type = node->GetType(); | auto op_type = node->GetType(); | ||||
| if (op_type == IDENTITY) { | if (op_type == IDENTITY) { | ||||
| task = MakeShared<IdentityNodeTask>(); | task = MakeShared<IdentityNodeTask>(); | ||||
| } else if (op_type == IDENTITYN) { | } else if (op_type == IDENTITYN) { | ||||
| task = MakeShared<IdentityNNodeTask>(); | task = MakeShared<IdentityNNodeTask>(); | ||||
| } else if (op_type == PROFILINGTRAININGTRACE) { | |||||
| auto *task_defs = model.GetTaskDefs(node); | |||||
| if (task_defs == nullptr || task_defs->empty()) { | |||||
| GELOGE(INTERNAL_ERROR, "Profiling node has no task to execute."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| task = MakeShared<ProfilingTraceNodeTask>(*task_defs); | |||||
| } else { | } else { | ||||
| GELOGE(INTERNAL_ERROR, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), op_type.c_str()); | GELOGE(INTERNAL_ERROR, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), op_type.c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -18,6 +18,7 @@ | |||||
| #define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_ | #define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_ | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "proto/task.pb.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -35,6 +36,18 @@ class IdentityNNodeTask : public IdentityNodeTask { | |||||
| Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | ||||
| }; | }; | ||||
| /* NodeTask that keeps its own copy of a list of domi::TaskDef entries. */ class ProfilingTraceNodeTask : public NodeTask { | |||||
| public: | |||||
| /* Copies task_defs into the task_defs_ member. */ explicit ProfilingTraceNodeTask(const std::vector<domi::TaskDef> &task_defs) : task_defs_(task_defs) {} | |||||
| ~ProfilingTraceNodeTask() override = default; | |||||
| /* NodeTask override; defined out of line. */ Status UpdateArgs(TaskContext &context) override; | |||||
| /* NodeTask override; defined out of line. */ Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | |||||
| private: | |||||
| /* Task definitions copied in by the constructor. */ std::vector<domi::TaskDef> task_defs_; | |||||
| }; | |||||
| class RtsNodeExecutor : public NodeExecutor { | class RtsNodeExecutor : public NodeExecutor { | ||||
| public: | public: | ||||
| Status LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const override; | Status LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const override; | ||||
| @@ -123,7 +123,7 @@ class TaskContext { | |||||
| Status status_ = SUCCESS; | Status status_ = SUCCESS; | ||||
| std::vector<void *> workspaces_; | std::vector<void *> workspaces_; | ||||
| uint64_t iteration_ = 0; | uint64_t iteration_ = 0; | ||||
| uint32_t task_id_= 0; | |||||
| uint32_t task_id_ = 0; | |||||
| uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| @@ -36,6 +36,9 @@ | |||||
| #include "model/ge_model.h" | #include "model/ge_model.h" | ||||
| #include "graph/shape_refiner.h" | #include "graph/shape_refiner.h" | ||||
| #include "graph/opsproto_manager.h" | #include "graph/opsproto_manager.h" | ||||
| #include "inc/pass_manager.h" | |||||
| #include "graph/passes/net_output_pass.h" | |||||
| #include "graph/passes/data_pass.h" | |||||
| using std::string; | using std::string; | ||||
| using namespace std; | using namespace std; | ||||
| @@ -233,6 +236,7 @@ class Impl { | |||||
| ModelBufferData &ge_models); | ModelBufferData &ge_models); | ||||
| graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, | graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, | ||||
| bool is_dynamic_input); | bool is_dynamic_input); | ||||
| static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph); | |||||
| void SetRtSocVersion(); | void SetRtSocVersion(); | ||||
| void UpdateThreadContext(); | void UpdateThreadContext(); | ||||
| void LoadOpsProto(); | void LoadOpsProto(); | ||||
| @@ -243,6 +247,22 @@ class Impl { | |||||
| OmgContext omg_context_; | OmgContext omg_context_; | ||||
| }; | }; | ||||
| // Prepares compute_graph for shape inference by running a PassManager loaded with | |||||
| // NetOutputPass ("PrepareNetoutput") and DataPass ("PrepareSubGraphReflection"). | |||||
| // A NOT_CHANGED result from the pass manager is treated the same as SUCCESS. | |||||
| // Returns GRAPH_SUCCESS on success, otherwise the failing status from the null check or the pass run. | |||||
| graphStatus Impl::InferShapePrepare(const ComputeGraphPtr &compute_graph) { | |||||
| GE_CHECK_NOTNULL(compute_graph); | |||||
| PassManager prepare_infershape; | |||||
| prepare_infershape.AddPass("PrepareNetoutput", new(std::nothrow) NetOutputPass); | |||||
| prepare_infershape.AddPass("PrepareSubGraphReflection", new (std::nothrow) DataPass); | |||||
| auto ret = prepare_infershape.Run(compute_graph); | |||||
| if ((ret != SUCCESS) && (ret != NOT_CHANGED)) { | |||||
| GELOGE(ret, "Prepare for infershape failed, ret:%d", ret); | |||||
| return ret; | |||||
| } | |||||
| GELOGD("Prepare for infershape success!"); | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | ||||
| GELOGD("Enter Update Data Attr Process!"); | GELOGD("Enter Update Data Attr Process!"); | ||||
| if (options_.find(kInputShape) == options_.end()) { | if (options_.find(kInputShape) == options_.end()) { | ||||
| @@ -591,7 +611,12 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { | |||||
| return GRAPH_PARAM_INVALID; | return GRAPH_PARAM_INVALID; | ||||
| } | } | ||||
| auto ret = compute_graph->TopologicalSorting(); | |||||
| auto ret = Impl::InferShapePrepare(compute_graph); | |||||
| if (ret != GRAPH_SUCCESS) { | |||||
| return ret; | |||||
| } | |||||
| ret = compute_graph->TopologicalSorting(); | |||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| GELOGE(ret, "Acl topo logical sort failed."); | GELOGE(ret, "Acl topo logical sort failed."); | ||||
| return ret; | return ret; | ||||
| @@ -30,7 +30,6 @@ target_compile_definitions(atc PRIVATE | |||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(atc PRIVATE | target_include_directories(atc PRIVATE | ||||
| @@ -93,7 +92,6 @@ target_compile_definitions(atc_atc.bin PRIVATE | |||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| LOG_CPP | LOG_CPP | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(atc_atc.bin PRIVATE | target_include_directories(atc_atc.bin PRIVATE | ||||
| @@ -154,7 +152,6 @@ target_compile_options(fwk_atc.bin PRIVATE | |||||
| -O2 | -O2 | ||||
| -Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
| -fno-common | -fno-common | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_definitions(fwk_atc.bin PRIVATE | target_compile_definitions(fwk_atc.bin PRIVATE | ||||
| @@ -42,21 +42,29 @@ bool IsOriginalOpFind(OpDescPtr &op_desc, const std::string &op_name) { | |||||
| } | } | ||||
| void KeepDtypeReportError(const std::vector<std::string> &invalid_list) { | void KeepDtypeReportError(const std::vector<std::string> &invalid_list) { | ||||
| std::stringstream error_ops; | |||||
| for (size_t i = 0; i < invalid_list.size(); i++) { | |||||
| std::stringstream err_msg; | |||||
| size_t list_size = invalid_list.size(); | |||||
| err_msg << "config file contains " << list_size; | |||||
| if (list_size == 1) { | |||||
| err_msg << " operator not in the graph, op name:"; | |||||
| } else { | |||||
| err_msg << " operators not in the graph, op names:"; | |||||
| } | |||||
| for (size_t i = 0; i < list_size; i++) { | |||||
| if (i == kMaxOpsNum) { | if (i == kMaxOpsNum) { | ||||
| error_ops << "..."; | |||||
| err_msg << ".."; | |||||
| break; | break; | ||||
| } | } | ||||
| error_ops << invalid_list[i] << " "; | |||||
| err_msg << invalid_list[i]; | |||||
| if (i != list_size - 1) { | |||||
| err_msg << " "; | |||||
| } | |||||
| } | } | ||||
| std::string err_msg = "config file contains "; | |||||
| err_msg = err_msg.append(std::to_string(invalid_list.size())) | |||||
| .append(" operators not in the graph, op names:") | |||||
| .append(error_ops.str()); | |||||
| ErrorManager::GetInstance().ATCReportErrMessage( | ErrorManager::GetInstance().ATCReportErrMessage( | ||||
| "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.c_str()}); | |||||
| GELOGE(FAILED, "%s", err_msg.c_str()); | |||||
| "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.str().c_str()}); | |||||
| GELOGE(FAILED, "%s", err_msg.str().c_str()); | |||||
| } | } | ||||
| Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype) { | Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype) { | ||||
| @@ -96,6 +104,7 @@ Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep | |||||
| invalid_list.push_back(op_name); | invalid_list.push_back(op_name); | ||||
| } | } | ||||
| } | } | ||||
| ifs.close(); | |||||
| if (!invalid_list.empty()) { | if (!invalid_list.empty()) { | ||||
| KeepDtypeReportError(invalid_list); | KeepDtypeReportError(invalid_list); | ||||
| @@ -994,6 +994,8 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output | |||||
| Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype); | Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| (void)ge_generator.Finalize(); | |||||
| (void)ge::GELib::GetInstance()->Finalize(); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -14,7 +14,6 @@ target_compile_options(engine PRIVATE | |||||
| target_compile_definitions(engine PRIVATE | target_compile_definitions(engine PRIVATE | ||||
| REUSE_MEMORY=1 | REUSE_MEMORY=1 | ||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(engine PRIVATE | target_include_directories(engine PRIVATE | ||||
| @@ -477,6 +477,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
| single_op.num_inputs_ = data_ops_.size(); | single_op.num_inputs_ = data_ops_.size(); | ||||
| single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | ||||
| model_params_.memory_size = UINT_MAX; | |||||
| return BuildTaskListForDynamicOp(single_op); | return BuildTaskListForDynamicOp(single_op); | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -61,6 +61,11 @@ const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; | |||||
| const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; | const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; | ||||
| const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; | const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; | ||||
| const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; | const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; | ||||
| // Dynamic input flag. ge.exec.dynamicInput=1, means enable dynamic input, | |||||
| // ge.exec.dynamicGraphExecuteMode, dynamic_execute[default] | |||||
| const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; | |||||
| const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; | |||||
| const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; | |||||
| // Option key: memory init | // Option key: memory init | ||||
| const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | ||||
| @@ -37,10 +37,7 @@ enum FrameworkType { | |||||
| MINDSPORE = 1, | MINDSPORE = 1, | ||||
| TENSORFLOW = 3, | TENSORFLOW = 3, | ||||
| ANDROID_NN, | ANDROID_NN, | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| ONNX, | ONNX, | ||||
| #endif | |||||
| FRAMEWORK_RESERVED, | |||||
| }; | }; | ||||
| enum OpEngineType { | enum OpEngineType { | ||||
| @@ -73,14 +70,15 @@ struct DataBuffer { | |||||
| /// @brief External input data | /// @brief External input data | ||||
| /// | /// | ||||
| struct InputData { | struct InputData { | ||||
| uint32_t index; // Index of input data | |||||
| uint32_t timestamp; // Data creation time | |||||
| uint32_t timeout; // Processing timeout | |||||
| uint32_t model_id; // Model ID required for data processing | |||||
| uint64_t request_id = 0; // Request ID | |||||
| std::vector<DataBuffer> blobs; // Actual input data, currently only supports one input | |||||
| bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false | |||||
| std::string batch_label; // Gear used for current inference in dynamic batch scene | |||||
| uint32_t index; // Index of input data | |||||
| uint32_t timestamp; // Data creation time | |||||
| uint32_t timeout; // Processing timeout | |||||
| uint32_t model_id; // Model ID required for data processing | |||||
| uint64_t request_id = 0; // Request ID | |||||
| std::vector<DataBuffer> blobs; // Actual input data, currently only supports one input | |||||
| bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false | |||||
| std::string batch_label; // Gear used for current inference in dynamic batch scene | |||||
| std::vector<std::vector<int64_t>> shapes; // Input shapes | |||||
| }; | }; | ||||
| /// Output result structure definition | /// Output result structure definition | ||||
| @@ -263,6 +261,8 @@ struct ComputeGraphDescInfo { | |||||
| std::vector<Format> output_format; | std::vector<Format> output_format; | ||||
| std::vector<std::vector<int64_t>> output_shape; | std::vector<std::vector<int64_t>> output_shape; | ||||
| std::vector<DataType> output_data_type; | std::vector<DataType> output_data_type; | ||||
| uint32_t task_id; | |||||
| uint32_t stream_id; | |||||
| }; | }; | ||||
| struct OpDescInfo { | struct OpDescInfo { | ||||
| @@ -529,6 +529,9 @@ REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait"); | |||||
| // aicpu op for online_infer dynamic_dims | // aicpu op for online_infer dynamic_dims | ||||
| REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); | REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); | ||||
| // profiling training trace node | |||||
| REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); | |||||
| enum InputMode { INPUT = 0, CONST_INPUT }; | enum InputMode { INPUT = 0, CONST_INPUT }; | ||||
| // Definition of the processing status enum of the process module | // Definition of the processing status enum of the process module | ||||
| @@ -157,9 +157,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | ||||
| ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||||
| std::vector<ge::TensorDesc> &output_desc); | |||||
| ge::Status CommandHandle(const ge::Command &command); | ge::Status CommandHandle(const ge::Command &command); | ||||
| ge::Status SetDump(const DumpConfig &dump_config); | ge::Status SetDump(const DumpConfig &dump_config); | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "framework/common/fmk_error_codes.h" | #include "framework/common/fmk_error_codes.h" | ||||
| #include "register/register_fmk_types.h" | #include "register/register_fmk_types.h" | ||||
| #include "graph/node.h" | |||||
| using domi::DOMI_TENSOR_ND; | using domi::DOMI_TENSOR_ND; | ||||
| using domi::DOMI_TENSOR_RESERVED; | using domi::DOMI_TENSOR_RESERVED; | ||||
| @@ -120,6 +121,8 @@ struct OmgContext { | |||||
| std::vector<std::vector<int64_t>> user_real_input_dims; | std::vector<std::vector<int64_t>> user_real_input_dims; | ||||
| std::vector<int64_t> cur_dynamic_dims; | std::vector<int64_t> cur_dynamic_dims; | ||||
| bool need_multi_batch = false; | bool need_multi_batch = false; | ||||
| std::vector<NodePtr> data_nodes; | |||||
| std::vector<NodePtr> getnext_nosink_nodes; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -65,7 +65,6 @@ class ModelParser { | |||||
| */ | */ | ||||
| virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| /** | /** | ||||
| * @ingroup domi_omg | * @ingroup domi_omg | ||||
| * @brief Parse relevant data from memory and save it to graph | * @brief Parse relevant data from memory and save it to graph | ||||
| @@ -77,7 +76,6 @@ class ModelParser { | |||||
| * @author | * @author | ||||
| */ | */ | ||||
| virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; | virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; | ||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup domi_omg | * @ingroup domi_omg | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit 11c6cf2921b6a385616a3ebc601b4431b55b07db | |||||
| Subproject commit f08320a6d699f5b537bf66da572bf225b9cd330e | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit 99437c39d26624a14060307366a96b79b1d439c3 | |||||
| Subproject commit b2df31dc5810283e2e483df5ba9517e2ece132a0 | |||||
| @@ -46,6 +46,7 @@ set(SRCS | |||||
| "${GE_CODE_DIR}/metadef/graph/anchor.cc" | "${GE_CODE_DIR}/metadef/graph/anchor.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" | "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/buffer.cc" | "${GE_CODE_DIR}/metadef/graph/buffer.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" | |||||
| "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" | "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/graph.cc" | "${GE_CODE_DIR}/metadef/graph/graph.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/model.cc" | "${GE_CODE_DIR}/metadef/graph/model.cc" | ||||
| @@ -67,6 +67,7 @@ set(SRC_FILES | |||||
| "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" | "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/attr_value.cc" | "${GE_CODE_DIR}/metadef/graph/attr_value.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/buffer.cc" | "${GE_CODE_DIR}/metadef/graph/buffer.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" | |||||
| "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" | "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" | "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/graph.cc" | "${GE_CODE_DIR}/metadef/graph/graph.cc" | ||||
| @@ -110,7 +111,6 @@ target_compile_options(ut_libgraph PRIVATE | |||||
| target_compile_definitions(ut_libgraph PRIVATE | target_compile_definitions(ut_libgraph PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ut_libgraph | target_link_libraries(ut_libgraph | ||||
| @@ -230,7 +230,7 @@ TEST_F(UtestGeTensor, test_tensor_invalid_null) { | |||||
| GeTensor tensor(msg_owner, nullptr); | GeTensor tensor(msg_owner, nullptr); | ||||
| EXPECT_EQ(tensor.GetData().size(), 0); | EXPECT_EQ(tensor.GetData().size(), 0); | ||||
| EXPECT_EQ(tensor.MutableData().size(), 0); | EXPECT_EQ(tensor.MutableData().size(), 0); | ||||
| EXPECT_EQ(tensor.SetData(Buffer(100)), ge::GRAPH_PARAM_INVALID); | |||||
| EXPECT_EQ(tensor.SetData(Buffer(100)), GRAPH_SUCCESS); | |||||
| TensorUtils::SetWeightSize(tensor.MutableTensorDesc(), 100); | TensorUtils::SetWeightSize(tensor.MutableTensorDesc(), 100); | ||||
| EXPECT_EQ(TensorUtils::GetWeightSize(tensor), 0); | EXPECT_EQ(TensorUtils::GetWeightSize(tensor), 0); | ||||
| @@ -89,6 +89,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" | "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/attr_value.cc" | "${GE_CODE_DIR}/metadef/graph/attr_value.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/buffer.cc" | "${GE_CODE_DIR}/metadef/graph/buffer.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" | |||||
| "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" | "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/graph.cc" | "${GE_CODE_DIR}/metadef/graph/graph.cc" | ||||
| "${GE_CODE_DIR}/metadef/graph/gnode.cc" | "${GE_CODE_DIR}/metadef/graph/gnode.cc" | ||||
| @@ -121,6 +122,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" | "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" | ||||
| "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" | "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" | ||||
| "${GE_CODE_DIR}/metadef/register/register.cpp" | "${GE_CODE_DIR}/metadef/register/register.cpp" | ||||
| "${GE_CODE_DIR}/metadef/register/register_pass.cpp" | |||||
| "${GE_CODE_DIR}/metadef/register/op_kernel_registry.cpp" | "${GE_CODE_DIR}/metadef/register/op_kernel_registry.cpp" | ||||
| "${GE_CODE_DIR}/metadef/register/auto_mapping_util.cpp" | "${GE_CODE_DIR}/metadef/register/auto_mapping_util.cpp" | ||||
| "${GE_CODE_DIR}/metadef/register/tensor_assign.cpp" | "${GE_CODE_DIR}/metadef/register/tensor_assign.cpp" | ||||
| @@ -226,6 +228,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/inplace_support_check_pass.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" | ||||
| @@ -302,6 +305,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/common/local_context.cc" | "${GE_CODE_DIR}/ge/graph/common/local_context.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" | |||||
| "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" | "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" | ||||
| "${GE_CODE_DIR}/ge/common/model_saver.cc" | "${GE_CODE_DIR}/ge/common/model_saver.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" | ||||
| @@ -369,6 +373,7 @@ set(GRAPH_LOAD_COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" | |||||
| "${GE_CODE_DIR}/ge/common/thread_pool.cc" | "${GE_CODE_DIR}/ge/common/thread_pool.cc" | ||||
| ) | ) | ||||
| @@ -569,6 +574,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||||
| #"graph/load/new_model_manager_davinci_model_unittest.cc" | #"graph/load/new_model_manager_davinci_model_unittest.cc" | ||||
| #"graph/load/new_model_manager_model_manager_unittest.cc" | #"graph/load/new_model_manager_model_manager_unittest.cc" | ||||
| #"graph/load/new_model_manager_task_build_unittest.cc" | #"graph/load/new_model_manager_task_build_unittest.cc" | ||||
| "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" | |||||
| "graph/load/end_graph_task_unittest.cc" | "graph/load/end_graph_task_unittest.cc" | ||||
| "graph/load/new_model_manager_event_manager_unittest.cc" | "graph/load/new_model_manager_event_manager_unittest.cc" | ||||
| #"graph/load/output_net_output_unittest.cc" | #"graph/load/output_net_output_unittest.cc" | ||||
| @@ -625,6 +631,7 @@ set(PASS_TEST_FILES | |||||
| "graph/passes/net_output_pass_unittest.cc" | "graph/passes/net_output_pass_unittest.cc" | ||||
| "graph/passes/no_use_reshape_remove_pass_unittest.cc" | "graph/passes/no_use_reshape_remove_pass_unittest.cc" | ||||
| "graph/passes/infershape_pass_unittest.cc" | "graph/passes/infershape_pass_unittest.cc" | ||||
| "graph/passes/multi_batch_clone_pass_unittest.cc" | |||||
| ) | ) | ||||
| set(KERNEL_TEST_FILES | set(KERNEL_TEST_FILES | ||||
| @@ -720,7 +727,6 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) | |||||
| target_compile_definitions(ge_ut_common PRIVATE | target_compile_definitions(ge_ut_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_ut_common PRIVATE | target_link_libraries(ge_ut_common PRIVATE | ||||
| @@ -735,7 +741,6 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F | |||||
| target_compile_definitions(ge_ut_common_format PRIVATE | target_compile_definitions(ge_ut_common_format PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_ut_common_format PRIVATE | target_link_libraries(ge_ut_common_format PRIVATE | ||||
| @@ -792,7 +797,6 @@ add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_SRCS} $ | |||||
| target_compile_definitions(ge_load_common PRIVATE | target_compile_definitions(ge_load_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_load_common PRIVATE | target_link_libraries(ge_load_common PRIVATE | ||||
| @@ -807,7 +811,6 @@ add_library(ge_execute_common STATIC ${GRAPH_EXECUTE_COMMON_SRC_FILES} ${PROTO_S | |||||
| target_compile_definitions(ge_execute_common PRIVATE | target_compile_definitions(ge_execute_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_execute_common PRIVATE | target_link_libraries(ge_execute_common PRIVATE | ||||
| @@ -822,7 +825,6 @@ add_library(ge_build_common STATIC ${GRAPH_BUILD_COMMON_SRC_FILES} ${PROTO_SRCS} | |||||
| target_compile_definitions(ge_build_common PRIVATE | target_compile_definitions(ge_build_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_build_common PRIVATE | target_link_libraries(ge_build_common PRIVATE | ||||
| @@ -895,10 +897,6 @@ target_compile_options(ut_libge_others_utest PRIVATE | |||||
| -g --coverage -fprofile-arcs -ftest-coverage | -g --coverage -fprofile-arcs -ftest-coverage | ||||
| ) | ) | ||||
| target_compile_definitions(ut_libge_others_utest PRIVATE | |||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | |||||
| target_link_libraries(ut_libge_others_utest | target_link_libraries(ut_libge_others_utest | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov | ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov | ||||
| @@ -916,10 +914,6 @@ target_compile_options(ut_libge_kernel_utest PRIVATE | |||||
| -g --coverage -fprofile-arcs -ftest-coverage | -g --coverage -fprofile-arcs -ftest-coverage | ||||
| ) | ) | ||||
| target_compile_definitions(ut_libge_kernel_utest PRIVATE | |||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | |||||
| target_link_libraries(ut_libge_kernel_utest | target_link_libraries(ut_libge_kernel_utest | ||||
| $<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
| ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov | ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov | ||||
| @@ -940,7 +934,6 @@ target_compile_options(ut_libge_distinct_load_utest PRIVATE | |||||
| target_compile_definitions(ut_libge_distinct_load_utest PRIVATE | target_compile_definitions(ut_libge_distinct_load_utest PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ut_libge_distinct_load_utest | target_link_libraries(ut_libge_distinct_load_utest | ||||
| @@ -32,6 +32,18 @@ class UtestDavinciModel : public testing::Test { | |||||
| void SetUp() {} | void SetUp() {} | ||||
| void TearDown() {} | void TearDown() {} | ||||
| public: | |||||
| NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { | |||||
| GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| auto op_desc = std::make_shared<OpDesc>(name, type); | |||||
| for (auto i = 0; i < in_num; ++i) { | |||||
| op_desc->AddInputDesc(test_desc); | |||||
| } | |||||
| for (auto i = 0; i < out_num; ++i) { | |||||
| op_desc->AddOutputDesc(test_desc); | |||||
| } | |||||
| return graph->AddNode(op_desc); | |||||
| } | |||||
| }; | }; | ||||
| TEST_F(UtestDavinciModel, init_success) { | TEST_F(UtestDavinciModel, init_success) { | ||||
| @@ -111,6 +123,12 @@ TEST_F(UtestDavinciModel, init_success) { | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 1); | EXPECT_EQ(model.output_addrs_list_.size(), 1); | ||||
| EXPECT_EQ(model.task_list_.size(), 2); | EXPECT_EQ(model.task_list_.size(), 2); | ||||
| OutputData output_data; | |||||
| vector<OutputTensorInfo> outputs; | |||||
| EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); | |||||
| EXPECT_EQ(output_data.blobs.size(), 1); | |||||
| EXPECT_EQ(outputs.size(), 1); | |||||
| ProfilingManager::Instance().is_load_profiling_ = false; | ProfilingManager::Instance().is_load_profiling_ = false; | ||||
| } | } | ||||
| @@ -121,13 +139,14 @@ TEST_F(UtestDavinciModel, init_data_op) { | |||||
| model.runtime_param_.mem_size = 5120000; | model.runtime_param_.mem_size = 5120000; | ||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ||||
| OpDescPtr op_input = CreateOpDesc("data", DATA); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | ||||
| TensorUtils::SetSize(tensor, 512); | TensorUtils::SetSize(tensor, 512); | ||||
| OpDescPtr op_input = CreateOpDesc("data", DATA); | |||||
| op_input->AddInputDesc(tensor); | op_input->AddInputDesc(tensor); | ||||
| op_input->AddOutputDesc(tensor); | op_input->AddOutputDesc(tensor); | ||||
| op_input->SetInputOffset({1024}); | op_input->SetInputOffset({1024}); | ||||
| op_input->SetOutputOffset({5120}); | |||||
| op_input->SetOutputOffset({1024}); | |||||
| NodePtr node_input = graph->AddNode(op_input); | NodePtr node_input = graph->AddNode(op_input); | ||||
| OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | ||||
| @@ -150,12 +169,14 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) { | |||||
| model.runtime_param_.mem_size = 5120000; | model.runtime_param_.mem_size = 5120000; | ||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ||||
| OpDescPtr op_input = CreateOpDesc("data", DATA); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | ||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_input = CreateOpDesc("data", DATA); | |||||
| op_input->AddInputDesc(tensor); | op_input->AddInputDesc(tensor); | ||||
| op_input->AddOutputDesc(tensor); | op_input->AddOutputDesc(tensor); | ||||
| op_input->SetInputOffset({1024}); | op_input->SetInputOffset({1024}); | ||||
| op_input->SetOutputOffset({5120}); | |||||
| op_input->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_input); | NodePtr node = graph->AddNode(op_input); | ||||
| uint32_t data_op_index = 0; | uint32_t data_op_index = 0; | ||||
| @@ -174,8 +195,10 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { | |||||
| model.runtime_param_.mem_size = 5120000; | model.runtime_param_.mem_size = 5120000; | ||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ||||
| OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | ||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | |||||
| op_output->AddInputDesc(tensor); | op_output->AddInputDesc(tensor); | ||||
| op_output->SetInputOffset({1024}); | op_output->SetInputOffset({1024}); | ||||
| op_output->SetSrcName( { "data" } ); | op_output->SetSrcName( { "data" } ); | ||||
| @@ -282,4 +305,458 @@ TEST_F(UtestDavinciModel, init_unknown) { | |||||
| const vector<void *> outputs = { &virtual_addr }; | const vector<void *> outputs = { &virtual_addr }; | ||||
| EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); | EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); | ||||
| } | } | ||||
| TEST_F(UtestDavinciModel, ReturnNoOutput_test) { | |||||
| DavinciModel model(0, nullptr); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); | |||||
| var1->AddInputDesc(tensor); | |||||
| var1->AddOutputDesc(tensor); | |||||
| var1->SetInputOffset({1024}); | |||||
| var1->SetOutputOffset({1024}); | |||||
| model.variable_op_list_.push_back(var1); | |||||
| EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, SyncVarData_test) { | |||||
| DavinciModel model(0, nullptr); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); | |||||
| var1->AddInputDesc(tensor); | |||||
| var1->AddOutputDesc(tensor); | |||||
| var1->SetInputOffset({1024}); | |||||
| var1->SetOutputOffset({1024}); | |||||
| model.variable_op_list_.push_back(var1); | |||||
| EXPECT_NE(model.SyncVarData(), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| OpDescPtr op_output = CreateOpDesc("output_ascend_mbatch_batch_1", NETOUTPUT); | |||||
| op_output->AddInputDesc(tensor); | |||||
| op_output->SetInputOffset({1024}); | |||||
| NodePtr node_output = graph->AddNode(op_output); | |||||
| EXPECT_EQ(model.InitRealSizeAndShapeInfo(graph, node_output), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ2) { | |||||
| DavinciModel model(0, nullptr); | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| OpDescPtr data1 = CreateOpDesc("data1", DATA); | |||||
| GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data1->AddInputDesc(shape_desc); | |||||
| data1->AddOutputDesc(shape_desc); | |||||
| NodePtr data1_node = graph->AddNode(data1); | |||||
| OpDescPtr case_node = CreateOpDesc("case1", CASE); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| case_node->AddInputDesc(tensor); | |||||
| case_node->AddOutputDesc(tensor); | |||||
| NodePtr case1_node = graph->AddNode(case_node); | |||||
| OpDescPtr output = CreateOpDesc("output1", NETOUTPUT); | |||||
| output->AddInputDesc(tensor); | |||||
| output->SetSrcName( { "case1" } ); | |||||
| output->SetSrcIndex( { 0 } ); | |||||
| NodePtr output_node = graph->AddNode(output); | |||||
| GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0)); | |||||
| GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
| (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8"); | |||||
| (void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true); | |||||
| model.is_getnext_sink_dynamic_ = false; | |||||
| model.is_online_infer_dynamic_ = true; | |||||
| auto ret = model.InitRealSizeAndShapeInfo(graph, output_node); | |||||
| // GetGearAndRealOutShapeInfo without ATTR_NAME_DYNAMIC_OUTPUT_DIMS | |||||
| EXPECT_EQ(ret, SUCCESS); | |||||
| vector<string> dynamic_output_dims = {"0,0,1,1,0,2,2,0,4,3,0,8"}; | |||||
| (void)AttrUtils::SetListStr(output_node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims); | |||||
| ret = model.InitRealSizeAndShapeInfo(graph, output_node); | |||||
| EXPECT_EQ(ret, SUCCESS); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ3) { | |||||
| DavinciModel model(0, nullptr); | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| OpDescPtr data1 = CreateOpDesc("data1", DATA); | |||||
| GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data1->AddInputDesc(shape_desc); | |||||
| data1->AddOutputDesc(shape_desc); | |||||
| NodePtr data1_node = graph->AddNode(data1); | |||||
| OpDescPtr shape_node = CreateOpDesc("ascend_mbatch_get_dynamic_dims_node", GETDYNAMICDIMS); | |||||
| GeTensorDesc in_tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| GeTensorDesc out_tensor(GeShape({4,3}), FORMAT_NCHW, DT_FLOAT); | |||||
| shape_node->AddInputDesc(in_tensor); | |||||
| shape_node->AddOutputDesc(out_tensor); | |||||
| NodePtr get_dynamic_dims_node = graph->AddNode(shape_node); | |||||
| OpDescPtr output = CreateOpDesc("output1", NETOUTPUT); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| output->AddInputDesc(tensor); | |||||
| output->SetSrcName( { "data1", "ascend_mbatch_get_dynamic_dims_node" } ); | |||||
| output->SetSrcIndex( { 0, 1 } ); | |||||
| NodePtr output_node = graph->AddNode(output); | |||||
| GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
| GraphUtils::AddEdge(get_dynamic_dims_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(1)); | |||||
| (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1,3;;4,3;,3"); | |||||
| model.is_getnext_sink_dynamic_ = true; | |||||
| model.is_online_infer_dynamic_ = false; | |||||
| auto ret = model.InitRealSizeAndShapeInfo(graph, output_node); | |||||
| EXPECT_EQ(ret, SUCCESS); | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 4; | |||||
| ret = model.InitRealSizeAndShapeInfo(graph, output_node); | |||||
| EXPECT_EQ(ret, SUCCESS); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_info) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); | |||||
| GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
| aipp_attr.SetAttr("aipp_mode", GeAttrValue::CreateFrom<GeAttrValue::INT>(domi::AippOpParams::dynamic)); | |||||
| aipp_attr.SetAttr("related_input_rank", GeAttrValue::CreateFrom<GeAttrValue::INT>(0)); | |||||
| aipp_attr.SetAttr("max_src_image_size", GeAttrValue::CreateFrom<GeAttrValue::INT>(2048)); | |||||
| aipp_attr.SetAttr("support_rotation", GeAttrValue::CreateFrom<GeAttrValue::INT>(1)); | |||||
| EXPECT_TRUE(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr)); | |||||
| AippConfigInfo aipp_info; | |||||
| EXPECT_EQ(model.GetAippInfo(0, aipp_info), ACL_ERROR_GE_AIPP_NOT_EXIST); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetAippInfo(0, aipp_info), SUCCESS); | |||||
| EXPECT_EQ(aipp_info.aipp_mode, domi::AippOpParams::dynamic); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_static) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); | |||||
| AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); | |||||
| InputAippType aipp_type; | |||||
| size_t aipp_index = 0; | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(aipp_type, DATA_WITH_STATIC_AIPP); | |||||
| EXPECT_EQ(aipp_index, 0xFFFFFFFFu); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_dynamic) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); | |||||
| AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp"); | |||||
| InputAippType aipp_type; | |||||
| size_t aipp_index = 0; | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_releated) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| { | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); | |||||
| AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp"); | |||||
| } | |||||
| { | |||||
| OpDescPtr op_desc = CreateOpDesc("releated_aipp", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 1 | |||||
| } | |||||
| InputAippType aipp_type; | |||||
| size_t aipp_index = 0; | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(aipp_type, DATA_WITH_DYNAMIC_AIPP); | |||||
| EXPECT_EQ(aipp_index, 1); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 2); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 2); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_dynamic_conf) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); | |||||
| InputAippType aipp_type; | |||||
| size_t aipp_index = 0; | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(aipp_type, DYNAMIC_AIPP_NODE); | |||||
| EXPECT_EQ(aipp_index, 0xFFFFFFFFU); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_dynamic_invalid) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_invalid"); | |||||
| InputAippType aipp_type; | |||||
| size_t aipp_index = 0; | |||||
| EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); | |||||
| EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_input_info_empty) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| vector<string> inputs = {}; | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); | |||||
| vector<string> outputs = {}; | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); | |||||
| OriginInputInfo orig_input_info; | |||||
| EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_input_info_normal) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| vector<string> inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); | |||||
| vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); | |||||
| OriginInputInfo orig_input_info; | |||||
| EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_input_info_invalid) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| vector<string> inputs = { "NCHW:DT_FLOAT:TensorName" }; // Invalid | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); | |||||
| vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); | |||||
| OriginInputInfo orig_input_info; | |||||
| EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); | |||||
| EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID); | |||||
| EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) { | |||||
| DavinciModel model(0, nullptr); | |||||
| model.ge_model_ = make_shared<GeModel>(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr op_desc = CreateOpDesc("data", DATA); | |||||
| op_desc->AddInputDesc(tensor); | |||||
| op_desc->AddOutputDesc(tensor); | |||||
| op_desc->SetInputOffset({1024}); | |||||
| op_desc->SetOutputOffset({1024}); | |||||
| NodePtr node = graph->AddNode(op_desc); // op_index 0 | |||||
| vector<string> inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); | |||||
| vector<string> outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; | |||||
| AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); | |||||
| vector<InputOutputDims> input_dims; | |||||
| vector<InputOutputDims> output_dims; | |||||
| EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), ACL_ERROR_GE_AIPP_NOT_EXIST); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), SUCCESS); | |||||
| EXPECT_EQ(input_dims.size(), 1); | |||||
| EXPECT_EQ(output_dims.size(), 1); | |||||
| EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||||
| EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||||
| EXPECT_EQ(model.op_list_.size(), 1); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -1120,7 +1120,6 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_init_success) { | |||||
| op_desc->AddOutputDesc(descout); | op_desc->AddOutputDesc(descout); | ||||
| op_desc->SetId(0); | op_desc->SetId(0); | ||||
| model.data_op_list_.push_back(op_desc); | |||||
| model.op_list_[0] = op_desc; | model.op_list_[0] = op_desc; | ||||
| domi::TaskDef task_def; | domi::TaskDef task_def; | ||||
| @@ -254,6 +254,17 @@ TEST_F(UtestModelManagerDavinciModel, eventlist_success) { | |||||
| delete model; | delete model; | ||||
| } | } | ||||
| // test Shrink | |||||
| TEST_F(UtestModelManagerDavinciModel, shrink_success) { | |||||
| DavinciModel model(0, g_label_call_back); | |||||
| OpDescPtr op_desc_ptr = make_shared<OpDesc>("Cast", "Cast"); | |||||
| void *addr = nullptr; | |||||
| rtMalloc(&addr, 128, RT_MEMORY_HBM); | |||||
| model.saved_task_addrs_.emplace(op_desc_ptr, addr); | |||||
| model.Shrink(); | |||||
| EXPECT_EQ(model.saved_task_addrs_.isEmpty(), true); | |||||
| } | |||||
| // test rtMalloc | // test rtMalloc | ||||
| TEST_F(UtestModelManagerDavinciModel, failed_reset_device) { | TEST_F(UtestModelManagerDavinciModel, failed_reset_device) { | ||||
| DavinciModel model(0, g_label_call_back); | DavinciModel model(0, g_label_call_back); | ||||
| @@ -0,0 +1,66 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <cce/compiler_stub.h> | |||||
| #include "common/debug/log.h" | |||||
| #include "common/model_parser/base.h" | |||||
| #include "common/properties_manager.h" | |||||
| #include "common/types.h" | |||||
| #include "common/l2_cache_optimize.h" | |||||
| #define private public | |||||
| #define protected public | |||||
| #include "graph/load/new_model_manager/model_manager.h" | |||||
| #include "common/helper/om_file_helper.h" | |||||
| #include "common/op/ge_op_utils.h" | |||||
| #include "graph/load/graph_loader.h" | |||||
| #include "graph/load/new_model_manager/davinci_model.h" | |||||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | |||||
| //#include "new_op_test_utils.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| namespace ge { | |||||
| const static std::string ENC_KEY = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; | |||||
| class UtestModelManagerModelManagerAicpu : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestModelManagerModelManagerAicpu, checkAicpuOptype) { | |||||
| ModelManager model_manager; | |||||
| uint32_t model_id = 0; | |||||
| std::vector<std::string> aicpu_op_list; | |||||
| std::vector<std::string> aicpu_tf_list; | |||||
| aicpu_tf_list.emplace_back("FrameworkOp"); | |||||
| aicpu_tf_list.emplace_back("Unique"); | |||||
| model_manager.LaunchKernelCheckAicpuOp(aicpu_op_list, aicpu_tf_list); | |||||
| // Load allow listener is null | |||||
| //EXPECT_EQ(ge::FAILED, mm.LoadModelOffline(model_id, data, nullptr, nullptr)); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,247 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/passes/multi_batch_clone_pass.h" | |||||
| #include <gtest/gtest.h> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include "inc/pass_manager.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/common/local_context.h" | |||||
| #include "graph/passes/multi_batch_pass.h" | |||||
| #include "graph/preprocess/multi_batch_copy_graph.h" | |||||
| #include "graph/preprocess/insert_op/util_insert_aipp_op.h" | |||||
| #include "framework/omg/omg_inner_types.h" | |||||
| #include "register/op_registry.h" | |||||
| namespace ge{ | |||||
| class UtestMultiBatchClonePass : public testing::Test { | |||||
| protected: | |||||
| void SetUp() { | |||||
| SetLocalOmgContext(domi::GetContext()); | |||||
| GetLocalOmgContext().dynamic_image_size.clear(); | |||||
| GetLocalOmgContext().dynamic_batch_size.clear(); | |||||
| } | |||||
| void TearDown() { | |||||
| GetLocalOmgContext().dynamic_image_size.clear(); | |||||
| GetLocalOmgContext().dynamic_batch_size.clear(); | |||||
| GetLocalOmgContext().dynamic_node_type.clear(); | |||||
| } | |||||
| public: | |||||
| NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { | |||||
| GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| auto op_desc = std::make_shared<OpDesc>(name, type); | |||||
| for (auto i = 0; i < in_num; ++i) { | |||||
| op_desc->AddInputDesc(test_desc); | |||||
| } | |||||
| for (auto i = 0; i < out_num; ++i) { | |||||
| op_desc->AddOutputDesc(test_desc); | |||||
| } | |||||
| return graph->AddNode(op_desc); | |||||
| } | |||||
| NodePtr MakeConstNode(const ComputeGraphPtr &graph) { | |||||
| static uint32_t index = 0; | |||||
| GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| auto op_desc = std::make_shared<OpDesc>("dynamic_const_" + std::to_string(index++), "Const"); | |||||
| op_desc->AddOutputDesc(test_desc); | |||||
| return graph->AddNode(op_desc); | |||||
| } | |||||
| void make_original_graph(const ComputeGraphPtr &graph) { | |||||
| auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); | |||||
| { | |||||
| auto data1 = MakeNode(graph, 1, 1, "data", "Data"); | |||||
| GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||||
| data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||||
| AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); | |||||
| GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})}; | |||||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||||
| auto const1 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||||
| auto const2 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); | |||||
| } | |||||
| auto bn_conv1 = MakeNode(graph, 4, 1, "bn_conv1", "BNInference"); | |||||
| { | |||||
| GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(0)); | |||||
| auto const1 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const1->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(1)); | |||||
| auto const2 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const2->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(2)); | |||||
| auto const3= MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const3->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(3)); | |||||
| } | |||||
| auto scale_conv1 = MakeNode(graph, 4, 1, "scale1", "Scale"); | |||||
| { | |||||
| GraphUtils::AddEdge(bn_conv1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(0)); | |||||
| auto const1 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(1)); | |||||
| auto const2 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const2->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(2)); | |||||
| } | |||||
| auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||||
| GraphUtils::AddEdge(scale_conv1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
| } | |||||
| void GraphWithJustData(const ComputeGraphPtr &graph) { | |||||
| auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); | |||||
| { | |||||
| auto data1 = MakeNode(graph, 1, 1, "data", "Data"); | |||||
| GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||||
| data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||||
| AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); | |||||
| GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})}; | |||||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||||
| auto const1 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||||
| auto const2 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); | |||||
| } | |||||
| auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||||
| GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
| } | |||||
| void GraphWithGetNextNosink(const ComputeGraphPtr &graph) { | |||||
| auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); | |||||
| { | |||||
| auto data1 = MakeNode(graph, 1, 1, "IteratorGetNext_data", "Data"); | |||||
| GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||||
| data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||||
| AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); | |||||
| GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})}; | |||||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||||
| auto const1 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||||
| auto const2 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); | |||||
| } | |||||
| auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||||
| GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
| } | |||||
| // getnext has one data and has one out of shape | |||||
| void GraphWithGetNextSink(const ComputeGraphPtr &graph) { | |||||
| auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); | |||||
| { | |||||
| auto data1 = MakeNode(graph, 1, 2, "data", "IteratorV2"); | |||||
| GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||||
| data1->GetOpDesc()->UpdateOutputDesc(1, shape_desc); | |||||
| AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); | |||||
| GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector<int64_t>{-1,3,224,224})}; | |||||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||||
| auto identity = MakeNode(graph, 1, 0, "identity", "Identity"); | |||||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(1), identity->GetInDataAnchor(0)); | |||||
| auto const1 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||||
| auto const2 = MakeConstNode(graph); | |||||
| GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); | |||||
| } | |||||
| auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||||
| GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
| } | |||||
| }; | |||||
| // graph is nullptr | |||||
| TEST_F(UtestMultiBatchClonePass, graph_nullptr) { | |||||
| PassManager pass_manager; | |||||
| pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass); | |||||
| ComputeGraphPtr graph; | |||||
| EXPECT_EQ(pass_manager.Run(graph), PARAM_INVALID); | |||||
| } | |||||
| // graph with subgraph | |||||
| TEST_F(UtestMultiBatchClonePass, graph_with_subgraph) { | |||||
| PassManager pass_manager; | |||||
| pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass); | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| make_original_graph(graph); | |||||
| EXPECT_EQ(pass_manager.Run(graph), SUCCESS); | |||||
| ComputeGraphPtr owner = std::make_shared<ComputeGraph>("test_owner"); | |||||
| auto func_node = MakeNode(owner, 3, 1, "test_if", "If"); | |||||
| graph->SetParentNode(func_node); | |||||
| graph->SetParentGraph(owner); | |||||
| EXPECT_EQ(pass_manager.Run(graph), SUCCESS); | |||||
| } | |||||
| //graph is uncompute graph, not need to do multi batch | |||||
| TEST_F(UtestMultiBatchClonePass, uncompute_graph) { | |||||
| MultiBatchClonePass multi_batch_clone; | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| make_original_graph(graph); | |||||
| GetLocalOmgContext().need_multi_batch = false; | |||||
| EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS); | |||||
| } | |||||
| //compute_graph with data from DATA | |||||
| TEST_F(UtestMultiBatchClonePass, compute_graph_with_data) { | |||||
| MultiBatchClonePass multi_batch_clone; | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| GraphWithJustData(graph); | |||||
| GetLocalOmgContext().need_multi_batch = true; | |||||
| EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS); | |||||
| GetLocalOmgContext().dynamic_node_type = DATA; | |||||
| GetLocalOmgContext().dynamic_dims = "1;2;4;8"; | |||||
| EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS); | |||||
| EXPECT_EQ(GetLocalOmgContext().data_nodes.size(), 1); | |||||
| } | |||||
| //compute_graph with data from GetNext_nosink | |||||
| TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_nosink) { | |||||
| MultiBatchClonePass multi_batch_clone; | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| GraphWithGetNextNosink(graph); | |||||
| GetLocalOmgContext().need_multi_batch = true; | |||||
| GetLocalOmgContext().dynamic_node_type = GETNEXT; | |||||
| GetLocalOmgContext().dynamic_dims = "1;2;4;8"; | |||||
| EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS); | |||||
| EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 1); | |||||
| } | |||||
| //compute_graph with data from GetNext_nosink | |||||
| TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_sink) { | |||||
| MultiBatchClonePass multi_batch_clone; | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| GraphWithGetNextSink(graph); | |||||
| GetLocalOmgContext().need_multi_batch = true; | |||||
| GetLocalOmgContext().dynamic_node_type = GETNEXT; | |||||
| GetLocalOmgContext().dynamic_dims = "1;2;4;8"; | |||||
| EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS); | |||||
| EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 0); | |||||
| } | |||||
| } | |||||
| @@ -1,60 +1,60 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_

// Included here so the header is self-contained: every struct below uses uint64_t.
#include <cstdint>

// Kind of kernel an operator is implemented with.
enum OpKernelType {
  TF_KERNEL,
  CPU_KERNEL
};

// Reason codes reported for an operator that failed the support check.
enum ReturnCode {
  OP_TYPE_NOT_SUPPORT,
  FORMAT_NOT_SUPPORT,
  DTYPE_NOT_SUPPORT
};

// One byte alignment: the structs below are laid out without padding, so the
// order and types of their fields define the exact byte layout.
#pragma pack(push, 1)

// Describes one operator.
// NOTE(review): opType presumably carries the address of the type string and
// opLen its length - confirm against the producer of this structure.
struct SysOpInfo {
  uint64_t opLen;
  uint64_t opType;
  OpKernelType kernelsType;
};

// Parameter description of an operator.
// NOTE(review): dtypeList / formatList look like addresses of `num`-element arrays - confirm.
struct OpParamInfo {
  uint64_t num;
  uint64_t dtypeList;
  uint64_t formatList;
};

// Request of the operator support check.
struct SysOpCheckInfo {
  uint64_t opListNum;
  uint64_t offSetLen;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};

// Response of the operator support check.
struct SysOpCheckResp {
  uint64_t opListNum;
  bool isWithoutJson;
  uint64_t returnCodeList;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};

#pragma pack(pop)

#endif  // AICPU_OP_TYPE_LIST_H_
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_

// Included here so the header is self-contained: every struct below uses uint64_t.
#include <cstdint>

// Kind of kernel an operator is implemented with.
enum OpKernelType {
  TF_KERNEL,
  CPU_KERNEL
};

// Reason codes reported for an operator that failed the support check.
enum ReturnCode {
  OP_TYPE_NOT_SUPPORT,
  FORMAT_NOT_SUPPORT,
  DTYPE_NOT_SUPPORT
};

// One byte alignment: the structs below are laid out without padding, so the
// order and types of their fields define the exact byte layout.
#pragma pack(push, 1)

// Describes one operator.
// NOTE(review): opType presumably carries the address of the type string and
// opLen its length - confirm against the producer of this structure.
struct SysOpInfo {
  uint64_t opLen;
  uint64_t opType;
  OpKernelType kernelsType;
};

// Parameter description of an operator.
// NOTE(review): dtypeList / formatList look like addresses of `num`-element arrays - confirm.
struct OpParamInfo {
  uint64_t num;
  uint64_t dtypeList;
  uint64_t formatList;
};

// Request of the operator support check.
struct SysOpCheckInfo {
  uint64_t opListNum;
  uint64_t offSetLen;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};

// Response of the operator support check.
struct SysOpCheckResp {
  uint64_t opListNum;
  bool isWithoutJson;
  uint64_t returnCodeList;
  uint64_t sysOpInfoList;
  uint64_t opParamInfoList;
};

#pragma pack(pop)

#endif  // AICPU_OP_TYPE_LIST_H_
| @@ -33,15 +33,6 @@ extern "C" { | |||||
| /** | |||||
| * @brief Get the rank number in the group. | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @param rankSize A pointer identifying the rank number. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); | |||||
| /** | /** | ||||
| * @brief Get the rank number in the group. | * @brief Get the rank number in the group. | ||||
| * | * | ||||
| @@ -51,15 +42,6 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); | |||||
| */ | */ | ||||
| HcclResult HcomGetRankSize(const char *group, u32 *rankSize); | HcclResult HcomGetRankSize(const char *group, u32 *rankSize); | ||||
| /** | |||||
| * @brief Get the rank number of this rank's server within the group. | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @param localRankSize A pointer identifying the rank number. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); | |||||
| /** | /** | ||||
| * @brief Get the rank number of this rank's server within the group. | * @brief Get the rank number of this rank's server within the group. | ||||
| * | * | ||||
| @@ -69,15 +51,6 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); | |||||
| */ | */ | ||||
| HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); | HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); | ||||
| /** | |||||
| * @brief Get the rank id of this rank. | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @param rankId A pointer identifying the rank id. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_get_rank_id(const char *group, u32 *rankId); | |||||
| /** | /** | ||||
| * @brief Get the rank id of this rank. | * @brief Get the rank id of this rank. | ||||
| * | * | ||||
| @@ -87,15 +60,6 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); | |||||
| */ | */ | ||||
| HcclResult HcomGetRankId(const char *group, u32 *rankId); | HcclResult HcomGetRankId(const char *group, u32 *rankId); | ||||
| /** | |||||
| * @brief Get the local rank id of this rank's server within the group. | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @param localRankId A pointer identifying the local rank id. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||||
| /** | /** | ||||
| * @brief Get the local rank id of this rank's server within the group. | * @brief Get the local rank id of this rank's server within the group. | ||||
| * | * | ||||
| @@ -105,16 +69,6 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); | |||||
| */ | */ | ||||
| HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); | HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); | ||||
| /** | |||||
| * @brief Get the world rank id according to the group rank id. | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @param groupRank An integer(u32) identifying the group rank id. | |||||
| * @param worldRank A pointer identifying the world rank id. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); | |||||
| /** | /** | ||||
| * @brief Get the world rank id according to the group rank id. | * @brief Get the world rank id according to the group rank id. | ||||
| * | * | ||||
| @@ -125,16 +79,6 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, | |||||
| */ | */ | ||||
| HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); | HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); | ||||
| /** | |||||
| * @brief Get the group rank id according to the world rank id. | |||||
| * | |||||
| * @param worldRank An integer(u32) identifying the world rank id. | |||||
| * @param group A string identifying the group name. | |||||
| * @param groupRank A pointer identifying the group rank id. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); | |||||
| /** | /** | ||||
| * @brief Get the group rank id according to the world rank id. | * @brief Get the group rank id according to the world rank id. | ||||
| * | * | ||||
| @@ -145,16 +89,6 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, | |||||
| */ | */ | ||||
| HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); | HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); | ||||
| /** | |||||
| * @brief Create group. | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @param rankNum An integer(u32) identifying the number of ranks in the group. | |||||
| * @param rankIds A list identifying the ranks in the group. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); | |||||
| /** | /** | ||||
| * @brief Create group. | * @brief Create group. | ||||
| * | * | ||||
| @@ -165,14 +99,6 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); | |||||
| */ | */ | ||||
| HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); | HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); | ||||
| /** | |||||
| * @brief Destroy group | |||||
| * | |||||
| * @param group A string identifying the group name. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult hcom_destroy_group(const char *group); | |||||
| /** | /** | ||||
| * @brief Destroy group | * @brief Destroy group | ||||
| * | * | ||||
| @@ -189,46 +115,54 @@ HcclResult HcomDestroyGroup(const char *group); | |||||
| * @param IdxList A list identifying the index of end gradient in each segment. | * @param IdxList A list identifying the index of end gradient in each segment. | ||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); | |||||
| extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); | |||||
| /** | /** | ||||
| * @brief Set the gradient split strategy within the group, according to gradient index. | |||||
| * @brief Set the gradient split strategy within the group, according to gradient data size. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | * @param group A string identifying the group name. | ||||
| * @param segmentNum An integer(u32) identifying the segments number of gradients. | * @param segmentNum An integer(u32) identifying the segments number of gradients. | ||||
| * @param IdxList A list identifying the index of end gradient in each segment. | |||||
| * @param sizeList A list identifying the percent of each segment. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); | |||||
| extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||||
| /** | /** | ||||
| * @brief Set the gradient split strategy within the group, according to gradient data size. | |||||
| * @brief Initialize hcom executor. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | |||||
| * @param segmentNum An integer(u32) identifying the segments number of gradients. | |||||
| * @param sizeList A list identifying the percent of each segment. | |||||
| * @param void | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); | |||||
| HcclResult HcomExecInitialize(); | |||||
| /** | /** | ||||
| * @brief Set the gradient split strategy within the group, according to gradient data size. | |||||
| * @brief Finalize hcom executor. | |||||
| * | * | ||||
| * @param group A string identifying the group name. | |||||
| * @param segmentNum An integer(u32) identifying the segments number of gradients. | |||||
| * @param sizeList A list identifying the percent of each segment. | |||||
| * @param void | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||||
| HcclResult HcomExecFinalize(); | |||||
| /** | /** | ||||
| * @brief Register memories and init resources for remote access. | |||||
| * @brief Put collective communication operation into hcom executor. | |||||
| * | * | ||||
| * @param addrList memory addresses for remote access. | |||||
| * @param count number of remote memory addresses. | |||||
| * @param opInfo information about collective communication operation. | |||||
| * @param callback callback after collective communication operation. | |||||
| * @return HcclResult | * @return HcclResult | ||||
| */ | */ | ||||
| extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); | |||||
| HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); | |||||
| /** | |||||
| * @brief Put remote access operation into hcom executor. | |||||
| * | |||||
| * @param remoteAccessType operation type (read or write). | |||||
| * @param addrInfos address information about remote access operation. | |||||
| * @param callback callback after remote access operation. | |||||
| * @return HcclResult | |||||
| */ | |||||
| HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||||
| const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | |||||
| std::function<void(HcclResult status)> callback); | |||||
| /** | /** | ||||
| * @brief Register memories and init resources for remote access. | * @brief Register memories and init resources for remote access. | ||||
| @@ -239,16 +173,6 @@ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrLis | |||||
| */ | */ | ||||
| extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); | extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); | ||||
| HcclResult HcomExecInitialize(); | |||||
| HcclResult HcomExecFinalize(); | |||||
| HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); | |||||
| HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||||
| const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | |||||
| std::function<void(HcclResult status)> callback); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| @@ -279,8 +279,9 @@ typedef struct { | |||||
| #define M_NAME_MAX MAX_FNAME | #define M_NAME_MAX MAX_FNAME | ||||
| #define M_F_OK F_OK | #define M_F_OK F_OK | ||||
| #define M_R_OK R_OK | |||||
| #define M_X_OK X_OK | |||||
| #define M_W_OK W_OK | #define M_W_OK W_OK | ||||
| #define M_R_OK R_OK | |||||
| #define MM_DT_DIR DT_DIR | #define MM_DT_DIR DT_DIR | ||||
| #define MM_DT_REG DT_REG | #define MM_DT_REG DT_REG | ||||
| @@ -322,6 +322,7 @@ typedef VOID (*mmPf)(VOID); | |||||
| #define M_NAME_MAX _MAX_FNAME | #define M_NAME_MAX _MAX_FNAME | ||||
| #define M_F_OK 0 | #define M_F_OK 0 | ||||
| #define M_X_OK 1 | |||||
| #define M_W_OK 2 | #define M_W_OK 2 | ||||
| #define M_R_OK 4 | #define M_R_OK 4 | ||||
| @@ -0,0 +1,49 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ | |||||
| #define INC_REGISTER_OP_KERNEL_REGISTRY_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "register/register_types.h" | |||||
| #include "register.h" | |||||
| namespace ge { | |||||
| class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { | |||||
| public: | |||||
| using CreateFn = HostCpuOp* (*)(); | |||||
| ~OpKernelRegistry(); | |||||
| static OpKernelRegistry& GetInstance() { | |||||
| static OpKernelRegistry instance; | |||||
| return instance; | |||||
| } | |||||
| bool IsRegistered(const std::string &op_type); | |||||
| void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); | |||||
| std::unique_ptr<HostCpuOp> CreateHostCpuOp(const std::string &op_type); | |||||
| private: | |||||
| OpKernelRegistry(); | |||||
| class OpKernelRegistryImpl; | |||||
| /*lint -e148*/ | |||||
| std::unique_ptr<OpKernelRegistryImpl> impl_; | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ | |||||
| @@ -0,0 +1,96 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_REGISTER_OP_REGISTRY_H_ | |||||
| #define INC_REGISTER_OP_REGISTRY_H_ | |||||
| #include <limits.h> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "register/register.h" | |||||
| namespace domi { | |||||
| enum RemoveInputType { | |||||
| OMG_MOVE_TYPE_DTYPE = 0, | |||||
| OMG_MOVE_TYPE_VALUE, | |||||
| OMG_MOVE_TYPE_SHAPE, | |||||
| OMG_MOVE_TYPE_FORMAT, | |||||
| OMG_MOVE_TYPE_AXIS, | |||||
| OMG_MOVE_TYPE_SCALAR_VALUE, | |||||
| OMG_REMOVE_TYPE_WITH_COND = 1000, | |||||
| OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, | |||||
| OMG_INPUT_REORDER, | |||||
| }; | |||||
| struct RemoveInputConfigure { | |||||
| int inputIdx = INT_MAX; | |||||
| std::string attrName; | |||||
| RemoveInputType moveType; | |||||
| bool attrValue = false; | |||||
| std::string originalType; | |||||
| std::vector<int> input_order; | |||||
| }; | |||||
| class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { | |||||
| public: | |||||
| static OpRegistry *Instance(); | |||||
| std::vector<OpRegistrationData> registrationDatas; | |||||
| bool Register(const OpRegistrationData &reg_data); | |||||
| domi::ImplyType GetImplyType(const std::string &op_type); | |||||
| void GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const domi::ImplyType &imply_type); | |||||
| domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); | |||||
| domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); | |||||
| domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); | |||||
| domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type, | |||||
| const std::string &ori_type); | |||||
| domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); | |||||
| Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func); | |||||
| domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); | |||||
| const std::vector<RemoveInputConfigure> &GetRemoveInputConfigure(const std::string &ori_optype) const; | |||||
| bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type); | |||||
| ParseOpToGraphFunc GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type); | |||||
| private: | |||||
| std::unordered_map<std::string, domi::ImplyType> op_run_mode_map_; | |||||
| std::unordered_map<std::string, ParseParamFunc> op_parse_params_fn_map_; | |||||
| std::unordered_map<std::string, ParseParamByOpFunc> parse_params_by_op_func_map_; | |||||
| std::unordered_map<std::string, FusionParseParamFunc> fusion_op_parse_params_fn_map_; | |||||
| std::unordered_map<std::string, FusionParseParamByOpFunc> fusion_parse_params_by_op_fn_map_; | |||||
| std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_; | |||||
| std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_; | |||||
| std::unordered_map<std::string, std::string> origin_type_to_om_type_; | |||||
| std::unordered_map<std::string, ParseOpToGraphFunc> parse_op_to_graph_fn_map_; | |||||
| std::unordered_map<std::string, ParseSubgraphFuncV2> op_types_to_parse_subgraph_post_func_v2_; | |||||
| }; | |||||
| } // namespace domi | |||||
| #endif // INC_REGISTER_OP_REGISTRY_H_ | |||||
| @@ -81,26 +81,17 @@ typedef enum tagRtLimitType { | |||||
| } rtLimitType_t; | } rtLimitType_t; | ||||
| typedef struct rtExceptionInfo { | typedef struct rtExceptionInfo { | ||||
| uint32_t taskid; | |||||
| uint32_t streamid; | |||||
| uint32_t tid; | |||||
| uint32_t deviceid; | |||||
| uint32_t taskid; | |||||
| uint32_t streamid; | |||||
| uint32_t tid; | |||||
| uint32_t deviceid; | |||||
| uint32_t retcode; | |||||
| } rtExceptionInfo; | } rtExceptionInfo; | ||||
| typedef struct rtTaskFailInfo { | |||||
| uint32_t taskid; | |||||
| uint32_t streamid; | |||||
| uint32_t tid; | |||||
| uint32_t deviceid; | |||||
| uint32_t retcode; | |||||
| } rtTaskFailInfo; | |||||
| typedef void (*rtErrorCallback)(rtExceptionType); | typedef void (*rtErrorCallback)(rtExceptionType); | ||||
| typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | ||||
| typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); | |||||
| typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | ||||
| /** | /** | ||||
| @@ -143,13 +134,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type); | |||||
| * @ingroup profiling_base | * @ingroup profiling_base | ||||
| * @brief start rts profiler. | * @brief start rts profiler. | ||||
| */ | */ | ||||
| RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); | |||||
| RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); | |||||
| /** | /** | ||||
| * @ingroup profiling_base | * @ingroup profiling_base | ||||
| * @brief stop rts profiler. | * @brief stop rts profiler. | ||||
| */ | */ | ||||
| RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); | |||||
| RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); | |||||
| /** | /** | ||||
| * @ingroup profiling_base | * @ingroup profiling_base | ||||
| @@ -209,7 +200,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal | |||||
| * @param [out] NA | * @param [out] NA | ||||
| * @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
| */ | */ | ||||
| RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); | |||||
| RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback); | |||||
| /** | /** | ||||
| * @ingroup dvrt_base | * @ingroup dvrt_base | ||||
| @@ -42,6 +42,7 @@ typedef enum tagRtChipType { | |||||
| CHIP_MDC, | CHIP_MDC, | ||||
| CHIP_LHISI, | CHIP_LHISI, | ||||
| CHIP_DC, | CHIP_DC, | ||||
| CHIP_CLOUD_V2, | |||||
| CHIP_END, | CHIP_END, | ||||
| } rtChipType_t; | } rtChipType_t; | ||||
| @@ -62,6 +63,7 @@ typedef enum tagRtPlatformType { | |||||
| PLATFORM_LHISI_ES, | PLATFORM_LHISI_ES, | ||||
| PLATFORM_LHISI_CS, | PLATFORM_LHISI_CS, | ||||
| PLATFORM_DC, | PLATFORM_DC, | ||||
| PLATFORM_CLOUD_V2, | |||||
| PLATFORM_END, | PLATFORM_END, | ||||
| } rtPlatformType_t; | } rtPlatformType_t; | ||||
| @@ -119,7 +121,9 @@ typedef struct tagRtMemoryConfig { | |||||
| uint32_t compilerSize; | uint32_t compilerSize; | ||||
| } rtMemoryConfig_t; | } rtMemoryConfig_t; | ||||
| typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; | |||||
| typedef struct tagRtPlatformConfig { | |||||
| uint32_t platformConfig; | |||||
| } rtPlatformConfig_t; | |||||
| /** | /** | ||||
| * @ingroup | * @ingroup | ||||
| @@ -47,7 +47,7 @@ typedef struct tagRtGroupInfo { | |||||
| uint32_t aivectorNum; | uint32_t aivectorNum; | ||||
| uint32_t sdmaNum; | uint32_t sdmaNum; | ||||
| uint32_t activeStreamNum; | uint32_t activeStreamNum; | ||||
| void* extrPtr; | |||||
| void *extrPtr; | |||||
| } rtGroupInfo_t; | } rtGroupInfo_t; | ||||
| /** | /** | ||||
| @@ -185,7 +185,7 @@ RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc); | |||||
| * @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
| * @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
| */ | */ | ||||
| RTS_API rtError_t rtDeviceCanAccessPeer(int32_t* canAccessPeer, uint32_t device, uint32_t peerDevice); | |||||
| RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice); | |||||
| /** | /** | ||||
| * @ingroup dvrt_dev | * @ingroup dvrt_dev | ||||