diff --git a/CMakeLists.txt b/CMakeLists.txt index 9194f119..88ce15ff 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ if (ENABLE_OPEN_SRC) endif() set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) set(STATIC_ACL_LIB ${GE_LIB_PATH}) - find_module(slog libslog.so ${GE_LIB_PATH}) + find_module(slog libalog.so ${GE_LIB_PATH}) find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH}) find_module(hccl libhccl.so ${GE_LIB_PATH}) @@ -88,7 +88,7 @@ if (ENABLE_OPEN_SRC) elseif(ENABLE_GE_COV OR ENABLE_GE_UT) add_subdirectory(tests) else() - find_module(slog libslog.so ${ASCEND_ATC_DIR}) + find_module(slog libalog.so ${ASCEND_ATC_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) if(PLATFORM STREQUAL "train") @@ -154,7 +154,7 @@ elseif (ENABLE_D OR ENABLE_ACL) include(cmake/intf_pub_linux.cmake) # common libraries - find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) + find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) @@ -174,7 +174,7 @@ elseif(ENABLE_MS_TESTCASES) include(cmake/intf_pub_linux.cmake) # common libraries - find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) + find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) diff --git a/cmake/intf_pub_linux.cmake b/cmake/intf_pub_linux.cmake index 40c6bca9..61237d11 100755 --- a/cmake/intf_pub_linux.cmake +++ b/cmake/intf_pub_linux.cmake @@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE $<$:CFG_BUILD_DEBUG> WIN64=1 LINUX=0 + LOG_CPP ) target_link_options(intf_pub INTERFACE -Wl,-z,relro diff --git 
a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 8d9edb65..317ff00a 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -124,7 +124,7 @@ set(TRAIN_SRC_LIST "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" - $<$>:graph/manager/host_mem_allocator.cc> + "graph/manager/host_mem_allocator.cc" "graph/manager/memory_api.cc" "graph/manager/model_manager/event_manager.cc" "graph/manager/trans_var_data_utils.cc" @@ -166,7 +166,7 @@ set(TRAIN_SRC_LIST "graph/passes/hccl_group_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_remove_pass.cc" - $<$>:graph/passes/inplace_support_check_pass.cc> + "graph/passes/inplace_support_check_pass.cc" "graph/passes/flow_ctrl_pass.cc" "graph/passes/global_step_insert_pass.cc" "host_kernels/transpose_kernel.cc" @@ -409,7 +409,7 @@ set(INFER_SRC_LIST "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" - $<$>:graph/manager/host_mem_allocator.cc> + "graph/manager/host_mem_allocator.cc" "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" "model/ge_model.cc" @@ -531,7 +531,7 @@ set(INFER_SRC_LIST "graph/passes/for_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_remove_pass.cc" - $<$>:graph/passes/inplace_support_check_pass.cc> + "graph/passes/inplace_support_check_pass.cc" "graph/passes/addn_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc" "graph/passes/remove_same_const_pass.cc" @@ -654,7 +654,6 @@ target_compile_definitions(ge_runner PRIVATE FMK_SUPPORT_DUMP DAVINCI_CLOUD google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_runner PRIVATE @@ -718,7 +717,6 @@ target_compile_definitions(ge_compiler PRIVATE FMK_HOST_INFER COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_compiler PRIVATE @@ -806,7 +804,6 @@ endif() target_compile_definitions(opensrc_ascendcl PRIVATE 
google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(opensrc_ascendcl PRIVATE diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 75cc92d4..d65d7667 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -32,9 +32,7 @@ #include "graph/common/ge_call_wrapper.h" #include "register/op_registry.h" #include "common/ge/tbe_plugin_manager.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "toolchain/plog.h" -#endif using domi::OpRegistry; using std::map; @@ -132,11 +130,9 @@ Status GEInitializeImpl(const std::map &options) { // Initialize GE, prepare for execution, call GELib::Initialize Status GEInitialize(const std::map &options) { -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportInitialize() != SUCCESS) { GELOGW("Dlog report device log initialize failed."); } -#endif return GEInitializeImpl(options); } @@ -151,11 +147,9 @@ Status GEInitialize(const std::map &options) { std::string val = option.second.GetString(); str_options[key] = val; } -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportInitialize() != SUCCESS) { GELOGW("Dlog report device log initialize failed."); } -#endif return GEInitializeImpl(str_options); } @@ -200,11 +194,9 @@ Status GEFinalize() { // to avoid memory fragment, use malloc_trim to back free stack to system malloc_trim(0); -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportFinalize() != SUCCESS) { GELOGW("Dlog report device log finalize failed."); } -#endif GELOGT(TRACE_STOP, "GEFinalize finished"); return ret; diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index d2b8c8e7..bb08570a 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -73,7 +73,6 @@ target_compile_definitions(ge_common PRIVATE FMK_SUPPORT_DUMP OS_CENTOS google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common PRIVATE @@ -133,7 +132,6 @@ target_compile_definitions(ge_common_static PRIVATE $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) 
target_compile_options(ge_common_static PRIVATE @@ -182,7 +180,6 @@ target_compile_definitions(ge_common PRIVATE FMK_SUPPORT_DUMP OS_CENTOS google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common PRIVATE diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 4ca18864..755bdf97 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -28,7 +28,7 @@ set(SRC_LIST "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" - $<$>:../graph/manager/host_mem_allocator.cc> + "../graph/manager/host_mem_allocator.cc" "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" @@ -175,7 +175,6 @@ target_compile_definitions(ge_executor PRIVATE $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_executor PRIVATE @@ -218,7 +217,6 @@ target_compile_definitions(ge_executor_shared PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 DAVINCI_SUPPORT_PROFILING google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_executor_shared PRIVATE diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index f963730b..7189e8ff 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -31,7 +31,6 @@ target_compile_options(ge_local_engine PRIVATE target_compile_definitions(ge_local_engine PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_engine PRIVATE @@ -73,7 +72,6 @@ target_compile_options(atc_ge_local_engine PRIVATE target_compile_definitions(atc_ge_local_engine PRIVATE COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_ge_local_engine PRIVATE @@ -119,7 +117,6 @@ target_compile_options(ge_local_opskernel_builder PRIVATE 
target_compile_definitions(ge_local_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_opskernel_builder PRIVATE @@ -161,7 +158,6 @@ target_compile_options(atc_ge_local_opskernel_builder PRIVATE target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_ge_local_opskernel_builder PRIVATE @@ -209,7 +205,6 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE target_compile_definitions(ge_local_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_opskernel_builder_static PRIVATE diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 0f46b4cb..35ecfb2d 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -26,7 +26,6 @@ #include "common/math/math_util.h" namespace { -#ifndef ONLY_COMPILE_OPEN_SRC #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ case (DTYPE): { \ GeTensorPtr ge_tensor = nullptr; \ @@ -50,43 +49,6 @@ namespace { named_outputs.emplace(tensor_name, tensor); \ break; \ } -#else -#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ - case (DTYPE): { \ - GeTensorPtr ge_tensor = nullptr; \ - if (need_create_flag) { \ - GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ - std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ - if (buf == nullptr) { \ - GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ - static_cast(sizeof(TYPE) * data_num)); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor = MakeShared(out_desc); \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ - if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * 
sizeof(TYPE)) != GRAPH_SUCCESS) { \ - GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ - ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ - outputs.emplace_back(ge_tensor); \ - } else { \ - ge_tensor = outputs[i]; \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \ - } \ - auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ - auto tensor_name = op_desc->GetOutputNameByIndex(i); \ - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ - op_desc->GetName().c_str(), i); \ - GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ - op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ - named_outputs.emplace(tensor_name, tensor); \ - break; \ - } -#endif } namespace ge { diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index ca770b15..ce1b89ea 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -27,7 +27,6 @@ target_compile_options(ge_runtime PRIVATE target_compile_definitions(ge_runtime PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_runtime PRIVATE diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index c4f91036..c0f084d8 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -38,10 +38,8 @@ #include "graph/partition/stage_partition.h" #include "graph/passes/addn_pass.h" #include "graph/passes/bitcast_pass.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/passes/assign_remove_pass.h" #include "graph/passes/inplace_support_check_pass.h" -#endif #include "graph/passes/atomic_addr_clean_pass.h" #include 
"graph/passes/attach_stream_label_pass.h" #include "graph/passes/cast_remove_pass.h" @@ -2269,20 +2267,16 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ReshapeRemovePass reshape_remove_pass; CondRemovePass condition_remove_pass; BitcastPass bitcast_pass; -#ifndef ONLY_COMPILE_OPEN_SRC AssignRemovePass assign_remove_pass; InplaceSupportCheckPass inplace_support_check_pass; -#endif names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); names_to_passes.emplace_back("BitcastPass", &bitcast_pass); -#ifndef ONLY_COMPILE_OPEN_SRC if (GetContext().GetHostExecFlag()) { names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); } -#endif GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 4e31d835..f3037299 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -19,9 +19,7 @@ #include #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/rdma_pool_allocator.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/host_mem_allocator.h" -#endif namespace ge { void MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); @@ -192,12 +190,10 @@ Status MemManager::Initialize(const std::vector &memory_type) { GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); return ge::INTERNAL_ERROR; } -#ifndef ONLY_COMPILE_OPEN_SRC if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); return ge::INTERNAL_ERROR; } 
-#endif return SUCCESS; } @@ -219,9 +215,7 @@ void MemManager::Finalize() noexcept { // caching and rdma allocator use memory allocator, so finalize them first FinalizeAllocatorMap(caching_allocator_map_); FinalizeAllocatorMap(rdma_allocator_map_); -#ifndef ONLY_COMPILE_OPEN_SRC FinalizeAllocatorMap(host_allocator_map_); -#endif FinalizeAllocatorMap(memory_allocator_map_); } @@ -250,9 +244,7 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { return Instance().GetAllocator(memory_type, rdma_allocator_map_); } -#ifndef ONLY_COMPILE_OPEN_SRC HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { return Instance().GetAllocator(memory_type, host_allocator_map_); } -#endif } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 6cdbd9b4..bd75dbb9 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -139,9 +139,7 @@ class MemoryAllocator { using MemoryAllocatorPtr = std::shared_ptr; class CachingAllocator; class RdmaPoolAllocator; -#ifndef ONLY_COMPILE_OPEN_SRC class HostMemAllocator; -#endif class MemManager { public: MemManager(); @@ -150,9 +148,7 @@ class MemManager { static MemoryAllocator *Instance(rtMemType_t memory_type); CachingAllocator &CachingInstance(rtMemType_t memory_type); RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); -#ifndef ONLY_COMPILE_OPEN_SRC HostMemAllocator &HostMemInstance(rtMemType_t memory_type); -#endif MemManager(const MemManager &) = delete; MemManager &operator=(const MemManager &) = delete; /// @@ -240,9 +236,7 @@ class MemManager { std::map memory_allocator_map_; std::map caching_allocator_map_; std::map rdma_allocator_map_; -#ifndef ONLY_COMPILE_OPEN_SRC std::map host_allocator_map_; -#endif std::recursive_mutex allocator_mutex_; }; } // namespace ge diff --git a/ge/graph/manager/host_mem_allocator.h 
b/ge/graph/manager/host_mem_allocator.h index b9dbdc4c..d10b2475 100644 --- a/ge/graph/manager/host_mem_allocator.h +++ b/ge/graph/manager/host_mem_allocator.h @@ -27,7 +27,7 @@ namespace ge { class HostMemAllocator { public: - explicit HostMemAllocator(rtMemType_t) {} + explicit HostMemAllocator(rtMemType_t) {} ~HostMemAllocator() = default; HostMemAllocator(const HostMemAllocator &) = delete; diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index c9a33f5c..60a7586d 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -43,29 +43,20 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { return GE_GRAPH_MEMORY_ALLOC_FAILED; } mem_info.fd = output_para.fd; -#ifndef ONLY_COMPILE_OPEN_SRC mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr &ptr) { ptr.reset(reinterpret_cast(output_para.ptr)); }, [](uint8_t *ptr) { ptr = nullptr; }); -#else - mem_info.host_address = reinterpret_cast(output_para.ptr); -#endif mem_info.device_address = reinterpret_cast(output_para.devPtr); return SUCCESS; } Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { GELOGD("SharedMemAllocator::DeAllocate"); -#ifndef ONLY_COMPILE_OPEN_SRC rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; -#else - rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, - mem_info.host_address, mem_info.device_address}; -#endif rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); diff --git a/ge/graph/manager/host_mem_manager.h b/ge/graph/manager/host_mem_manager.h index f204c9e4..be3237c3 100644 --- a/ge/graph/manager/host_mem_manager.h +++ b/ge/graph/manager/host_mem_manager.h @@ -42,11 +42,7 @@ struct 
SharedMemInfo { uint64_t mem_size = 0; int fd = 0; uint8_t *device_address = nullptr; -#ifndef ONLY_COMPILE_OPEN_SRC std::shared_ptr host_aligned_ptr = nullptr; -#else - uint8_t *host_address = nullptr; -#endif SharedMemInfo() = default; SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} }; diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index cd80a956..8cca5b5d 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -127,6 +127,10 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std } Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { + if (GetContext().GetHostExecFlag()) { + // graph exec on host, no need OptimizeOriginalGraph + return SUCCESS; + } if (compute_graph == nullptr) { GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeOriginalGraph]: compute_graph is nullptr."); return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; @@ -162,7 +166,7 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { GELOGD("OptimizeOriginalGraphJudgeInsert in"); if (GetContext().GetHostExecFlag()) { - // graph exec on host, no need OptimizeOriginalGraph + // graph exec on host, no need OptimizeOriginalGraphJudgeInsert return SUCCESS; } diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index 5029b9c3..e198c2db 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -19,6 +19,7 @@ #include "graph/utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +namespace ge { namespace { constexpr uint32_t kValidInputNodeOutputNum = 1; constexpr int32_t kAssignRefInputIndex = 0; @@ -28,8 +29,6 @@ static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::VARIABLE, ge::VARIABLEV2 }; } -namespace ge { -#ifndef 
ONLY_COMPILE_OPEN_SRC Status AssignRemovePass::Run(NodePtr &node) { GELOGD("AssignRemovePass running"); @@ -145,71 +144,7 @@ Status AssignRemovePass::TransformAttr(NodePtr &node) { } return SUCCESS; } -#else -Status AssignRemovePass::Run(NodePtr &node) { - GELOGD("AssignRemovePass running"); - if (node->GetType() != ASSIGN) { - GELOGD("No need run AssignRemovePass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str()); - return SUCCESS; - } - - const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex); - const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex); - if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { - GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); - const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); - if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { - GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - - if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) { - /// - /// variable not-const not-const - /// \ / | - /// \ / | - /// Assign ----> variable - /// | | - /// | | - /// node node - /// - GELOGI("Optimization for assign_node %s start", node->GetName().c_str()); - if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) { - GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str()); - return FAILED; - } - AddNodeDeleted(node); - const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); - const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); - if ((ref_input == nullptr) || (value_input == nullptr)) { - GELOGE(FAILED, "value input is null"); - return FAILED; - } - if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, - ref_input->GetName())) { - GELOGE(FAILED, "Set attr 
ASSIGN_VAR_NAME failed."); - return FAILED; - } - - // variable has and only has one input - if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); - return FAILED; - } - if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); - return FAILED; - } - } - - GELOGD("AssignRemovePass success"); - return SUCCESS; -} -#endif /// /// @brief Check if need optimize for assign_node /// @param [in] assign_node @@ -218,7 +153,7 @@ Status AssignRemovePass::Run(NodePtr &node) { /// @return Status /// bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor, - const OutDataAnchorPtr &value_peer_anchor) { + const OutDataAnchorPtr &value_peer_anchor) { GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s", node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), value_peer_anchor->GetOwnerNode()->GetName().c_str()); diff --git a/ge/graph/passes/assign_remove_pass.h b/ge/graph/passes/assign_remove_pass.h index f8ef2e13..6588df7b 100644 --- a/ge/graph/passes/assign_remove_pass.h +++ b/ge/graph/passes/assign_remove_pass.h @@ -25,7 +25,6 @@ class AssignRemovePass : public BaseNodePass { Status Run(NodePtr &node) override; private: -#ifndef ONLY_COMPILE_OPEN_SRC /// /// @brief Optimize for assign_node /// @param [in] assign_node @@ -39,7 +38,7 @@ class AssignRemovePass : public BaseNodePass { /// @return Status /// Status TransformAttr(NodePtr &node); -#endif + /// /// @brief Check if need optimize for assign_node /// @param [in] assign_node diff --git a/ge/graph/passes/constant_fuse_same_pass.cc b/ge/graph/passes/constant_fuse_same_pass.cc index 8ee89648..eb8b3470 100644 --- 
a/ge/graph/passes/constant_fuse_same_pass.cc +++ b/ge/graph/passes/constant_fuse_same_pass.cc @@ -115,21 +115,15 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, TypeUtils::DataTypeToSerialString(data_type).c_str()); continue; } -#ifndef ONLY_COMPILE_OPEN_SRC if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { GELOGW("aligned_ptr is null while size is not 0"); continue; } -#endif ++insert_const_nums; SameConstKey map_key; map_key.data_size = type_size; -#ifndef ONLY_COMPILE_OPEN_SRC map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); -#else - map_key.data = weight->GetData().GetData(); -#endif map_key.data_type = data_type; map_key.format = output_tensor->GetFormat(); map_key.shape = output_tensor->GetShape().GetDims(); diff --git a/ge/graph/passes/constant_fuse_same_pass.h b/ge/graph/passes/constant_fuse_same_pass.h index ae39c707..3ff2d6b7 100755 --- a/ge/graph/passes/constant_fuse_same_pass.h +++ b/ge/graph/passes/constant_fuse_same_pass.h @@ -21,20 +21,14 @@ #include #include #include -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/aligned_ptr.h" -#endif #include "graph/types.h" #include "inc/graph_pass.h" namespace ge { struct SameConstKey { int data_size; -#ifndef ONLY_COMPILE_OPEN_SRC std::shared_ptr aligned_ptr; -#else - const uint8_t *data; -#endif DataType data_type; Format format; std::vector shape; @@ -44,19 +38,12 @@ struct SameConstKey { if (data_size != key.data_size) { return data_size < key.data_size; } -#ifndef ONLY_COMPILE_OPEN_SRC if (data_size != 0) { int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); if (ret != 0) { return ret < 0; } } -#else - int ret = memcmp(data, key.data, data_size); - if (ret != 0) { - return ret < 0; - } -#endif if (data_type != key.data_type) { return data_type < key.data_type; } diff --git a/ge/graph/passes/inplace_support_check_pass.cc b/ge/graph/passes/inplace_support_check_pass.cc index 73cc7f3b..44ad8361 100644 --- 
a/ge/graph/passes/inplace_support_check_pass.cc +++ b/ge/graph/passes/inplace_support_check_pass.cc @@ -19,6 +19,7 @@ #include "graph/utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +namespace ge { namespace { constexpr uint32_t kInplaceSupportOutputIndex = 0; constexpr uint32_t kInplaceSupportOutputNum = 1; @@ -26,8 +27,6 @@ static const std::set kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge: ge::CONSTANT, ge::CONSTANTOP, ge::VARIABLE, ge::VARIABLEV2 }; } - -namespace ge { Status InplaceSupportCheckPass::Run(NodePtr &node) { GELOGD("InplaceSupportCheckPass running"); if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) { diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index f94633a1..0bfec241 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -38,9 +38,6 @@ #include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/assert_pass.h" #include "ge/ge_api_types.h" -#ifdef ONLY_COMPILE_OPEN_SRC -#include "graph/passes/assign_remove_pass.h" -#endif #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/cond_pass.h" #include "graph/passes/cond_remove_pass.h" @@ -1865,9 +1862,6 @@ Status GraphPrepare::PrepareOptimize() { VarIsInitializedOpPass var_is_initialized_pass; ParallelConcatStartOpPass parallel_concat_start_op_pass; IdentityPass identity_pass(false); -#ifdef ONLY_COMPILE_OPEN_SRC - AssignRemovePass assign_remove_pass; -#endif SnapshotPass snapshot_pass; if (!options_.train_graph_flag) { names_to_passes.emplace_back("DropOutPass", &dropout_pass); @@ -1882,11 +1876,6 @@ Status GraphPrepare::PrepareOptimize() { names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); names_to_passes.emplace_back("ParallelConcatStartOpPass", ¶llel_concat_start_op_pass); names_to_passes.emplace_back("IdentityPass", &identity_pass); -#ifdef ONLY_COMPILE_OPEN_SRC - if 
(GetContext().GetHostExecFlag()) { - names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); - } -#endif GE_TIMESTAMP_START(names_to_passes); ret = ge_passes.Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index f20f810e..cbd0bd8b 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -25,7 +25,6 @@ target_compile_options(host_cpu_engine PRIVATE target_compile_definitions(host_cpu_engine PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(host_cpu_engine PRIVATE @@ -66,7 +65,6 @@ target_compile_options(atc_host_cpu_engine PRIVATE target_compile_definitions(atc_host_cpu_engine PRIVATE COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_host_cpu_engine PRIVATE @@ -111,7 +109,6 @@ target_compile_options(host_cpu_opskernel_builder PRIVATE target_compile_definitions(host_cpu_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(host_cpu_opskernel_builder PRIVATE @@ -152,7 +149,6 @@ target_compile_options(atc_host_cpu_opskernel_builder PRIVATE target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_host_cpu_opskernel_builder PRIVATE @@ -199,7 +195,6 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(host_cpu_opskernel_builder_static PRIVATE diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index c2602f37..ccd6a624 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -20,9 +20,7 @@ #include 
"graph/manager/graph_caching_allocator.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/rdma_pool_allocator.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/host_mem_allocator.h" -#endif namespace ge { namespace hybrid { @@ -67,11 +65,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { if (mem_type == RDMA_HBM) { buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); } else if (mem_type == HOST_DDR) { -#ifndef ONLY_COMPILE_OPEN_SRC buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); -#else - buffer = malloc(allocate_size); -#endif } else { if (allocate_size > kMaxHbmMemorySize) { GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); @@ -108,11 +102,7 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) { if (mem_type == RDMA_HBM) { MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } else if (mem_type == HOST_DDR) { -#ifndef ONLY_COMPILE_OPEN_SRC MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); -#else - free(data); -#endif } else { MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 32fc495a..d1f61985 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -25,10 +25,8 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_allocator.h" -#endif #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" @@ -865,7 +863,6 @@ Status HybridModelBuilder::InitConstantOps() { 
std::unique_ptr var_tensor; if (GetContext().GetHostExecFlag()) { -#ifndef ONLY_COMPILE_OPEN_SRC GE_CHECK_NOTNULL(ge_tensor); // Address for eigen kernel should be aligned with 16 bytes // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned @@ -878,11 +875,6 @@ Status HybridModelBuilder::InitConstantOps() { } var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), aligned_tensor.GetData().size())); -#else - auto buffer = ge_tensor->MutableData(); - GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); - var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); -#endif } else { GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); @@ -937,7 +929,6 @@ Status HybridModelBuilder::InitVariableTensors() { GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); return GE_GRAPH_MALLOC_FAILED; } -#ifndef ONLY_COMPILE_OPEN_SRC if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, tensor_size) == nullptr) { GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); @@ -947,11 +938,6 @@ Status HybridModelBuilder::InitVariableTensors() { std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), tensor_size)); -#else - GELOGD("Host variable [%s] malloc success.", it.first.c_str()); - - std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); -#endif GE_CHECK_NOTNULL(tensor); hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); } diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 32522fe8..0cc635e4 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ 
b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,10 +18,8 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_allocator.h" -#endif #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { @@ -54,18 +52,11 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { auto input_desc_ptr = context.GetInputDesc(i); GE_CHECK_NOTNULL(input_desc_ptr); const auto &input_desc = *input_desc_ptr; -#ifndef ONLY_COMPILE_OPEN_SRC auto tensor = context.GetInput(i); GE_CHECK_NOTNULL(tensor); auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); GE_CHECK_NOTNULL(item.second); auto in_tensor = MakeShared(input_desc, item.second, item.first); -#else - GE_CHECK_NOTNULL(context.GetInput(i)); - auto in_tensor = MakeShared(input_desc, - reinterpret_cast(context.GetInput(i)->GetData()), - context.GetInput(i)->GetSize()); -#endif GE_CHECK_NOTNULL(in_tensor); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); @@ -84,15 +75,9 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { } auto tensor = context.GetOutput(i); GE_CHECK_NOTNULL(tensor); -#ifndef ONLY_COMPILE_OPEN_SRC auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); GE_CHECK_NOTNULL(item.second); auto out_tensor = MakeShared(output_desc, item.second, item.first); -#else - auto out_tensor = MakeShared(output_desc, - reinterpret_cast(tensor->GetData()), - tensor->GetSize()); -#endif GE_CHECK_NOTNULL(out_tensor); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 
cb6a3a50..d195e06f 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -30,7 +30,6 @@ target_compile_definitions(atc PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc PRIVATE @@ -93,7 +92,6 @@ target_compile_definitions(atc_atc.bin PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_atc.bin PRIVATE @@ -154,7 +152,6 @@ target_compile_options(fwk_atc.bin PRIVATE -O2 -Wno-deprecated-declarations -fno-common - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_definitions(fwk_atc.bin PRIVATE diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt index 65d5a8a1..f6353231 100644 --- a/ge/plugin/engine/CMakeLists.txt +++ b/ge/plugin/engine/CMakeLists.txt @@ -14,7 +14,6 @@ target_compile_options(engine PRIVATE target_compile_definitions(engine PRIVATE REUSE_MEMORY=1 PROTOBUF_INLINE_NOT_IN_HEADERS=0 - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(engine PRIVATE diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index d845654e..8327b72c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -37,10 +37,7 @@ enum FrameworkType { MINDSPORE = 1, TENSORFLOW = 3, ANDROID_NN, -#ifndef ONLY_COMPILE_OPEN_SRC ONNX, -#endif - FRAMEWORK_RESERVED, }; enum OpEngineType { diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 57cff9a7..9eda685d 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -65,7 +65,6 @@ class ModelParser { */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; -#ifndef ONLY_COMPILE_OPEN_SRC /** * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph @@ -77,7 +76,6 @@ class ModelParser { * @author */ virtual Status ParseFromMemory(const char *data, uint32_t 
size, ge::Graph &graph) = 0; -#endif /** * @ingroup domi_omg diff --git a/metadef b/metadef index fe37bc34..f08320a6 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900 +Subproject commit f08320a6d699f5b537bf66da572bf225b9cd330e diff --git a/parser b/parser index 336cd310..b2df31dc 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b +Subproject commit b2df31dc5810283e2e483df5ba9517e2ece132a0 diff --git a/tests/depends/cce/CMakeLists.txt b/tests/depends/cce/CMakeLists.txt index 85e69e6d..7550c63f 100644 --- a/tests/depends/cce/CMakeLists.txt +++ b/tests/depends/cce/CMakeLists.txt @@ -46,6 +46,7 @@ set(SRCS "${GE_CODE_DIR}/metadef/graph/anchor.cc" "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/model.cc" diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index e2490150..1c64dce1 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -67,6 +67,7 @@ set(SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" @@ -110,7 +111,6 @@ target_compile_options(ut_libgraph PRIVATE target_compile_definitions(ut_libgraph PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ut_libgraph diff --git a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc index 6d34ab59..5c75bd01 100644 --- 
a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc +++ b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc @@ -230,7 +230,7 @@ TEST_F(UtestGeTensor, test_tensor_invalid_null) { GeTensor tensor(msg_owner, nullptr); EXPECT_EQ(tensor.GetData().size(), 0); EXPECT_EQ(tensor.MutableData().size(), 0); - EXPECT_EQ(tensor.SetData(Buffer(100)), ge::GRAPH_PARAM_INVALID); + EXPECT_EQ(tensor.SetData(Buffer(100)), GRAPH_SUCCESS); TensorUtils::SetWeightSize(tensor.MutableTensorDesc(), 100); EXPECT_EQ(TensorUtils::GetWeightSize(tensor), 0); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index db725dfb..2ebe9fc9 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -89,6 +89,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/gnode.cc" @@ -227,6 +228,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/inplace_support_check_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" @@ -303,6 +305,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/common/local_context.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -370,6 +373,7 @@ set(GRAPH_LOAD_COMMON_SRC_FILES 
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" ) @@ -723,7 +727,6 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) target_compile_definitions(ge_ut_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_ut_common PRIVATE @@ -738,7 +741,6 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F target_compile_definitions(ge_ut_common_format PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_ut_common_format PRIVATE @@ -795,7 +797,6 @@ add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_SRCS} $ target_compile_definitions(ge_load_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_load_common PRIVATE @@ -810,7 +811,6 @@ add_library(ge_execute_common STATIC ${GRAPH_EXECUTE_COMMON_SRC_FILES} ${PROTO_S target_compile_definitions(ge_execute_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_execute_common PRIVATE @@ -825,7 +825,6 @@ add_library(ge_build_common STATIC ${GRAPH_BUILD_COMMON_SRC_FILES} ${PROTO_SRCS} target_compile_definitions(ge_build_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_build_common PRIVATE @@ -898,10 +897,6 @@ target_compile_options(ut_libge_others_utest PRIVATE -g --coverage -fprofile-arcs -ftest-coverage ) -target_compile_definitions(ut_libge_others_utest PRIVATE - $<$:ONLY_COMPILE_OPEN_SRC> -) - target_link_libraries(ut_libge_others_utest $ ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov @@ -919,10 +914,6 @@ target_compile_options(ut_libge_kernel_utest PRIVATE 
-g --coverage -fprofile-arcs -ftest-coverage ) -target_compile_definitions(ut_libge_kernel_utest PRIVATE - $<$:ONLY_COMPILE_OPEN_SRC> -) - target_link_libraries(ut_libge_kernel_utest $ ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov @@ -943,7 +934,6 @@ target_compile_options(ut_libge_distinct_load_utest PRIVATE target_compile_definitions(ut_libge_distinct_load_utest PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ut_libge_distinct_load_utest diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h index 7e0f94a8..8d16467c 100644 --- a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -1,60 +1,60 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef AICPU_OP_TYPE_LIST_H_ -#define AICPU_OP_TYPE_LIST_H_ - -enum OpKernelType { - TF_KERNEL, - CPU_KERNEL -}; - -enum ReturnCode { - OP_TYPE_NOT_SUPPORT, - FORMAT_NOT_SUPPORT, - DTYPE_NOT_SUPPORT -}; - -#pragma pack(push, 1) -//One byte alignment -struct SysOpInfo { - uint64_t opLen; - uint64_t opType; - OpKernelType kernelsType; -}; - -struct OpParamInfo { - uint64_t num; - uint64_t dtypeList; - uint64_t formatList; -}; - -struct SysOpCheckInfo { - uint64_t opListNum; - uint64_t offSetLen; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; - -struct SysOpCheckResp { - uint64_t opListNum; - bool isWithoutJson; - uint64_t returnCodeList; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; -#pragma pack(pop) -#endif // AICPU_OP_TYPE_LIST_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index e491d43f..972f470c 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -33,15 +33,6 @@ extern "C" { -/** - * @brief Get the rank number in the group. - * - * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); - /** * @brief Get the rank number in the group. * @@ -51,15 +42,6 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); */ HcclResult HcomGetRankSize(const char *group, u32 *rankSize); -/** - * @brief Get the rank number of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); - /** * @brief Get the rank number of this rank's server within the group. 
* @@ -69,15 +51,6 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); */ HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); -/** - * @brief Get the rank id of this rank. - * - * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. - * @return HcclResult - */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); - /** * @brief Get the rank id of this rank. * @@ -87,15 +60,6 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); */ HcclResult HcomGetRankId(const char *group, u32 *rankId); -/** - * @brief Get the local rank id of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); - /** * @brief Get the local rank id of this rank's server within the group. * @@ -105,16 +69,6 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); */ HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); -/** - * @brief Get the world rank id according to the group rank id. - * - * @param group A string identifying the group name. - * @param groupRank An integer(u32) identifying the group rank id. - * @param worldRank A pointer identifying the world rank id. - * @return HcclResult - */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); - /** * @brief Get the world rank id according to the group rank id. * @@ -125,16 +79,6 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); -/** - * @brief Get the group rank id according to the world rank id. - * - * @param worldRank An integer(u32) identifying the world rank id. - * @param group A string identifying the group name. 
- * @param groupRank A pointer identifying the group rank id. - * @return HcclResult - */ -HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); - /** * @brief Get the group rank id according to the world rank id. * @@ -145,16 +89,6 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, */ HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); -/** - * @brief Create group. - * - * @param group A string identifying the group name. - * @param rankNum An integer(u32) identifying the number of ranks in the group. - * @param rankIds A list identifying the ranks in the group. - * @return HcclResult - */ -HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); - /** * @brief Create group. * @@ -165,14 +99,6 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); */ HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); -/** - * @brief Destroy group - * - * @param group A string identifying the group name. - * @return HcclResult - */ -HcclResult hcom_destroy_group(const char *group); - /** * @brief Destroy group * @@ -189,46 +115,54 @@ HcclResult HcomDestroyGroup(const char *group); * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** - * @brief Set the gradient split strategy with in the group, according to gradient index. + * @brief Set the gradient split strategy with in the group, according to gradient data size. * * @param group A string identifying the group name. * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param IdxList A list identifying the index of end gradient in each segment. 
+ * @param sizeList A list identifying the percent of each segment. * @return HcclResult */ -extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Initialize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecInitialize(); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Finalize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecFinalize(); /** - * @brief Register memories and init resources for remote access. + * @brief Put collective communication operation into hcom executor. * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. + * @param opInfo information about collective communication operation. + * @param callback callback after collective communication operation. 
* @return HcclResult */ -extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +/** + * @brief Put remote access operation into hcom executor. + * + * @param remoteAccessType operation type (read or write). + * @param addrInfos address information about collective communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); /** * @brief Register memories and init resources for remote access. @@ -239,16 +173,6 @@ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrLis */ extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); -HcclResult HcomExecInitialize(); - -HcclResult HcomExecFinalize(); - -HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); - -HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, - const std::vector& addrInfos, - std::function callback); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index ad48f70b..005014ed 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -279,8 +279,9 @@ typedef struct { #define M_NAME_MAX MAX_FNAME #define M_F_OK F_OK -#define M_R_OK R_OK +#define M_X_OK X_OK #define M_W_OK W_OK +#define M_R_OK R_OK #define MM_DT_DIR DT_DIR #define MM_DT_REG DT_REG diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index cecdd4a7..49e97a5d 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ 
-322,6 +322,7 @@ typedef VOID (*mmPf)(VOID); #define M_NAME_MAX _MAX_FNAME #define M_F_OK 0 +#define M_X_OK 1 #define M_W_OK 2 #define M_R_OK 4 diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h new file mode 100644 index 00000000..5fed8960 --- /dev/null +++ b/third_party/fwkacllib/inc/register/op_kernel_registry.h @@ -0,0 +1,49 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#define INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#include +#include +#include "register/register_types.h" +#include "register.h" + +namespace ge { +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { + public: + using CreateFn = HostCpuOp* (*)(); + ~OpKernelRegistry(); + + static OpKernelRegistry& GetInstance() { + static OpKernelRegistry instance; + return instance; + } + + bool IsRegistered(const std::string &op_type); + + void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); + + std::unique_ptr CreateHostCpuOp(const std::string &op_type); + + private: + OpKernelRegistry(); + class OpKernelRegistryImpl; + /*lint -e148*/ + std::unique_ptr impl_; +}; +} // namespace ge + +#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h new file mode 100644 index 00000000..318eb3ba --- /dev/null +++ b/third_party/fwkacllib/inc/register/op_registry.h @@ -0,0 +1,96 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_OP_REGISTRY_H_ +#define INC_REGISTER_OP_REGISTRY_H_ + +#include +#include +#include +#include +#include + +#include "register/register.h" + +namespace domi { +enum RemoveInputType { + OMG_MOVE_TYPE_DTYPE = 0, + OMG_MOVE_TYPE_VALUE, + OMG_MOVE_TYPE_SHAPE, + OMG_MOVE_TYPE_FORMAT, + OMG_MOVE_TYPE_AXIS, + OMG_MOVE_TYPE_SCALAR_VALUE, + OMG_REMOVE_TYPE_WITH_COND = 1000, + OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, + OMG_INPUT_REORDER, +}; + +struct RemoveInputConfigure { + int inputIdx = INT_MAX; + std::string attrName; + RemoveInputType moveType; + bool attrValue = false; + std::string originalType; + std::vector input_order; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { + public: + static OpRegistry *Instance(); + + std::vector registrationDatas; + + bool Register(const OpRegistrationData ®_data); + + domi::ImplyType GetImplyType(const std::string &op_type); + + void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); + + domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); + + domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type, + const std::string &ori_type); + + domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); + + Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func); + + domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); + + const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; + + bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type); + + ParseOpToGraphFunc GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type); + + private: + 
std::unordered_map op_run_mode_map_; + std::unordered_map op_parse_params_fn_map_; + std::unordered_map parse_params_by_op_func_map_; + std::unordered_map fusion_op_parse_params_fn_map_; + std::unordered_map fusion_parse_params_by_op_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_; + std::unordered_map> remove_input_configure_map_; + std::unordered_map origin_type_to_om_type_; + std::unordered_map parse_op_to_graph_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_v2_; +}; +} // namespace domi +#endif // INC_REGISTER_OP_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index b9b2cbe5..ebfc09f3 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -81,26 +81,17 @@ typedef enum tagRtLimitType { } rtLimitType_t; typedef struct rtExceptionInfo { - uint32_t taskid; - uint32_t streamid; - uint32_t tid; - uint32_t deviceid; + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; } rtExceptionInfo; -typedef struct rtTaskFailInfo { - uint32_t taskid; - uint32_t streamid; - uint32_t tid; - uint32_t deviceid; - uint32_t retcode; -} rtTaskFailInfo; - typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); -typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); - typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -143,13 +134,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type); * @ingroup profiling_base * @brief start rts profiler. */ -RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); +RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); /** * @ingroup profiling_base * @brief stop rts profiler. 
*/ -RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); +RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); /** * @ingroup profiling_base @@ -209,7 +200,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal * @param [out] NA * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback); /** * @ingroup dvrt_base diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 12a407d7..8bfc9893 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -42,6 +42,7 @@ typedef enum tagRtChipType { CHIP_MDC, CHIP_LHISI, CHIP_DC, + CHIP_CLOUD_V2, CHIP_END, } rtChipType_t; @@ -62,6 +63,7 @@ typedef enum tagRtPlatformType { PLATFORM_LHISI_ES, PLATFORM_LHISI_CS, PLATFORM_DC, + PLATFORM_CLOUD_V2, PLATFORM_END, } rtPlatformType_t; @@ -119,7 +121,9 @@ typedef struct tagRtMemoryConfig { uint32_t compilerSize; } rtMemoryConfig_t; -typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; +typedef struct tagRtPlatformConfig { + uint32_t platformConfig; +} rtPlatformConfig_t; /** * @ingroup diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index 4be49a8c..ee0d8f0a 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -47,7 +47,7 @@ typedef struct tagRtGroupInfo { uint32_t aivectorNum; uint32_t sdmaNum; uint32_t activeStreamNum; - void* extrPtr; + void *extrPtr; } rtGroupInfo_t; /** diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index d1a91a9b..d6ffbc9a 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ 
b/third_party/fwkacllib/inc/runtime/dev.h @@ -185,7 +185,7 @@ RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceCanAccessPeer(int32_t* canAccessPeer, uint32_t device, uint32_t peerDevice); +RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice); /** * @ingroup dvrt_dev diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 5f519442..f44b181c 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -387,7 +387,7 @@ typedef void *rtModel_t; * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ - RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); +RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); /** * @ingroup rt_kernel diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index e65d8604..32bd9e6b 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -159,11 +159,11 @@ typedef struct rtAiCoreMemorySize { * @ingroup dvrt_mem * @brief memory type */ -typedef enum tagRtMemoryType { - RT_MEMORY_TYPE_HOST = 1, - RT_MEMORY_TYPE_DEVICE = 2 , - RT_MEMORY_TYPE_SVM = 3, - RT_MEMORY_TYPE_DVPP = 4 +typedef enum tagRtMemoryType { + RT_MEMORY_TYPE_HOST = 1, + RT_MEMORY_TYPE_DEVICE = 2, + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 } rtMemoryType_t; /** @@ -179,23 +179,23 @@ typedef struct tagRtPointerAttributes { typedef struct rtMallocHostSharedMemoryIn { - const char* name; - const uint64_t size; - uint32_t flag; + const char *name; + const uint64_t size; + uint32_t flag; } rtMallocHostSharedMemoryIn; typedef struct rtMallocHostSharedMemoryOut { - int fd; - void* ptr; - void* devPtr; + int fd; 
+ void *ptr; + void *devPtr; } rtMallocHostSharedMemoryOut; typedef struct rtFreeHostSharedMemoryIn { - const char* name; - const uint64_t size; - int fd; - void* ptr; - void* devPtr; + const char *name; + const uint64_t size; + int fd; + void *ptr; + void *devPtr; } rtFreeHostSharedMemoryIn; @@ -267,7 +267,7 @@ RTS_API rtError_t rtFreeHost(void *hostPtr); */ RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, - rtMallocHostSharedMemoryOut *out); + rtMallocHostSharedMemoryOut *out); /** * @ingroup dvrt_mem diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 388fd3c2..6b9f80ae 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -36,6 +36,7 @@ extern "C" { #define RT_STREAM_FORBIDDEN_DEFAULT (0x10) #define RT_STREAM_HEAD (0x20) #define RT_STREAM_PRIMARY_DEFAULT (0x40) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) /** * @ingroup stream_type diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h new file mode 100644 index 00000000..b642cbc8 --- /dev/null +++ b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h @@ -0,0 +1,52 @@ +/** +* @file ExternalSoftDp.h +* +* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ + +#ifndef EXTERNALSOFTDP_H +#define EXTERNALSOFTDP_H + +#include <stdint.h> + +extern "C" { +struct SoftDpProcsessInfo { + uint8_t* inputBuffer; + uint32_t inputBufferSize; + + uint8_t* outputBuffer; + uint32_t outputBufferSize; + + uint32_t outputWidth; + uint32_t outputHeight; + + uint32_t reserved; +}; + +struct DpCropInfo { + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; +}; + +/* + * @brief decode and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); + +/* + * @brief decode crop and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @param [in] const DpCropInfo& cropInfo: crop struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); +} +#endif // EXTERNALSOFTDP_H diff --git a/third_party/fwkacllib/inc/toolchain/plog.h b/third_party/fwkacllib/inc/toolchain/plog.h new file mode 100644 index 00000000..0d42e31d --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/plog.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _PLOG_H_ +#define _PLOG_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef WIN +#define WIN 1 +#endif + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE == LINUX) +#define DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DLL_EXPORT _declspec(dllexport) +#endif + +/** + * @ingroup plog + * @brief DlogReportInitialize: init log in service process before all device setting. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportInitialize(); + +/** + * @ingroup plog + * @brief DlogReportFinalize: release log resource in service process after all device reset. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportFinalize(); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // _PLOG_H_ diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index bce58f32..2ebce7d9 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,7 +18,9 @@ #define D_SYSLOG_H_ #ifdef __cplusplus +#ifndef LOG_CPP extern "C" { +#endif #endif // __cplusplus #ifndef LINUX @@ -105,6 +107,7 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -116,6 +119,18 @@ typedef struct tagKV { char *value; } KeyValue; +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + /** * @ingroup slog * @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process 
type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + /** * @ingroup slog * @brief dlog_error: print error log @@ -367,6 +390,121 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus +#ifndef LOG_CPP } +#endif // LOG_CPP #endif // __cplusplus + +#ifdef LOG_CPP +#ifdef __cplusplus +extern "C" { +#endif +/** + * @ingroup slog + * @brief DlogGetlevelForC: get module loglevel and enableEvent + * + * @param [in]moduleId: module id(see slog.h, eg: CCE), others: invalid + * @param [out]enableEvent: 1: enable; 0: disable + * @return: module level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + */ +DLL_EXPORT int DlogGetlevelForC(int moduleId, int *enableEvent); + +/** + * @ingroup slog + * @brief DlogSetlevelForC: set module loglevel and enableEvent + * + * @param [in]moduleId: module id(see slog.h, eg: CCE), -1: all modules, others: invalid + * @param [in]level: log level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + * @param [in]enableEvent: 1: enable; 0: disable, others:invalid + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetlevelForC(int moduleId, int level, int enableEvent); + +/** + * @ingroup slog + * @brief CheckLogLevelForC: check module level enable or not + * users no need to call it because all dlog interface(include inner interface) has already called + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG + * @return: 1:enable, 0:disable + */ +DLL_EXPORT int CheckLogLevelForC(int moduleId, int logLevel); + +/** + * @ingroup slog + * @brief DlogSetAttrForC: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION 
+ * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); + +/** + * @ingroup slog + * @brief DlogForC: print log, need caller to specify level + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogForC(moduleId, level, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogSubForC: print log, need caller to specify level and submodule + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]submodule: eg: engine + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogSubForC(moduleId, submodule, level, fmt, ...) 
\ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogWithKVForC: print log, need caller to specify level and other parameters + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]pstKVArray: key-value array + * @param [in]kvNum: key-value element num in array + * @param [in]fmt: log content + */ +#define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogFlushForC: flush log buffer to file + */ +DLL_EXPORT void DlogFlushForC(void); + +/** + * @ingroup slog + * @brief Internal log interface, other modules are not allowed to call this interface + */ +void DlogInnerForC(int moduleId, int level, const char *fmt, ...); +void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); + +#ifdef __cplusplus +} +#endif +#endif // LOG_CPP #endif // D_SYSLOG_H_