Browse Source

!871 rm compile macros

From: @chen_yemeng
Reviewed-by: 
Signed-off-by:
tags/v1.2.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
df25efd308
52 changed files with 546 additions and 477 deletions
  1. +4
    -4
      CMakeLists.txt
  2. +1
    -0
      cmake/intf_pub_linux.cmake
  3. +4
    -7
      ge/CMakeLists.txt
  4. +0
    -8
      ge/client/ge_api.cc
  5. +0
    -3
      ge/common/CMakeLists.txt
  6. +1
    -3
      ge/executor/CMakeLists.txt
  7. +0
    -5
      ge/ge_local_engine/CMakeLists.txt
  8. +0
    -38
      ge/ge_local_engine/engine/host_cpu_engine.cc
  9. +0
    -1
      ge/ge_runtime/CMakeLists.txt
  10. +0
    -6
      ge/graph/manager/graph_manager.cc
  11. +0
    -8
      ge/graph/manager/graph_mem_allocator.cc
  12. +0
    -6
      ge/graph/manager/graph_mem_allocator.h
  13. +1
    -1
      ge/graph/manager/host_mem_allocator.h
  14. +0
    -9
      ge/graph/manager/host_mem_manager.cc
  15. +0
    -4
      ge/graph/manager/host_mem_manager.h
  16. +5
    -1
      ge/graph/optimize/graph_optimize.cc
  17. +2
    -67
      ge/graph/passes/assign_remove_pass.cc
  18. +1
    -2
      ge/graph/passes/assign_remove_pass.h
  19. +0
    -6
      ge/graph/passes/constant_fuse_same_pass.cc
  20. +0
    -13
      ge/graph/passes/constant_fuse_same_pass.h
  21. +1
    -2
      ge/graph/passes/inplace_support_check_pass.cc
  22. +0
    -11
      ge/graph/preprocess/graph_preprocess.cc
  23. +0
    -5
      ge/host_cpu_engine/CMakeLists.txt
  24. +0
    -10
      ge/hybrid/common/npu_memory_allocator.cc
  25. +0
    -14
      ge/hybrid/model/hybrid_model_builder.cc
  26. +0
    -15
      ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc
  27. +0
    -3
      ge/offline/CMakeLists.txt
  28. +0
    -1
      ge/plugin/engine/CMakeLists.txt
  29. +0
    -3
      inc/framework/common/ge_types.h
  30. +0
    -2
      inc/framework/omg/parser/model_parser.h
  31. +1
    -1
      metadef
  32. +1
    -1
      parser
  33. +1
    -0
      tests/depends/cce/CMakeLists.txt
  34. +1
    -1
      tests/ut/common/graph/CMakeLists.txt
  35. +1
    -1
      tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc
  36. +4
    -14
      tests/ut/ge/CMakeLists.txt
  37. +60
    -60
      third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
  38. +26
    -102
      third_party/fwkacllib/inc/hccl/hcom.h
  39. +2
    -1
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
  40. +1
    -0
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
  41. +49
    -0
      third_party/fwkacllib/inc/register/op_kernel_registry.h
  42. +96
    -0
      third_party/fwkacllib/inc/register/op_registry.h
  43. +8
    -17
      third_party/fwkacllib/inc/runtime/base.h
  44. +5
    -1
      third_party/fwkacllib/inc/runtime/config.h
  45. +1
    -1
      third_party/fwkacllib/inc/runtime/context.h
  46. +1
    -1
      third_party/fwkacllib/inc/runtime/dev.h
  47. +1
    -1
      third_party/fwkacllib/inc/runtime/kernel.h
  48. +17
    -17
      third_party/fwkacllib/inc/runtime/mem.h
  49. +1
    -0
      third_party/fwkacllib/inc/runtime/stream.h
  50. +52
    -0
      third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h
  51. +59
    -0
      third_party/fwkacllib/inc/toolchain/plog.h
  52. +138
    -0
      third_party/fwkacllib/inc/toolchain/slog.h

+ 4
- 4
CMakeLists.txt View File

@@ -72,7 +72,7 @@ if (ENABLE_OPEN_SRC)
endif()
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH})
set(STATIC_ACL_LIB ${GE_LIB_PATH})
find_module(slog libslog.so ${GE_LIB_PATH})
find_module(slog libalog.so ${GE_LIB_PATH})
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH})
find_module(hccl libhccl.so ${GE_LIB_PATH})
@@ -88,7 +88,7 @@ if (ENABLE_OPEN_SRC)
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
else()
find_module(slog libslog.so ${ASCEND_ATC_DIR})
find_module(slog libalog.so ${ASCEND_ATC_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
@@ -154,7 +154,7 @@ elseif (ENABLE_D OR ENABLE_ACL)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

@@ -174,7 +174,7 @@ elseif(ENABLE_MS_TESTCASES)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})



+ 1
- 0
cmake/intf_pub_linux.cmake View File

@@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
WIN64=1
LINUX=0
LOG_CPP
)
target_link_options(intf_pub INTERFACE
-Wl,-z,relro


+ 4
- 7
ge/CMakeLists.txt View File

@@ -124,7 +124,7 @@ set(TRAIN_SRC_LIST
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc>
"graph/manager/host_mem_allocator.cc"
"graph/manager/memory_api.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc"
@@ -166,7 +166,7 @@ set(TRAIN_SRC_LIST
"graph/passes/hccl_group_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_remove_pass.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc>
"graph/passes/inplace_support_check_pass.cc"
"graph/passes/flow_ctrl_pass.cc"
"graph/passes/global_step_insert_pass.cc"
"host_kernels/transpose_kernel.cc"
@@ -409,7 +409,7 @@ set(INFER_SRC_LIST
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc>
"graph/manager/host_mem_allocator.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc"
"model/ge_model.cc"
@@ -531,7 +531,7 @@ set(INFER_SRC_LIST
"graph/passes/for_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_remove_pass.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc>
"graph/passes/inplace_support_check_pass.cc"
"graph/passes/addn_pass.cc"
"graph/passes/common_subexpression_elimination_pass.cc"
"graph/passes/remove_same_const_pass.cc"
@@ -654,7 +654,6 @@ target_compile_definitions(ge_runner PRIVATE
FMK_SUPPORT_DUMP
DAVINCI_CLOUD
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_runner PRIVATE
@@ -718,7 +717,6 @@ target_compile_definitions(ge_compiler PRIVATE
FMK_HOST_INFER
COMPILE_OMG_PACKAGE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_compiler PRIVATE
@@ -806,7 +804,6 @@ endif()

target_compile_definitions(opensrc_ascendcl PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(opensrc_ascendcl PRIVATE


+ 0
- 8
ge/client/ge_api.cc View File

@@ -32,9 +32,7 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "toolchain/plog.h"
#endif

using domi::OpRegistry;
using std::map;
@@ -132,11 +130,9 @@ Status GEInitializeImpl(const std::map<string, string> &options) {

// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitialize(const std::map<string, string> &options) {
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(options);
}

@@ -151,11 +147,9 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
std::string val = option.second.GetString();
str_options[key] = val;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(str_options);
}

@@ -200,11 +194,9 @@ Status GEFinalize() {
// to avoid memory fragmentation, use malloc_trim to release free heap memory back to the system
malloc_trim(0);

#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportFinalize() != SUCCESS) {
GELOGW("Dlog report device log finalize failed.");
}
#endif

GELOGT(TRACE_STOP, "GEFinalize finished");
return ret;


+ 0
- 3
ge/common/CMakeLists.txt View File

@@ -73,7 +73,6 @@ target_compile_definitions(ge_common PRIVATE
FMK_SUPPORT_DUMP
OS_CENTOS
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_common PRIVATE
@@ -133,7 +132,6 @@ target_compile_definitions(ge_common_static PRIVATE
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_common_static PRIVATE
@@ -182,7 +180,6 @@ target_compile_definitions(ge_common PRIVATE
FMK_SUPPORT_DUMP
OS_CENTOS
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_common PRIVATE


+ 1
- 3
ge/executor/CMakeLists.txt View File

@@ -28,7 +28,7 @@ set(SRC_LIST
"../graph/manager/trans_var_data_utils.cc"
"../graph/manager/util/debug.cc"
"../graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:../graph/manager/host_mem_allocator.cc>
"../graph/manager/host_mem_allocator.cc"
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"../model/ge_model.cc"
"../model/ge_root_model.cc"
@@ -175,7 +175,6 @@ target_compile_definitions(ge_executor PRIVATE
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(ge_executor PRIVATE
@@ -218,7 +217,6 @@ target_compile_definitions(ge_executor_shared PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
DAVINCI_SUPPORT_PROFILING
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(ge_executor_shared PRIVATE


+ 0
- 5
ge/ge_local_engine/CMakeLists.txt View File

@@ -31,7 +31,6 @@ target_compile_options(ge_local_engine PRIVATE

target_compile_definitions(ge_local_engine PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(ge_local_engine PRIVATE
@@ -73,7 +72,6 @@ target_compile_options(atc_ge_local_engine PRIVATE
target_compile_definitions(atc_ge_local_engine PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(atc_ge_local_engine PRIVATE
@@ -119,7 +117,6 @@ target_compile_options(ge_local_opskernel_builder PRIVATE

target_compile_definitions(ge_local_opskernel_builder PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(ge_local_opskernel_builder PRIVATE
@@ -161,7 +158,6 @@ target_compile_options(atc_ge_local_opskernel_builder PRIVATE

target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(atc_ge_local_opskernel_builder PRIVATE
@@ -209,7 +205,6 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE
target_compile_definitions(ge_local_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(ge_local_opskernel_builder_static PRIVATE


+ 0
- 38
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -26,7 +26,6 @@
#include "common/math/math_util.h"

namespace {
#ifndef ONLY_COMPILE_OPEN_SRC
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
case (DTYPE): { \
GeTensorPtr ge_tensor = nullptr; \
@@ -50,43 +49,6 @@ namespace {
named_outputs.emplace(tensor_name, tensor); \
break; \
}
#else
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
case (DTYPE): { \
GeTensorPtr ge_tensor = nullptr; \
if (need_create_flag) { \
GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \
std::unique_ptr<TYPE[]> buf(new (std::nothrow) TYPE[data_num]()); \
if (buf == nullptr) { \
GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \
static_cast<size_t>(sizeof(TYPE) * data_num)); \
return MEMALLOC_FAILED; \
} \
ge_tensor = MakeShared<GeTensor>(out_desc); \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \
return MEMALLOC_FAILED; \
} \
ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
outputs.emplace_back(ge_tensor); \
} else { \
ge_tensor = outputs[i]; \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
} \
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
auto tensor_name = op_desc->GetOutputNameByIndex(i); \
GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \
op_desc->GetName().c_str(), i); \
GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \
op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \
named_outputs.emplace(tensor_name, tensor); \
break; \
}
#endif
}

namespace ge {


+ 0
- 1
ge/ge_runtime/CMakeLists.txt View File

@@ -27,7 +27,6 @@ target_compile_options(ge_runtime PRIVATE

target_compile_definitions(ge_runtime PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(ge_runtime PRIVATE


+ 0
- 6
ge/graph/manager/graph_manager.cc View File

@@ -38,10 +38,8 @@
#include "graph/partition/stage_partition.h"
#include "graph/passes/addn_pass.h"
#include "graph/passes/bitcast_pass.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/passes/assign_remove_pass.h"
#include "graph/passes/inplace_support_check_pass.h"
#endif
#include "graph/passes/atomic_addr_clean_pass.h"
#include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/cast_remove_pass.h"
@@ -2269,20 +2267,16 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
ReshapeRemovePass reshape_remove_pass;
CondRemovePass condition_remove_pass;
BitcastPass bitcast_pass;
#ifndef ONLY_COMPILE_OPEN_SRC
AssignRemovePass assign_remove_pass;
InplaceSupportCheckPass inplace_support_check_pass;
#endif
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass);
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass);
names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass);
names_to_passes.emplace_back("BitcastPass", &bitcast_pass);
#ifndef ONLY_COMPILE_OPEN_SRC
if (GetContext().GetHostExecFlag()) {
names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass);
names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass);
}
#endif
GE_TIMESTAMP_START(names_to_passes);
ret = GEPass(compute_graph).Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses");


+ 0
- 8
ge/graph/manager/graph_mem_allocator.cc View File

@@ -19,9 +19,7 @@
#include <string>
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/host_mem_allocator.h"
#endif
namespace ge {
void MemoryAllocator::Initialize(uint32_t device_id) {
GELOGI("MemoryAllocator::Initialize");
@@ -192,12 +190,10 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) {
GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed.");
return ge::INTERNAL_ERROR;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) {
GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed.");
return ge::INTERNAL_ERROR;
}
#endif
return SUCCESS;
}

@@ -219,9 +215,7 @@ void MemManager::Finalize() noexcept {
// caching and rdma allocator use memory allocator, so finalize them first
FinalizeAllocatorMap(caching_allocator_map_);
FinalizeAllocatorMap(rdma_allocator_map_);
#ifndef ONLY_COMPILE_OPEN_SRC
FinalizeAllocatorMap(host_allocator_map_);
#endif
FinalizeAllocatorMap(memory_allocator_map_);
}

@@ -250,9 +244,7 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, rdma_allocator_map_);
}
#ifndef ONLY_COMPILE_OPEN_SRC
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, host_allocator_map_);
}
#endif
} // namespace ge

+ 0
- 6
ge/graph/manager/graph_mem_allocator.h View File

@@ -139,9 +139,7 @@ class MemoryAllocator {
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;
class CachingAllocator;
class RdmaPoolAllocator;
#ifndef ONLY_COMPILE_OPEN_SRC
class HostMemAllocator;
#endif
class MemManager {
public:
MemManager();
@@ -150,9 +148,7 @@ class MemManager {
static MemoryAllocator *Instance(rtMemType_t memory_type);
CachingAllocator &CachingInstance(rtMemType_t memory_type);
RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type);
#ifndef ONLY_COMPILE_OPEN_SRC
HostMemAllocator &HostMemInstance(rtMemType_t memory_type);
#endif
MemManager(const MemManager &) = delete;
MemManager &operator=(const MemManager &) = delete;
///
@@ -240,9 +236,7 @@ class MemManager {
std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_;
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_;
std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_;
#ifndef ONLY_COMPILE_OPEN_SRC
std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_;
#endif
std::recursive_mutex allocator_mutex_;
};
} // namespace ge


+ 1
- 1
ge/graph/manager/host_mem_allocator.h View File

@@ -27,7 +27,7 @@
namespace ge {
class HostMemAllocator {
public:
explicit HostMemAllocator(rtMemType_t) {}
explicit HostMemAllocator(rtMemType_t) {}
~HostMemAllocator() = default;

HostMemAllocator(const HostMemAllocator &) = delete;


+ 0
- 9
ge/graph/manager/host_mem_manager.cc View File

@@ -43,29 +43,20 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) {
return GE_GRAPH_MEMORY_ALLOC_FAILED;
}
mem_info.fd = output_para.fd;
#ifndef ONLY_COMPILE_OPEN_SRC
mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr<uint8_t[], deleter> &ptr) {
ptr.reset(reinterpret_cast<uint8_t *>(output_para.ptr));
},
[](uint8_t *ptr) {
ptr = nullptr;
});
#else
mem_info.host_address = reinterpret_cast<uint8_t *>(output_para.ptr);
#endif
mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr);
return SUCCESS;
}

Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) {
GELOGD("SharedMemAllocator::DeAllocate");
#ifndef ONLY_COMPILE_OPEN_SRC
rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd,
mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address};
#else
rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd,
mem_info.host_address, mem_info.device_address};
#endif
rtError_t rt_ret = rtFreeHostSharedMemory(&free_para);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret);


+ 0
- 4
ge/graph/manager/host_mem_manager.h View File

@@ -42,11 +42,7 @@ struct SharedMemInfo {
uint64_t mem_size = 0;
int fd = 0;
uint8_t *device_address = nullptr;
#ifndef ONLY_COMPILE_OPEN_SRC
std::shared_ptr<AlignedPtr> host_aligned_ptr = nullptr;
#else
uint8_t *host_address = nullptr;
#endif
SharedMemInfo() = default;
SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {}
};


+ 5
- 1
ge/graph/optimize/graph_optimize.cc View File

@@ -127,6 +127,10 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std
}

Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) {
if (GetContext().GetHostExecFlag()) {
// graph exec on host, no need OptimizeOriginalGraph
return SUCCESS;
}
if (compute_graph == nullptr) {
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeOriginalGraph]: compute_graph is nullptr.");
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;
@@ -162,7 +166,7 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) {
Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) {
GELOGD("OptimizeOriginalGraphJudgeInsert in");
if (GetContext().GetHostExecFlag()) {
// graph exec on host, no need OptimizeOriginalGraph
// graph exec on host, no need OptimizeOriginalGraphJudgeInsert
return SUCCESS;
}



+ 2
- 67
ge/graph/passes/assign_remove_pass.cc View File

@@ -19,6 +19,7 @@
#include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {
namespace {
constexpr uint32_t kValidInputNodeOutputNum = 1;
constexpr int32_t kAssignRefInputIndex = 0;
@@ -28,8 +29,6 @@ static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA,
ge::VARIABLE, ge::VARIABLEV2 };
}

namespace ge {
#ifndef ONLY_COMPILE_OPEN_SRC
Status AssignRemovePass::Run(NodePtr &node) {
GELOGD("AssignRemovePass running");

@@ -145,71 +144,7 @@ Status AssignRemovePass::TransformAttr(NodePtr &node) {
}
return SUCCESS;
}
#else
Status AssignRemovePass::Run(NodePtr &node) {
GELOGD("AssignRemovePass running");
if (node->GetType() != ASSIGN) {
GELOGD("No need run AssignRemovePass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str());
return SUCCESS;
}

const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex);
const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex);
if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) {
GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str());
return FAILED;
}
const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor();
const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor();
if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) {
GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str());
return FAILED;
}

if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) {
///
/// variable not-const not-const
/// \ / |
/// \ / |
/// Assign ----> variable
/// | |
/// | |
/// node node
///
GELOGI("Optimization for assign_node %s start", node->GetName().c_str());
if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) {
GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str());
return FAILED;
}
AddNodeDeleted(node);

const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc();
const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc();
if ((ref_input == nullptr) || (value_input == nullptr)) {
GELOGE(FAILED, "value input is null");
return FAILED;
}
if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME,
ref_input->GetName())) {
GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed.");
return FAILED;
}

// variable has one and only one input
if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str());
return FAILED;
}
if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str());
return FAILED;
}
}

GELOGD("AssignRemovePass success");
return SUCCESS;
}
#endif
///
/// @brief Check whether assign_node needs to be optimized
/// @param [in] assign_node
@@ -218,7 +153,7 @@ Status AssignRemovePass::Run(NodePtr &node) {
/// @return bool (whether the optimization condition is matched)
///
bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor,
const OutDataAnchorPtr &value_peer_anchor) {
const OutDataAnchorPtr &value_peer_anchor) {
GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s",
node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(),
value_peer_anchor->GetOwnerNode()->GetName().c_str());


+ 1
- 2
ge/graph/passes/assign_remove_pass.h View File

@@ -25,7 +25,6 @@ class AssignRemovePass : public BaseNodePass {
Status Run(NodePtr &node) override;

private:
#ifndef ONLY_COMPILE_OPEN_SRC
///
/// @brief Optimize for assign_node
/// @param [in] assign_node
@@ -39,7 +38,7 @@ class AssignRemovePass : public BaseNodePass {
/// @return Status
///
Status TransformAttr(NodePtr &node);
#endif
///
/// @brief Check whether assign_node needs to be optimized
/// @param [in] assign_node


+ 0
- 6
ge/graph/passes/constant_fuse_same_pass.cc View File

@@ -115,21 +115,15 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph,
TypeUtils::DataTypeToSerialString(data_type).c_str());
continue;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) {
GELOGW("aligned_ptr is null while size is not 0");
continue;
}
#endif
++insert_const_nums;

SameConstKey map_key;
map_key.data_size = type_size;
#ifndef ONLY_COMPILE_OPEN_SRC
map_key.aligned_ptr = weight->MutableData().GetAlignedPtr();
#else
map_key.data = weight->GetData().GetData();
#endif
map_key.data_type = data_type;
map_key.format = output_tensor->GetFormat();
map_key.shape = output_tensor->GetShape().GetDims();


+ 0
- 13
ge/graph/passes/constant_fuse_same_pass.h View File

@@ -21,20 +21,14 @@
#include <set>
#include <utility>
#include <vector>
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/aligned_ptr.h"
#endif
#include "graph/types.h"
#include "inc/graph_pass.h"

namespace ge {
struct SameConstKey {
int data_size;
#ifndef ONLY_COMPILE_OPEN_SRC
std::shared_ptr<AlignedPtr> aligned_ptr;
#else
const uint8_t *data;
#endif
DataType data_type;
Format format;
std::vector<int64_t> shape;
@@ -44,19 +38,12 @@ struct SameConstKey {
if (data_size != key.data_size) {
return data_size < key.data_size;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (data_size != 0) {
int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size);
if (ret != 0) {
return ret < 0;
}
}
#else
int ret = memcmp(data, key.data, data_size);
if (ret != 0) {
return ret < 0;
}
#endif
if (data_type != key.data_type) {
return data_type < key.data_type;
}


+ 1
- 2
ge/graph/passes/inplace_support_check_pass.cc View File

@@ -19,6 +19,7 @@
#include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {
namespace {
constexpr uint32_t kInplaceSupportOutputIndex = 0;
constexpr uint32_t kInplaceSupportOutputNum = 1;
@@ -26,8 +27,6 @@ static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge:
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
}

namespace ge {
Status InplaceSupportCheckPass::Run(NodePtr &node) {
GELOGD("InplaceSupportCheckPass running");
if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) {


+ 0
- 11
ge/graph/preprocess/graph_preprocess.cc View File

@@ -38,9 +38,6 @@
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "ge/ge_api_types.h"
#ifdef ONLY_COMPILE_OPEN_SRC
#include "graph/passes/assign_remove_pass.h"
#endif
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h"
@@ -1865,9 +1862,6 @@ Status GraphPrepare::PrepareOptimize() {
VarIsInitializedOpPass var_is_initialized_pass;
ParallelConcatStartOpPass parallel_concat_start_op_pass;
IdentityPass identity_pass(false);
#ifdef ONLY_COMPILE_OPEN_SRC
AssignRemovePass assign_remove_pass;
#endif
SnapshotPass snapshot_pass;
if (!options_.train_graph_flag) {
names_to_passes.emplace_back("DropOutPass", &dropout_pass);
@@ -1882,11 +1876,6 @@ Status GraphPrepare::PrepareOptimize() {
names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass);
names_to_passes.emplace_back("ParallelConcatStartOpPass", &parallel_concat_start_op_pass);
names_to_passes.emplace_back("IdentityPass", &identity_pass);
#ifdef ONLY_COMPILE_OPEN_SRC
if (GetContext().GetHostExecFlag()) {
names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass);
}
#endif
GE_TIMESTAMP_START(names_to_passes);
ret = ge_passes.Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses");


+ 0
- 5
ge/host_cpu_engine/CMakeLists.txt View File

@@ -25,7 +25,6 @@ target_compile_options(host_cpu_engine PRIVATE

target_compile_definitions(host_cpu_engine PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(host_cpu_engine PRIVATE
@@ -66,7 +65,6 @@ target_compile_options(atc_host_cpu_engine PRIVATE
target_compile_definitions(atc_host_cpu_engine PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(atc_host_cpu_engine PRIVATE
@@ -111,7 +109,6 @@ target_compile_options(host_cpu_opskernel_builder PRIVATE

target_compile_definitions(host_cpu_opskernel_builder PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(host_cpu_opskernel_builder PRIVATE
@@ -152,7 +149,6 @@ target_compile_options(atc_host_cpu_opskernel_builder PRIVATE

target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
@@ -199,7 +195,6 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE
target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(host_cpu_opskernel_builder_static PRIVATE


+ 0
- 10
ge/hybrid/common/npu_memory_allocator.cc View File

@@ -20,9 +20,7 @@
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/host_mem_allocator.h"
#endif

namespace ge {
namespace hybrid {
@@ -67,11 +65,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
if (mem_type == RDMA_HBM) {
buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_);
} else if (mem_type == HOST_DDR) {
#ifndef ONLY_COMPILE_OPEN_SRC
buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size);
#else
buffer = malloc(allocate_size);
#endif
} else {
if (allocate_size > kMaxHbmMemorySize) {
GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size);
@@ -108,11 +102,7 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) {
if (mem_type == RDMA_HBM) {
MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_);
} else if (mem_type == HOST_DDR) {
#ifndef ONLY_COMPILE_OPEN_SRC
MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data);
#else
free(data);
#endif
} else {
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_);
}


+ 0
- 14
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -25,10 +25,8 @@
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#endif
#include "graph/utils/graph_utils.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/node_executor/node_executor.h"
@@ -865,7 +863,6 @@ Status HybridModelBuilder::InitConstantOps() {

std::unique_ptr<TensorValue> var_tensor;
if (GetContext().GetHostExecFlag()) {
#ifndef ONLY_COMPILE_OPEN_SRC
GE_CHECK_NOTNULL(ge_tensor);
// Address for eigen kernel should be aligned with 16 bytes
// Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned
@@ -878,11 +875,6 @@ Status HybridModelBuilder::InitConstantOps() {
}
var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(),
aligned_tensor.GetData().size()));
#else
auto buffer = ge_tensor->MutableData();
GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize());
var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize()));
#endif
} else {
GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor));
GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize());
@@ -937,7 +929,6 @@ Status HybridModelBuilder::InitVariableTensors() {
GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str());
return GE_GRAPH_MALLOC_FAILED;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
tensor_size) == nullptr) {
GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
@@ -947,11 +938,6 @@ Status HybridModelBuilder::InitVariableTensors() {

std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(),
tensor_size));
#else
GELOGD("Host variable [%s] malloc success.", it.first.c_str());

std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size));
#endif
GE_CHECK_NOTNULL(tensor);
hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor));
}


+ 0
- 15
ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc View File

@@ -18,10 +18,8 @@
#include "hybrid/node_executor/host_cpu/kernel_factory.h"
#include "graph/passes/folding_pass.h"
#include "hybrid/model/hybrid_model.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#endif
#include "ge_local_engine/engine/host_cpu_engine.h"

namespace ge {
@@ -54,18 +52,11 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
auto input_desc_ptr = context.GetInputDesc(i);
GE_CHECK_NOTNULL(input_desc_ptr);
const auto &input_desc = *input_desc_ptr;
#ifndef ONLY_COMPILE_OPEN_SRC
auto tensor = context.GetInput(i);
GE_CHECK_NOTNULL(tensor);
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second);
auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first);
#else
GE_CHECK_NOTNULL(context.GetInput(i));
auto in_tensor = MakeShared<GeTensor>(input_desc,
reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()),
context.GetInput(i)->GetSize());
#endif
GE_CHECK_NOTNULL(in_tensor);
in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType());
in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape());
@@ -84,15 +75,9 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
}
auto tensor = context.GetOutput(i);
GE_CHECK_NOTNULL(tensor);
#ifndef ONLY_COMPILE_OPEN_SRC
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second);
auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first);
#else
auto out_tensor = MakeShared<GeTensor>(output_desc,
reinterpret_cast<const uint8_t *>(tensor->GetData()),
tensor->GetSize());
#endif
GE_CHECK_NOTNULL(out_tensor);
out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType());
out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape());


+ 0
- 3
ge/offline/CMakeLists.txt View File

@@ -30,7 +30,6 @@ target_compile_definitions(atc PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(atc PRIVATE
@@ -93,7 +92,6 @@ target_compile_definitions(atc_atc.bin PRIVATE
COMPILE_OMG_PACKAGE
google=ascend_private
LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(atc_atc.bin PRIVATE
@@ -154,7 +152,6 @@ target_compile_options(fwk_atc.bin PRIVATE
-O2
-Wno-deprecated-declarations
-fno-common
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_definitions(fwk_atc.bin PRIVATE


+ 0
- 1
ge/plugin/engine/CMakeLists.txt View File

@@ -14,7 +14,6 @@ target_compile_options(engine PRIVATE
target_compile_definitions(engine PRIVATE
REUSE_MEMORY=1
PROTOBUF_INLINE_NOT_IN_HEADERS=0
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_include_directories(engine PRIVATE


+ 0
- 3
inc/framework/common/ge_types.h View File

@@ -37,10 +37,7 @@ enum FrameworkType {
MINDSPORE = 1,
TENSORFLOW = 3,
ANDROID_NN,
#ifndef ONLY_COMPILE_OPEN_SRC
ONNX,
#endif
FRAMEWORK_RESERVED,
};

enum OpEngineType {


+ 0
- 2
inc/framework/omg/parser/model_parser.h View File

@@ -65,7 +65,6 @@ class ModelParser {
*/
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;

#ifndef ONLY_COMPILE_OPEN_SRC
/**
* @ingroup domi_omg
* @brief Parse relevant data from memory and save it to graph
@@ -77,7 +76,6 @@ class ModelParser {
* @author
*/
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
#endif

/**
* @ingroup domi_omg


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900
Subproject commit f08320a6d699f5b537bf66da572bf225b9cd330e

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b
Subproject commit b2df31dc5810283e2e483df5ba9517e2ece132a0

+ 1
- 0
tests/depends/cce/CMakeLists.txt View File

@@ -46,6 +46,7 @@ set(SRCS
"${GE_CODE_DIR}/metadef/graph/anchor.cc"
"${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc"
"${GE_CODE_DIR}/metadef/graph/buffer.cc"
"${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc"
"${GE_CODE_DIR}/metadef/graph/compute_graph.cc"
"${GE_CODE_DIR}/metadef/graph/graph.cc"
"${GE_CODE_DIR}/metadef/graph/model.cc"


+ 1
- 1
tests/ut/common/graph/CMakeLists.txt View File

@@ -67,6 +67,7 @@ set(SRC_FILES
"${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc"
"${GE_CODE_DIR}/metadef/graph/attr_value.cc"
"${GE_CODE_DIR}/metadef/graph/buffer.cc"
"${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc"
"${GE_CODE_DIR}/metadef/graph/compute_graph.cc"
"${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc"
"${GE_CODE_DIR}/metadef/graph/graph.cc"
@@ -110,7 +111,6 @@ target_compile_options(ut_libgraph PRIVATE

target_compile_definitions(ut_libgraph PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ut_libgraph


+ 1
- 1
tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc View File

@@ -230,7 +230,7 @@ TEST_F(UtestGeTensor, test_tensor_invalid_null) {
GeTensor tensor(msg_owner, nullptr);
EXPECT_EQ(tensor.GetData().size(), 0);
EXPECT_EQ(tensor.MutableData().size(), 0);
EXPECT_EQ(tensor.SetData(Buffer(100)), ge::GRAPH_PARAM_INVALID);
EXPECT_EQ(tensor.SetData(Buffer(100)), GRAPH_SUCCESS);

TensorUtils::SetWeightSize(tensor.MutableTensorDesc(), 100);
EXPECT_EQ(TensorUtils::GetWeightSize(tensor), 0);


+ 4
- 14
tests/ut/ge/CMakeLists.txt View File

@@ -89,6 +89,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc"
"${GE_CODE_DIR}/metadef/graph/attr_value.cc"
"${GE_CODE_DIR}/metadef/graph/buffer.cc"
"${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc"
"${GE_CODE_DIR}/metadef/graph/compute_graph.cc"
"${GE_CODE_DIR}/metadef/graph/graph.cc"
"${GE_CODE_DIR}/metadef/graph/gnode.cc"
@@ -227,6 +228,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/passes/for_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/inplace_support_check_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc"
@@ -303,6 +305,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/common/local_context.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_op.cc"
"${GE_CODE_DIR}/ge/common/model_saver.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc"
@@ -370,6 +373,7 @@ set(GRAPH_LOAD_COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc"
"${GE_CODE_DIR}/ge/common/thread_pool.cc"
)

@@ -723,7 +727,6 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS})

target_compile_definitions(ge_ut_common PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ge_ut_common PRIVATE
@@ -738,7 +741,6 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F

target_compile_definitions(ge_ut_common_format PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ge_ut_common_format PRIVATE
@@ -795,7 +797,6 @@ add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_SRCS} $

target_compile_definitions(ge_load_common PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ge_load_common PRIVATE
@@ -810,7 +811,6 @@ add_library(ge_execute_common STATIC ${GRAPH_EXECUTE_COMMON_SRC_FILES} ${PROTO_S

target_compile_definitions(ge_execute_common PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ge_execute_common PRIVATE
@@ -825,7 +825,6 @@ add_library(ge_build_common STATIC ${GRAPH_BUILD_COMMON_SRC_FILES} ${PROTO_SRCS}

target_compile_definitions(ge_build_common PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ge_build_common PRIVATE
@@ -898,10 +897,6 @@ target_compile_options(ut_libge_others_utest PRIVATE
-g --coverage -fprofile-arcs -ftest-coverage
)

target_compile_definitions(ut_libge_others_utest PRIVATE
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ut_libge_others_utest
$<BUILD_INTERFACE:intf_pub>
ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov
@@ -919,10 +914,6 @@ target_compile_options(ut_libge_kernel_utest PRIVATE
-g --coverage -fprofile-arcs -ftest-coverage
)

target_compile_definitions(ut_libge_kernel_utest PRIVATE
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ut_libge_kernel_utest
$<BUILD_INTERFACE:intf_pub>
ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov
@@ -943,7 +934,6 @@ target_compile_options(ut_libge_distinct_load_utest PRIVATE

target_compile_definitions(ut_libge_distinct_load_utest PRIVATE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_link_libraries(ut_libge_distinct_load_utest


+ 60
- 60
third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h View File

@@ -1,60 +1,60 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_
enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};
enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};
#pragma pack(push, 1)
//One byte alignment
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};
struct OpParamInfo {
uint64_t num;
uint64_t dtypeList;
uint64_t formatList;
};
struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
#endif // AICPU_OP_TYPE_LIST_H_
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_
// Kernel category recorded for an op (stored in SysOpInfo::kernelsType).
// NOTE(review): presumably TF_KERNEL = TensorFlow-backed AICPU kernel and
// CPU_KERNEL = native AICPU kernel; inferred from the names only, confirm.
enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};
// Reasons an op check can be rejected. Enumerators take the implicit values
// 0, 1, 2; note that no explicit "success" value is declared in this enum.
enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};
#pragma pack(push, 1)
// One-byte alignment: every struct up to the matching pack(pop) is laid out
// without padding, so member order and types define the exact byte layout.
// NOTE(review): the uint64_t "*List" / "opType" members look like raw addresses
// or offsets of variable-length data, and "*Len" / "*Num" like byte/element
// counts; this header does not show how they are interpreted. Confirm with the
// producer/consumer of these messages before relying on it.

// Description of a single op: two 64-bit fields followed by its kernel category.
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};
// Parameter description: a count plus 64-bit dtype-list and format-list fields.
struct OpParamInfo {
uint64_t num;
uint64_t dtypeList;
uint64_t formatList;
};
// Request-side record of an op check.
struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
// Response-side record of an op check. The bool sits between 64-bit members;
// it occupies no padding only because of the pack(1) setting above.
struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
#endif // AICPU_OP_TYPE_LIST_H_

+ 26
- 102
third_party/fwkacllib/inc/hccl/hcom.h View File

@@ -33,15 +33,6 @@ extern "C" {



/**
* @brief Get the rank number in the group.
*
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @return HcclResult
*/
HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);

/**
* @brief Get the rank number in the group.
*
@@ -51,15 +42,6 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
*/
HcclResult HcomGetRankSize(const char *group, u32 *rankSize);

/**
* @brief Get the rank number of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @return HcclResult
*/
HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);

/**
* @brief Get the rank number of this rank's server within the group.
*
@@ -69,15 +51,6 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
*/
HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize);

/**
* @brief Get the rank id of this rank.
*
* @param group A string identifying the group name.
* @param rankId A pointer identifying the rank id.
* @return HcclResult
*/
HcclResult hcom_get_rank_id(const char *group, u32 *rankId);

/**
* @brief Get the rank id of this rank.
*
@@ -87,15 +60,6 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
*/
HcclResult HcomGetRankId(const char *group, u32 *rankId);

/**
* @brief Get the local rank id of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param localRankId A pointer identifying the local rank id.
* @return HcclResult
*/
HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);

/**
* @brief Get the local rank id of this rank's server within the group.
*
@@ -105,16 +69,6 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
*/
HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId);

/**
* @brief Get the world rank id according to the group rank id.
*
* @param group A string identifying the group name.
* @param groupRank An integer(u32) identifying the group rank id.
* @param worldRank A pointer identifying the world rank id.
* @return HcclResult
*/
HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);

/**
* @brief Get the world rank id according to the group rank id.
*
@@ -125,16 +79,6 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank,
*/
HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank);

/**
* @brief Get the group rank id according to the world rank id.
*
* @param worldRank An integer(u32) identifying the world rank id.
* @param group A string identifying the group name.
* @param groupRank A pointer identifying the group rank id.
* @return HcclResult
*/
HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank);

/**
* @brief Get the group rank id according to the world rank id.
*
@@ -145,16 +89,6 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group,
*/
HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank);

/**
* @brief Create group.
*
* @param group A string identifying the group name.
* @param rankNum An integer(u32) identifying the number of ranks in the group.
* @param rankIds A list identifying the ranks in the group.
* @return HcclResult
*/
HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds);

/**
* @brief Create group.
*
@@ -165,14 +99,6 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds);
*/
HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds);

/**
* @brief Destroy group
*
* @param group A string identifying the group name.
* @return HcclResult
*/
HcclResult hcom_destroy_group(const char *group);

/**
* @brief Destroy group
*
@@ -189,46 +115,54 @@ HcclResult HcomDestroyGroup(const char *group);
* @param IdxList A list identifying the index of end gradient in each segment.
* @return HcclResult
*/
extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient index.
* @brief Set the gradient split strategy within the group, according to gradient data size.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param IdxList A list identifying the index of end gradient in each segment.
* @param sizeList A list identifying the percent of each segment.
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient data size.
* @brief Initialize hcom executor.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param sizeList A list identifying the percent of each segment.
* @param void
* @return HcclResult
*/
extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList);
HcclResult HcomExecInitialize();

/**
* @brief Set the gradient split strategy with in the group, according to gradient data size.
* @brief Finalize hcom executor.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param sizeList A list identifying the percent of each segment.
* @param void
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);
HcclResult HcomExecFinalize();

/**
* @brief Register memories and init resources for remote access.
* @brief Put collective communication operation into hcom executor.
*
* @param addrList memory addresses for remote access.
* @param count number of remote memory addresses.
* @param opInfo information about collective communication operation.
* @param callback callback after collective communication operation.
* @return HcclResult
*/
extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count);
HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);

/**
* @brief Put remote access operation into hcom executor.
*
* @param remoteAccessType operation type (read or write).
* @param addrInfos address information for the remote access operation.
* @param callback callback after the remote access operation; it receives an HcclResult status.
* @return HcclResult
*/
HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
std::function<void(HcclResult status)> callback);

/**
* @brief Register memories and init resources for remote access.
@@ -239,16 +173,6 @@ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrLis
*/
extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count);

HcclResult HcomExecInitialize();

HcclResult HcomExecFinalize();

HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);

HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
std::function<void(HcclResult status)> callback);

#ifdef __cplusplus
}
#endif // __cplusplus


+ 2
- 1
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h View File

@@ -279,8 +279,9 @@ typedef struct {
#define M_NAME_MAX MAX_FNAME

#define M_F_OK F_OK
#define M_R_OK R_OK
#define M_X_OK X_OK
#define M_W_OK W_OK
#define M_R_OK R_OK

#define MM_DT_DIR DT_DIR
#define MM_DT_REG DT_REG


+ 1
- 0
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h View File

@@ -322,6 +322,7 @@ typedef VOID (*mmPf)(VOID);
#define M_NAME_MAX _MAX_FNAME

#define M_F_OK 0
#define M_X_OK 1
#define M_W_OK 2
#define M_R_OK 4



+ 49
- 0
third_party/fwkacllib/inc/register/op_kernel_registry.h View File

@@ -0,0 +1,49 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_
#define INC_REGISTER_OP_KERNEL_REGISTRY_H_
#include <memory>
#include <string>
#include "register/register_types.h"
#include "register.h"

namespace ge {
/**
* Registry interface for HostCpuOp factories, addressed by op type string.
*
* The constructor is private and GetInstance() hands out a function-local
* static object, so callers share one instance. All state lives behind the
* forward-declared OpKernelRegistryImpl, which is why the destructor is
* declared here and defined out of line (the impl type is incomplete in this
* header).
*/
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry {
public:
// Factory signature: takes no arguments and returns a raw HostCpuOp pointer.
using CreateFn = HostCpuOp* (*)();
~OpKernelRegistry();

// Returns the shared registry instance (constructed on first call).
static OpKernelRegistry& GetInstance() {
static OpKernelRegistry instance;
return instance;
}

// NOTE(review): presumably true when a factory was registered for op_type;
// the definition is not visible in this header.
bool IsRegistered(const std::string &op_type);

// Associates create_fn with op_type.
// NOTE(review): behaviour on duplicate registration is not visible here; confirm.
void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn);

// Returns an owning pointer to a HostCpuOp for op_type.
// NOTE(review): presumably empty (nullptr) when op_type is unknown; confirm.
std::unique_ptr<HostCpuOp> CreateHostCpuOp(const std::string &op_type);

private:
OpKernelRegistry();
class OpKernelRegistryImpl;
/*lint -e148*/
std::unique_ptr<OpKernelRegistryImpl> impl_;
};
} // namespace ge

#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_

+ 96
- 0
third_party/fwkacllib/inc/register/op_registry.h View File

@@ -0,0 +1,96 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_REGISTER_OP_REGISTRY_H_
#define INC_REGISTER_OP_REGISTRY_H_

#include <limits.h>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include "register/register.h"

namespace domi {
// Kinds of input handling a RemoveInputConfigure entry can request.
// The first group counts up from 0; the second group starts at 1000.
// NOTE(review): the meaning of each enumerator is inferred from its name only;
// confirm against the parser code that consumes these values.
enum RemoveInputType {
  OMG_MOVE_TYPE_DTYPE = 0,
  OMG_MOVE_TYPE_VALUE,
  OMG_MOVE_TYPE_SHAPE,
  OMG_MOVE_TYPE_FORMAT,
  OMG_MOVE_TYPE_AXIS,
  OMG_MOVE_TYPE_SCALAR_VALUE,
  OMG_REMOVE_TYPE_WITH_COND = 1000,
  OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE,
  OMG_INPUT_REORDER,
};

// One input-handling rule attached to an original op type
// (see OpRegistry::GetRemoveInputConfigure).
struct RemoveInputConfigure {
  // INT_MAX marks "no input index set".
  int inputIdx = INT_MAX;
  std::string attrName;
  // Explicitly initialized so that a default-constructed entry never carries an
  // indeterminate enum value. 0 is also what value-initialization
  // (RemoveInputConfigure{}) already produced, so both forms now agree.
  RemoveInputType moveType = OMG_MOVE_TYPE_DTYPE;
  bool attrValue = false;
  std::string originalType;
  std::vector<int> input_order;
};

/**
* Lookup interface for per-op registration data: imply type, parse functions,
* subgraph post-processing functions, input-removal rules and type mapping.
*
* Only declarations are visible in this header. The notes below marked
* NOTE(review) are inferences from names and signatures and must be confirmed
* against the implementation.
*/
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry {
public:
// Returns a pointer to an OpRegistry.
// NOTE(review): presumably the single process-wide instance; confirm.
static OpRegistry *Instance();

// Publicly accessible list of registration entries.
// NOTE(review): its relationship to Register() is not visible here.
std::vector<OpRegistrationData> registrationDatas;

// Registers reg_data; the bool result reports the outcome.
// NOTE(review): presumably true on success; confirm.
bool Register(const OpRegistrationData &reg_data);

domi::ImplyType GetImplyType(const std::string &op_type);

// Output parameter vec_op_type receives the op types for imply_type.
// NOTE(review): whether the vector is cleared first is not visible here.
void GetOpTypeByImplyType(std::vector<std::string> &vec_op_type, const domi::ImplyType &imply_type);

domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type);

domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type);

domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type);

domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type,
const std::string &ori_type);

// Two overloads: this one returns the (V1) function directly ...
domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type);

// ... and this one reports a Status and writes the V2 function to parse_subgraph_func.
Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func);

domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype);

// Returns a reference, so the referenced vector must outlive the call.
// NOTE(review): presumably owned by remove_input_configure_map_ or a static
// empty vector for unknown types; confirm.
const std::vector<RemoveInputConfigure> &GetRemoveInputConfigure(const std::string &ori_optype) const;

// Output parameter om_type receives the mapped type; the bool result reports the outcome.
// NOTE(review): presumably false when ori_optype has no mapping; confirm.
bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type);

ParseOpToGraphFunc GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type);

private:
// String-keyed lookup tables.
// NOTE(review): presumably filled by Register() and read by the getters above;
// which key (op type vs. original type) each table uses is not visible here.
std::unordered_map<std::string, domi::ImplyType> op_run_mode_map_;
std::unordered_map<std::string, ParseParamFunc> op_parse_params_fn_map_;
std::unordered_map<std::string, ParseParamByOpFunc> parse_params_by_op_func_map_;
std::unordered_map<std::string, FusionParseParamFunc> fusion_op_parse_params_fn_map_;
std::unordered_map<std::string, FusionParseParamByOpFunc> fusion_parse_params_by_op_fn_map_;
std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_;
std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_;
std::unordered_map<std::string, std::string> origin_type_to_om_type_;
std::unordered_map<std::string, ParseOpToGraphFunc> parse_op_to_graph_fn_map_;
std::unordered_map<std::string, ParseSubgraphFuncV2> op_types_to_parse_subgraph_post_func_v2_;
};
} // namespace domi
#endif // INC_REGISTER_OP_REGISTRY_H_

+ 8
- 17
third_party/fwkacllib/inc/runtime/base.h View File

@@ -81,26 +81,17 @@ typedef enum tagRtLimitType {
} rtLimitType_t;

typedef struct rtExceptionInfo {
uint32_t taskid;
uint32_t streamid;
uint32_t tid;
uint32_t deviceid;
uint32_t taskid;
uint32_t streamid;
uint32_t tid;
uint32_t deviceid;
uint32_t retcode;
} rtExceptionInfo;

typedef struct rtTaskFailInfo {
uint32_t taskid;
uint32_t streamid;
uint32_t tid;
uint32_t deviceid;
uint32_t retcode;
} rtTaskFailInfo;

typedef void (*rtErrorCallback)(rtExceptionType);

typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);

typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo);

typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen);

/**
@@ -143,13 +134,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type);
* @ingroup profiling_base
* @brief start rts profiler.
*/
RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList);
RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList);

/**
* @ingroup profiling_base
* @brief stop rts profiler.
*/
RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList);
RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList);

/**
* @ingroup profiling_base
@@ -209,7 +200,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal
* @param [out] NA
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback);
RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback);

/**
* @ingroup dvrt_base


+ 5
- 1
third_party/fwkacllib/inc/runtime/config.h View File

@@ -42,6 +42,7 @@ typedef enum tagRtChipType {
CHIP_MDC,
CHIP_LHISI,
CHIP_DC,
CHIP_CLOUD_V2,
CHIP_END,
} rtChipType_t;

@@ -62,6 +63,7 @@ typedef enum tagRtPlatformType {
PLATFORM_LHISI_ES,
PLATFORM_LHISI_CS,
PLATFORM_DC,
PLATFORM_CLOUD_V2,
PLATFORM_END,
} rtPlatformType_t;

@@ -119,7 +121,9 @@ typedef struct tagRtMemoryConfig {
uint32_t compilerSize;
} rtMemoryConfig_t;

typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t;
typedef struct tagRtPlatformConfig {
uint32_t platformConfig;
} rtPlatformConfig_t;

/**
* @ingroup


+ 1
- 1
third_party/fwkacllib/inc/runtime/context.h View File

@@ -47,7 +47,7 @@ typedef struct tagRtGroupInfo {
uint32_t aivectorNum;
uint32_t sdmaNum;
uint32_t activeStreamNum;
void* extrPtr;
void *extrPtr;
} rtGroupInfo_t;

/**


+ 1
- 1
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -185,7 +185,7 @@ RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc);
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtDeviceCanAccessPeer(int32_t* canAccessPeer, uint32_t device, uint32_t peerDevice);
RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice);

/**
* @ingroup dvrt_dev


+ 1
- 1
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -387,7 +387,7 @@ typedef void *rtModel_t;
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag);
RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag);

/**
* @ingroup rt_kernel


+ 17
- 17
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -159,11 +159,11 @@ typedef struct rtAiCoreMemorySize {
* @ingroup dvrt_mem
* @brief memory type
*/
typedef enum tagRtMemoryType {
RT_MEMORY_TYPE_HOST = 1,
RT_MEMORY_TYPE_DEVICE = 2 ,
RT_MEMORY_TYPE_SVM = 3,
RT_MEMORY_TYPE_DVPP = 4
typedef enum tagRtMemoryType {
RT_MEMORY_TYPE_HOST = 1,
RT_MEMORY_TYPE_DEVICE = 2,
RT_MEMORY_TYPE_SVM = 3,
RT_MEMORY_TYPE_DVPP = 4
} rtMemoryType_t;

/**
@@ -179,23 +179,23 @@ typedef struct tagRtPointerAttributes {


typedef struct rtMallocHostSharedMemoryIn {
const char* name;
const uint64_t size;
uint32_t flag;
const char *name;
const uint64_t size;
uint32_t flag;
} rtMallocHostSharedMemoryIn;

typedef struct rtMallocHostSharedMemoryOut {
int fd;
void* ptr;
void* devPtr;
int fd;
void *ptr;
void *devPtr;
} rtMallocHostSharedMemoryOut;

typedef struct rtFreeHostSharedMemoryIn {
const char* name;
const uint64_t size;
int fd;
void* ptr;
void* devPtr;
const char *name;
const uint64_t size;
int fd;
void *ptr;
void *devPtr;
} rtFreeHostSharedMemoryIn;


@@ -267,7 +267,7 @@ RTS_API rtError_t rtFreeHost(void *hostPtr);
*/

RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in,
rtMallocHostSharedMemoryOut *out);
rtMallocHostSharedMemoryOut *out);

/**
* @ingroup dvrt_mem


+ 1
- 0
third_party/fwkacllib/inc/runtime/stream.h View File

@@ -36,6 +36,7 @@ extern "C" {
#define RT_STREAM_FORBIDDEN_DEFAULT (0x10)
#define RT_STREAM_HEAD (0x20)
#define RT_STREAM_PRIMARY_DEFAULT (0x40)
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80)

/**
* @ingroup stream_type


+ 52
- 0
third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h View File

@@ -0,0 +1,52 @@
/**
* @file ExternalSoftDp.h
*
* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef EXTERNALSOFTDP_H
#define EXTERNALSOFTDP_H

#include <stdint.h>

/*
 * C-linkage declarations of the soft DP JPEG decode interface.
 * NOTE(review): the functions below take C++ references and the extern "C"
 * is not wrapped in #ifdef __cplusplus, so this header can only be included
 * from C++ translation units.
 */
extern "C" {
/*
 * Input/output buffers and output dimensions handed to the decode functions.
 * NOTE(review): "Procsess" is a misspelling of "Process", but it is part of
 * the public type name and must not be changed here.
 */
struct SoftDpProcsessInfo {
// presumably the encoded JPEG data and its size in bytes -- TODO confirm
uint8_t* inputBuffer;
uint32_t inputBufferSize;

// presumably the caller-provided destination buffer and its size in bytes -- TODO confirm ownership
uint8_t* outputBuffer;
uint32_t outputBufferSize;

// presumably the requested size of the resized image in pixels -- TODO confirm
uint32_t outputWidth;
uint32_t outputHeight;

// not described anywhere in this header
uint32_t reserved;
};

/*
 * Crop region described by its four edges.
 * NOTE(review): units (presumably pixels) and whether the edges are
 * inclusive are not stated in this header -- confirm against the implementation.
 */
struct DpCropInfo {
uint32_t left;
uint32_t right;
uint32_t up;
uint32_t down;
};

/*
* @brief decode and resize interface
* @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct
*        (NOTE(review): passed by non-const reference and contains outputBuffer,
*        so it is presumably in/out rather than input only -- confirm)
* @return success: return 0, fail: return error number
*/
uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo);

/*
* @brief decode crop and resize interface
* @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct
*        (NOTE(review): passed by non-const reference, presumably in/out -- confirm)
* @param [in] const DpCropInfo& cropInfo: crop struct
* @return success: return 0, fail: return error number
*/
uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo);
}
#endif // EXTERNALSOFTDP_H

+ 59
- 0
third_party/fwkacllib/inc/toolchain/plog.h View File

@@ -0,0 +1,59 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * plog.h: process-level log initialization/finalization entry points.
 *
 * NOTE(review): the guard name _PLOG_H_ starts with an underscore followed by
 * an uppercase letter, which is an identifier reserved for the implementation.
 * It is kept unchanged here so nothing that tests the guard breaks.
 */
#ifndef _PLOG_H_
#define _PLOG_H_
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/* Platform selectors: OS_TYPE is compared against LINUX / WIN below. */
#ifndef LINUX
#define LINUX 0
#endif // LINUX
#ifndef WIN
#define WIN 1
#endif // WIN
#ifndef OS_TYPE
#define OS_TYPE 0 /* defaults to the LINUX value unless the build overrides it */
#endif // OS_TYPE

/* Symbol-export decoration for the public functions declared below. */
#if (OS_TYPE == LINUX)
#define DLL_EXPORT __attribute__((visibility("default")))
#else
#define DLL_EXPORT _declspec(dllexport)
#endif

/**
* @ingroup plog
* @brief DlogReportInitialize: init log in service process before all device setting.
* @return: 0: SUCCEED, others: FAILED
*/
/* (void) rather than (): in C an empty list leaves the arguments unchecked. */
DLL_EXPORT int DlogReportInitialize(void);

/**
* @ingroup plog
* @brief DlogReportFinalize: release log resource in service process after all device reset.
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogReportFinalize(void);

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // _PLOG_H_

+ 138
- 0
third_party/fwkacllib/inc/toolchain/slog.h View File

@@ -18,7 +18,9 @@
#define D_SYSLOG_H_

#ifdef __cplusplus
#ifndef LOG_CPP
extern "C" {
#endif
#endif // __cplusplus

#ifndef LINUX
@@ -105,6 +107,7 @@ extern "C" {
#define SECURITY_LOG_MASK (0x00100000)
#define RUN_LOG_MASK (0x01000000)
#define OPERATION_LOG_MASK (0x10000000)
#define RESERVERD_LENGTH 52

typedef struct tagDCODE {
const char *cName;
@@ -116,6 +119,18 @@ typedef struct tagKV {
char *value;
} KeyValue;

/* Kind of process that emits logs; stored in LogAttr.type. */
typedef enum {
APPLICATION = 0, /* the DlogSetAttr comment names this as the default process type */
SYSTEM           /* implicitly 1 */
} ProcessType;

/* Log attributes passed by value to DlogSetAttr. */
typedef struct {
ProcessType type;                /* process type */
unsigned int pid;                /* process id; the DlogSetAttr comment requires it to be larger than 0 */
unsigned int deviceId;           /* device id (chip ID) */
char reserved[RESERVERD_LENGTH]; /* reserved bytes; NOTE(review): "RESERVERD" is misspelled in the macro name, kept as defined */
} LogAttr;

/**
* @ingroup slog
*
@@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent);
*/
DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel);

/**
* @ingroup slog
* @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION
* @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID)
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogSetAttr(LogAttr logAttr);

/**
* @ingroup slog
* @brief dlog_error: print error log
@@ -367,6 +390,121 @@ void DlogInner(int moduleId, int level, const char *fmt, ...);
void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);

#ifdef __cplusplus
#ifndef LOG_CPP
}
#endif // LOG_CPP
#endif // __cplusplus

#ifdef LOG_CPP
#ifdef __cplusplus
extern "C" {
#endif
/**
* @ingroup slog
* @brief DlogGetlevelForC: get module loglevel and enableEvent
*
* @param [in]moduleId: module id(see slog.h, eg: CCE), others: invalid
* @param [out]enableEvent: 1: enable; 0: disable
* @return: module level(0: debug, 1: info, 2: warning, 3: error, 4: null output)
*/
DLL_EXPORT int DlogGetlevelForC(int moduleId, int *enableEvent);

/**
* @ingroup slog
* @brief DlogSetlevelForC: set module loglevel and enableEvent
*
* @param [in]moduleId: module id(see slog.h, eg: CCE), -1: all modules, others: invalid
* @param [in]level: log level(0: debug, 1: info, 2: warning, 3: error, 4: null output)
* @param [in]enableEvent: 1: enable; 0: disable, others:invalid
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogSetlevelForC(int moduleId, int level, int enableEvent);

/**
* @ingroup slog
* @brief CheckLogLevelForC: check module level enable or not
* users no need to call it because all dlog interface(include inner interface) has already called
*
* @param [in]moduleId: module id, eg: CCE
* @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG
* @return: 1:enable, 0:disable
*/
DLL_EXPORT int CheckLogLevelForC(int moduleId, int logLevel);

/**
* @ingroup slog
* @brief DlogSetAttrForC: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION
* @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID)
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);

/**
* @ingroup slog
* @brief DlogForC: print log, need caller to specify level
* call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time
*
* @param [in]moduleId: module id, eg: CCE
* @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
* @param [in]fmt: log content
*/
#define DlogForC(moduleId, level, fmt, ...) \
do { \
if(CheckLogLevelForC(moduleId, level) == 1) { \
DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)

/**
* @ingroup slog
* @brief DlogSubForC: print log, need caller to specify level and submodule
* call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time
*
* @param [in]moduleId: module id, eg: CCE
* @param [in]submodule: eg: engine
* @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
* @param [in]fmt: log content
*/
#define DlogSubForC(moduleId, submodule, level, fmt, ...) \
do { \
if(CheckLogLevelForC(moduleId, level) == 1) { \
DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
} \
} while (0)

/**
* @ingroup slog
* @brief DlogWithKVForC: print log, need caller to specify level and other parameters
* call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time
*
* @param [in]moduleId: module id, eg: CCE
* @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
* @param [in]pstKVArray: key-value array
* @param [in]kvNum: key-value element num in array
* @param [in]fmt: log content
*/
#define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) \
do { \
if(CheckLogLevelForC(moduleId, level) == 1) { \
DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (0)

/**
* @ingroup slog
* @brief DlogFlushForC: flush log buffer to file
*/
DLL_EXPORT void DlogFlushForC(void);

/**
* @ingroup slog
* @brief Internal log interface, other modules are not allowed to call this interface
*/
void DlogInnerForC(int moduleId, int level, const char *fmt, ...);
void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);

#ifdef __cplusplus
}
#endif
#endif // LOG_CPP
#endif // D_SYSLOG_H_

Loading…
Cancel
Save