Browse Source

Move host_cpu kernel to cann.

tags/v1.3.0
zhaozhixuan 3 years ago
parent
commit
52e8dd5c88
43 changed files with 309 additions and 2356 deletions
  1. +2
    -2
      build.sh
  2. +0
    -7
      ge/CMakeLists.txt
  3. +11
    -1
      ge/engine_manager/dnnengine_manager.cc
  4. +0
    -6
      ge/executor/CMakeLists.txt
  5. +4
    -0
      ge/ge_local_engine/engine/host_cpu_engine.cc
  6. +3
    -0
      ge/ge_local_engine/engine/host_cpu_engine.h
  7. +4
    -3
      ge/graph/manager/graph_manager.cc
  8. +0
    -8
      ge/graph/optimize/graph_optimize.cc
  9. +0
    -272
      ge/host_cpu_engine/CMakeLists.txt
  10. +0
    -30
      ge/host_cpu_engine/common/constant/constant.h
  11. +0
    -76
      ge/host_cpu_engine/engine/host_cpu_engine.cc
  12. +0
    -125
      ge/host_cpu_engine/engine/host_cpu_engine.h
  13. +0
    -161
      ge/host_cpu_engine/module.mk
  14. +0
    -114
      ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc
  15. +0
    -51
      ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h
  16. +0
    -67
      ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc
  17. +0
    -86
      ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h
  18. +0
    -40
      ge/host_cpu_engine/ops_kernel_store/op/host_op.cc
  19. +0
    -36
      ge/host_cpu_engine/ops_kernel_store/op/host_op.h
  20. +0
    -45
      ge/host_cpu_engine/ops_kernel_store/op/op.h
  21. +0
    -55
      ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc
  22. +0
    -94
      ge/host_cpu_engine/ops_kernel_store/op/op_factory.h
  23. +0
    -179
      ge/host_cpu_engine/proto/task.proto
  24. +13
    -52
      ge/hybrid/model/hybrid_model_builder.cc
  25. +1
    -1
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
  26. +117
    -76
      ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc
  27. +15
    -22
      ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h
  28. +0
    -60
      ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc
  29. +0
    -42
      ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h
  30. +0
    -42
      ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc
  31. +0
    -42
      ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h
  32. +0
    -43
      ge/hybrid/node_executor/host_cpu/kernel/kernel.h
  33. +0
    -34
      ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc
  34. +0
    -42
      ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h
  35. +0
    -157
      ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc
  36. +0
    -48
      ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h
  37. +0
    -42
      ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc
  38. +0
    -42
      ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h
  39. +0
    -58
      ge/hybrid/node_executor/host_cpu/kernel_factory.cc
  40. +0
    -88
      ge/hybrid/node_executor/host_cpu/kernel_factory.h
  41. +1
    -1
      ge/opskernel_manager/optimizer_priority.pbtxt
  42. +1
    -6
      tests/ut/ge/CMakeLists.txt
  43. +137
    -0
      tests/ut/ge/hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc

+ 2
- 2
build.sh View File

@@ -166,7 +166,7 @@ build_graphengine()
echo "execute command: cmake ${CMAKE_ARGS} .. failed." echo "execute command: cmake ${CMAKE_ARGS} .. failed."
return 1 return 1
fi fi
COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
COMMON_TARGET="ge_local_engine ge_local_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
TARGET=${COMMON_TARGET} TARGET=${COMMON_TARGET}
if [ "x${PLATFORM}" = "xtrain" ] if [ "x${PLATFORM}" = "xtrain" ]
then then
@@ -253,7 +253,7 @@ generate_package()
ACL_LIB=("libge_common.so" "libgraph.so" "libregister.so" "liberror_manager.so" "libge_executor.so") ACL_LIB=("libge_common.so" "libgraph.so" "libregister.so" "liberror_manager.so" "libge_executor.so")
ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so") ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so")
FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so") FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so")
PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt")
PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "optimizer_priority.pbtxt")
PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so") PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so")


rm -rf ${OUTPUT_PATH:?}/${FWK_PATH}/ rm -rf ${OUTPUT_PATH:?}/${FWK_PATH}/


+ 0
- 7
ge/CMakeLists.txt View File

@@ -2,7 +2,6 @@ if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
add_subdirectory(common) add_subdirectory(common)
add_subdirectory(plugin/engine) add_subdirectory(plugin/engine)
add_subdirectory(ge_local_engine) add_subdirectory(ge_local_engine)
add_subdirectory(host_cpu_engine)
add_subdirectory(executor) add_subdirectory(executor)
add_subdirectory(offline) add_subdirectory(offline)
elseif (ENABLE_D) elseif (ENABLE_D)
@@ -408,12 +407,6 @@ set(TRAIN_SRC_LIST
"hybrid/node_executor/compiledsubgraph/known_node_executor.cc" "hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"hybrid/node_executor/ge_local/ge_local_node_executor.cc" "hybrid/node_executor/ge_local/ge_local_node_executor.cc"
"hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
"hybrid/node_executor/host_cpu/kernel_factory.cc"
"hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
"hybrid/node_executor/controlop/control_op_executor.cc" "hybrid/node_executor/controlop/control_op_executor.cc"
"hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"hybrid/node_executor/hccl/hccl_node_executor.cc" "hybrid/node_executor/hccl/hccl_node_executor.cc"


+ 11
- 1
ge/engine_manager/dnnengine_manager.cc View File

@@ -31,6 +31,7 @@
#include "graph/utils/graph_utils.h" #include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h" #include "graph/utils/node_utils.h"
#include "init/gelib.h" #include "init/gelib.h"
#include "framework/common/types.h"


namespace { namespace {
const char *const kSchedulerUnits = "schedule_units"; const char *const kSchedulerUnits = "schedule_units";
@@ -50,6 +51,15 @@ const char *const kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE";
} // namespace } // namespace


namespace ge { namespace ge {
namespace {
// Op types for which ExecOnHostCpu() always answers false, regardless of the host-exec flag.
const std::set<std::string> kNotCpuOp = {DATA, CONSTANT, CONSTANTOP, VARIABLE, NETOUTPUT};

// Returns true only when the global host-exec flag is set and the op's type
// is not listed in kNotCpuOp. The caller must pass a non-null op_desc.
bool ExecOnHostCpu(const OpDescPtr &op_desc) {
  const bool excluded_type = (kNotCpuOp.count(op_desc->GetType()) > 0);
  const bool host_exec_enabled = ge::GetContext().GetHostExecFlag();
  return host_exec_enabled && !excluded_type;
}
}  // namespace

DNNEngineManager::DNNEngineManager() : init_flag_(false) {} DNNEngineManager::DNNEngineManager() : init_flag_(false) {}
DNNEngineManager::~DNNEngineManager() { DNNEngineManager::~DNNEngineManager() {
engines_attrs_map_.clear(); engines_attrs_map_.clear();
@@ -206,7 +216,7 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) {
GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str());
return ""; return "";
} }
GE_IF_BOOL_EXEC(ge::GetContext().GetHostExecFlag(), return GetHostCpuEngineName(op_infos, op_desc));
GE_IF_BOOL_EXEC(ExecOnHostCpu(op_desc), return GetHostCpuEngineName(op_infos, op_desc));
std::string ge_core_type; std::string ge_core_type;
Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type);
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE"));


+ 0
- 6
ge/executor/CMakeLists.txt View File

@@ -92,12 +92,6 @@ set(SRC_LIST
"../hybrid/node_executor/compiledsubgraph/known_node_executor.cc" "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"../hybrid/node_executor/ge_local/ge_local_node_executor.cc" "../hybrid/node_executor/ge_local/ge_local_node_executor.cc"
"../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
"../hybrid/node_executor/host_cpu/kernel_factory.cc"
"../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
"../hybrid/node_executor/controlop/control_op_executor.cc" "../hybrid/node_executor/controlop/control_op_executor.cc"
"../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"../hybrid/node_executor/rts/rts_node_executor.cc" "../hybrid/node_executor/rts/rts_node_executor.cc"


+ 4
- 0
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -57,6 +57,7 @@ namespace ge {
namespace { namespace {
const char *kEnvKeyOppPath = "ASCEND_OPP_PATH"; const char *kEnvKeyOppPath = "ASCEND_OPP_PATH";
const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu"; const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu";
const std::string kConstantFoldingName = "libconstant_folding_ops.so";
} }


Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {
@@ -352,6 +353,9 @@ Status HostCpuEngine::LoadLib(const std::string &lib_path) {
} }


GELOGI("Lib: %s has been opened", lib_path.c_str()); GELOGI("Lib: %s has been opened", lib_path.c_str());
if (lib_path.find(kConstantFoldingName) != lib_path.npos) {
constant_folding_handle_ = handle;
}
lib_handles_.emplace_back(handle); lib_handles_.emplace_back(handle);
return SUCCESS; return SUCCESS;
} }


+ 3
- 0
ge/ge_local_engine/engine/host_cpu_engine.h View File

@@ -54,6 +54,8 @@ class GE_FUNC_VISIBILITY HostCpuEngine {


ge::Status Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, std::vector<GeTensorPtr> &outputs); ge::Status Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, std::vector<GeTensorPtr> &outputs);


void *GetConstantFoldingHandle() const { return constant_folding_handle_; }

private: private:
HostCpuEngine() = default; HostCpuEngine() = default;


@@ -85,6 +87,7 @@ class GE_FUNC_VISIBILITY HostCpuEngine {


std::mutex mu_; std::mutex mu_;
std::vector<void *> lib_handles_; std::vector<void *> lib_handles_;
void *constant_folding_handle_ = nullptr;
bool initialized_ = false; bool initialized_ = false;
}; };
} // namespace ge } // namespace ge


+ 4
- 3
ge/graph/manager/graph_manager.cc View File

@@ -490,9 +490,10 @@ Status GraphManager::ModifyDataIndex(const Graph &graph, const std::map<std::str
auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE);
if (iter != graph_option.end() && !iter->second.empty()) { if (iter != graph_option.end() && !iter->second.empty()) {
// If data inputs shape range is set, user must set valid data index. // If data inputs shape range is set, user must set valid data index.
std::string failed_reason = "Data index must be set continuous from 0 when data shape range enabled!";
REPORT_INPUT_ERROR("E10003", std::vector<std::string>({"parameter", "value", "reason"}),
std::vector<std::string>({"--data_index", "-", failed_reason}));
std::string situation = "Data op index";
std::string reason = "Data index must be set continuous from 0 when data shape range enabled!";
REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"situation", "reason"}),
std::vector<std::string>({situation, reason}));
GELOGE(GRAPH_PARAM_INVALID, "[COMP][AddGraph]Input data index is invalid when data shape range enabled."); GELOGE(GRAPH_PARAM_INVALID, "[COMP][AddGraph]Input data index is invalid when data shape range enabled.");
return GRAPH_PARAM_INVALID; return GRAPH_PARAM_INVALID;
} }


+ 0
- 8
ge/graph/optimize/graph_optimize.cc View File

@@ -140,10 +140,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std
} }


Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) {
if (GetContext().GetHostExecFlag()) {
// graph exec on host, no need OptimizeOriginalGraph
return SUCCESS;
}
if (compute_graph == nullptr) { if (compute_graph == nullptr) {
REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid"); REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid");
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Check][Param] compute_graph is nullptr."); GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Check][Param] compute_graph is nullptr.");
@@ -186,10 +182,6 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) {


Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) {
GELOGD("OptimizeOriginalGraphJudgeInsert in"); GELOGD("OptimizeOriginalGraphJudgeInsert in");
if (GetContext().GetHostExecFlag()) {
// graph exec on host, no need OptimizeOriginalGraphJudgeInsert
return SUCCESS;
}


GE_CHECK_NOTNULL(compute_graph); GE_CHECK_NOTNULL(compute_graph);
Status ret = SUCCESS; Status ret = SUCCESS;


+ 0
- 272
ge/host_cpu_engine/CMakeLists.txt View File

@@ -1,272 +0,0 @@
# Sources compiled into the host_cpu_engine and atc_host_cpu_engine shared libraries.
set(SRC_LIST
"engine/host_cpu_engine.cc"
"ops_kernel_store/host_cpu_ops_kernel_info.cc"
"ops_kernel_store/op/op_factory.cc"
"ops_kernel_store/op/host_op.cc"
)

# Sources compiled into every host_cpu_opskernel_builder variant (shared, atc, static).
set(CPU_OPS_KERNEL_LIST
"ops_kernel_store/host_cpu_ops_kernel_builder.cc"
)

############ libhost_cpu_engine.so ############
add_library(host_cpu_engine SHARED ${SRC_LIST})

add_dependencies(host_cpu_engine
graphengine_protos
)

target_compile_options(host_cpu_engine PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(host_cpu_engine PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(host_cpu_engine PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/graphengine_protos
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(host_cpu_engine PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(host_cpu_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
ascend_protobuf
c_sec
graph
slog
-Wl,--as-needed
)

############ atclib/libhost_cpu_engine.so ############
add_library(atc_host_cpu_engine SHARED ${SRC_LIST})

add_dependencies(atc_host_cpu_engine
graphengine_protos
)

target_compile_options(atc_host_cpu_engine PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(atc_host_cpu_engine PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(atc_host_cpu_engine PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/graphengine_protos
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(atc_host_cpu_engine PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(atc_host_cpu_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
ascend_protobuf
c_sec
graph
slog
-Wl,--as-needed
)

set_target_properties(atc_host_cpu_engine PROPERTIES
OUTPUT_NAME host_cpu_engine
LIBRARY_OUTPUT_DIRECTORY atclib
)

############ libhost_cpu_opskernel_builder.so ############
add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})

add_dependencies(host_cpu_opskernel_builder
graphengine_protos
)

target_compile_options(host_cpu_opskernel_builder PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(host_cpu_opskernel_builder PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(host_cpu_opskernel_builder PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/graphengine_protos
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(host_cpu_opskernel_builder PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(host_cpu_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
ascend_protobuf
c_sec
slog
graph
register
-Wl,--as-needed
)

############ atclib/libhost_cpu_opskernel_builder.so ############
add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})

add_dependencies(atc_host_cpu_opskernel_builder
graphengine_protos
)

target_compile_options(atc_host_cpu_opskernel_builder PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE
google=ascend_private
FUNC_VISIBILITY
)

target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/graphengine_protos
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_options(atc_host_cpu_opskernel_builder PRIVATE
-Wl,-Bsymbolic
)

target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
ascend_protobuf
c_sec
slog
graph
register
-Wl,--as-needed
)

set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES
OUTPUT_NAME host_cpu_opskernel_builder
LIBRARY_OUTPUT_DIRECTORY atclib
)

############ libhost_cpu_opskernel_builder.a ############
add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})

add_dependencies(host_cpu_opskernel_builder_static
graphengine_protos
)

target_compile_options(host_cpu_opskernel_builder_static PRIVATE
-Werror
-fno-common
-fvisibility=hidden
)

target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE
google=ascend_private
LOG_CPP
FUNC_VISIBILITY
)

target_include_directories(host_cpu_opskernel_builder_static PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/graphengine_protos
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(host_cpu_opskernel_builder_static PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
c_sec
)

############ install ############
# NOTE(review): INSTALL_BASE_DIR is set to empty and not referenced by the install
# rules below -- confirm whether anything outside this file reads it.
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

# Default (non-ATC) shared libraries go directly into ${INSTALL_LIBRARY_DIR}.
install(TARGETS host_cpu_engine host_cpu_opskernel_builder OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

# ATC variants share the same output file names as the default targets, so they
# are installed into a separate atclib subdirectory.
install(TARGETS atc_host_cpu_engine atc_host_cpu_opskernel_builder OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/atclib
)

+ 0
- 30
ge/host_cpu_engine/common/constant/constant.h View File

@@ -1,30 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HOST_CPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_
#define GE_HOST_CPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_

#include <string>

namespace ge {
namespace host_cpu {
// Name string identifying the host CPU engine.
constexpr char kHostCpuEngineName[] = "DNN_VM_HOST_CPU";
// Name string identifying the host CPU ops kernel library.
constexpr char kHostCpuOpKernelLibName[] = "DNN_VM_HOST_CPU_OP_STORE";
}  // namespace host_cpu
}  // namespace ge

#endif // GE_HOST_CPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_

+ 0
- 76
ge/host_cpu_engine/engine/host_cpu_engine.cc View File

@@ -1,76 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "host_cpu_engine/engine/host_cpu_engine.h"
#include <map>
#include <memory>
#include <string>
#include <securec.h>
#include "framework/common/debug/ge_log.h"
#include "common/ge/ge_util.h"
#include "host_cpu_engine/common/constant/constant.h"
#include "host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h"

namespace ge {
namespace host_cpu {
// Returns the single engine object, held as a function-local static.
HostCpuEngine &HostCpuEngine::Instance() {
  static HostCpuEngine engine;
  return engine;
}

// Creates the ops kernel info store if it does not exist yet.
// `options` is accepted for interface compatibility and is not read here.
// Returns FAILED when the store cannot be allocated, SUCCESS otherwise.
Status HostCpuEngine::Initialize(const std::map<string, string> &options) {
  if (ops_kernel_store_ != nullptr) {
    // Already initialized; nothing to do.
    return SUCCESS;
  }

  ops_kernel_store_ = MakeShared<HostCpuOpsKernelInfoStore>();
  if (ops_kernel_store_ == nullptr) {
    GELOGE(FAILED, "[Create][HostCpuEngine]Make HostCpuOpsKernelInfoStore failed.");
    REPORT_INNER_ERROR("E19999", "HostCpuEngine::Initialize failed for new HostCpuEngine.");
    return FAILED;
  }
  return SUCCESS;
}

// Publishes the store under kHostCpuOpKernelLibName; leaves the map untouched
// when the engine has not been initialized.
void HostCpuEngine::GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map) {
  if (ops_kernel_store_ == nullptr) {
    return;
  }
  ops_kernel_map[kHostCpuOpKernelLibName] = ops_kernel_store_;
}

// The host cpu engine provides no graph optimizer, so the map is left as-is.
void HostCpuEngine::GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &) {
}

// Drops the engine's reference to the ops kernel info store.
Status HostCpuEngine::Finalize() {
  ops_kernel_store_ = nullptr;
  return SUCCESS;
}
}  // namespace host_cpu
}  // namespace ge

// Forwards to HostCpuEngine::Instance().Initialize() and returns its status.
ge::Status Initialize(const std::map<string, string> &options) {
  auto &engine = ge::host_cpu::HostCpuEngine::Instance();
  return engine.Initialize(options);
}

// Forwards to HostCpuEngine::Instance().GetOpsKernelInfoStores(), which fills ops_kernel_map.
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map) {
  auto &engine = ge::host_cpu::HostCpuEngine::Instance();
  engine.GetOpsKernelInfoStores(ops_kernel_map);
}

// Forwards to HostCpuEngine::Instance().GetGraphOptimizerObjs().
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers) {
  auto &engine = ge::host_cpu::HostCpuEngine::Instance();
  engine.GetGraphOptimizerObjs(graph_optimizers);
}

// Forwards to HostCpuEngine::Instance().Finalize() and returns its status.
ge::Status Finalize() {
  auto &engine = ge::host_cpu::HostCpuEngine::Instance();
  return engine.Finalize();
}

+ 0
- 125
ge/host_cpu_engine/engine/host_cpu_engine.h View File

@@ -1,125 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
#define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_

// GE_FUNC_VISIBILITY: symbol-export annotation. It expands to an export/visibility
// attribute only when FUNC_VISIBILITY is defined, and to nothing otherwise.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
// MSVC toolchain: mark the symbol as DLL-exported.
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
// Non-MSVC toolchains: give the symbol default visibility.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <map>
#include <memory>
#include <string>
#include "common/opskernel/ops_kernel_info_store.h"
#include "common/optimizer/graph_optimizer.h"

using OpsKernelInfoStorePtr = std::shared_ptr<ge::OpsKernelInfoStore>;
using GraphOptimizerPtr = std::shared_ptr<ge::GraphOptimizer>;

namespace ge {
namespace host_cpu {
/**
* host cpu engine.
* Used for the ops which executes on host.
*/
class GE_FUNC_VISIBILITY HostCpuEngine {
public:
/**
* get HostCpuEngine instance.
* @return HostCpuEngine instance.
*/
static HostCpuEngine &Instance();

virtual ~HostCpuEngine() = default;

/**
* When Ge start, GE will invoke this interface
* @return The status whether initialize successfully
*/
Status Initialize(const std::map<string, string> &options);

/**
* After the initialize, GE will invoke this interface
* to get the Ops kernel Store.
* @param ops_kernel_map The host cpu's ops kernel info
*/
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);

/**
* After the initialize, GE will invoke this interface
* to get the Graph Optimizer.
* @param graph_optimizers The host cpu's Graph Optimizer objs
*/
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);

/**
* When the graph finished, GE will invoke this interface
* @return The status whether initialize successfully
*/
Status Finalize();

HostCpuEngine(const HostCpuEngine &HostCpuEngine) = delete;
HostCpuEngine(const HostCpuEngine &&HostCpuEngine) = delete;
HostCpuEngine &operator=(const HostCpuEngine &HostCpuEngine) = delete;
HostCpuEngine &operator=(HostCpuEngine &&HostCpuEngine) = delete;

private:
HostCpuEngine() = default;

OpsKernelInfoStorePtr ops_kernel_store_ = nullptr;
};
} // namespace host_cpu
} // namespace ge

extern "C" {

/**
 * When GE starts, GE will invoke this interface
 * @param options Initialization options
 * @return The status whether initialize successfully
 */
GE_FUNC_VISIBILITY ge::Status Initialize(const std::map<std::string, std::string> &options);

/**
 * After the initialize, GE will invoke this interface to get the Ops kernel Store
 * @param ops_kernel_map The host cpu's ops kernel info
 */
GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map);

/**
 * After the initialize, GE will invoke this interface to get the Graph Optimizer
 * @param graph_optimizers The host cpu's Graph Optimizer objs
 */
GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);

/**
 * When the graph finished, GE will invoke this interface
 * @return The status whether finalize successfully
 */
GE_FUNC_VISIBILITY ge::Status Finalize();
}

#endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_

+ 0
- 161
ge/host_cpu_engine/module.mk View File

@@ -1,161 +0,0 @@
LOCAL_PATH := $(call my-dir)


# Sources shared by the libhost_cpu_engine modules (host and atclib variants) below.
local_lib_src_files := engine/host_cpu_engine.cc \
ops_kernel_store/host_cpu_ops_kernel_info.cc \
ops_kernel_store/op/op_factory.cc \
ops_kernel_store/op/host_op.cc \

# Include search paths assigned to LOCAL_C_INCLUDES by every module in this file.
# NOTE(review): the first entry, proto/task.proto, is a file rather than a
# directory -- confirm whether the build system gives it special treatment.
local_lib_inc_path := proto/task.proto \
${LOCAL_PATH} \
${TOPDIR}inc \
${TOPDIR}metadef/inc \
${TOPDIR}graphengine/inc \
${TOPDIR}inc/external \
${TOPDIR}metadef/inc/external \
${TOPDIR}graphengine/inc/external \
${TOPDIR}metadef/inc/external/graph \
$(TOPDIR)libc_sec/include \
${TOPDIR}third_party/protobuf/include \
${TOPDIR}graphengine/inc/framework \
$(TOPDIR)graphengine/ge \

#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libhost_cpu_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
libregister \
libruntime

LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for atc
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libhost_cpu_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
libregister \
libruntime_compile

LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for host ops kernel builder
include $(CLEAR_VARS)
LOCAL_MODULE := libhost_cpu_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
libregister \

LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc

LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for device ops kernel builder
include $(CLEAR_VARS)
LOCAL_MODULE := libhost_cpu_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
libregister \

LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc

LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_SHARED_LIBRARY}

#compiler for host static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libhost_cpu_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES := libascend_protobuf \
libgraph \
libregister \

LOCAL_SHARED_LIBRARIES := libc_sec \
libslog \

LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc

LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_STATIC_LIBRARY}

#compiler for device static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libhost_cpu_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES := libascend_protobuf \
libgraph \
libregister \

LOCAL_SHARED_LIBRARIES := libc_sec \
libslog \

LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc

LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_STATIC_LIBRARY}

#compiler for atc ops kernel builder
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libhost_cpu_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
libregister \

LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc

LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

+ 0
- 114
ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc View File

@@ -1,114 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "host_cpu_ops_kernel_builder.h"
#include <memory>
#include "common/ge_inner_error_codes.h"
#include "ge/ge_api_types.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include <securec.h>
#include "framework/common/debug/ge_log.h"
#include "host_cpu_engine/common/constant/constant.h"
#include "register/ops_kernel_builder_registry.h"

namespace ge {
namespace host_cpu {
// Registers HostCpuOpsKernelBuilder under the kHostCpuOpKernelLibName key.
// NOTE(review): the macro is presumably supplied by register/ops_kernel_builder_registry.h — confirm.
REGISTER_OPS_KERNEL_BUILDER(kHostCpuOpKernelLibName, HostCpuOpsKernelBuilder);

// Tear-down hook of the builder. There is nothing to release here, so the call
// unconditionally reports SUCCESS.
Status HostCpuOpsKernelBuilder::Finalize() {
  return SUCCESS;
}
// Set-up hook of the builder. The supplied options are not inspected and no
// state is created, so the call unconditionally reports SUCCESS.
Status HostCpuOpsKernelBuilder::Initialize(const map<std::string, std::string> &options) {
  return SUCCESS;
}

// Fills in the memory size of every output tensor description of `ge_node`.
//
// Returns FAILED when the node has no op desc, when the tensor size cannot be
// computed (or comes out negative), or when the updated output desc cannot be
// written back. Returns SUCCESS otherwise, including the early exit taken for
// nodes whose shape is reported as unknown.
Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) {
  OpDescPtr desc = ge_node.GetOpDesc();
  if (desc == nullptr) {
    GELOGE(FAILED, "[Get][OpDesc]CalcOpRunningParam failed, as op desc is null");
    REPORT_INNER_ERROR("E19999", "GetOpDesc failed.");
    return FAILED;
  }

  // Only a successful status query that reports "unknown" short-circuits;
  // a failed query falls through to the normal size calculation.
  bool unknown_shape = false;
  if ((NodeUtils::GetNodeUnknownShapeStatus(ge_node, unknown_shape) == GRAPH_SUCCESS) && unknown_shape) {
    GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str());
    return SUCCESS;
  }

  const string node_name = ge_node.GetName();
  const string node_type = ge_node.GetType();
  GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(),
         desc->GetOutputsSize());

  for (size_t idx = 0; idx < desc->GetOutputsSize(); ++idx) {
    const uint32_t out_index = static_cast<uint32_t>(idx);
    GeTensorDesc out_desc = desc->GetOutputDesc(out_index);
    Format fmt = out_desc.GetFormat();
    DataType dtype = out_desc.GetDataType();

    // A positive size that is already recorded on the desc is kept untouched.
    int64_t preset_size = 0;
    const bool already_sized =
        (TensorUtils::GetSize(out_desc, preset_size) == GRAPH_SUCCESS) && (preset_size > 0);
    if (already_sized) {
      GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.",
             node_name.c_str(), node_type.c_str(), idx, TypeUtils::FormatToSerialString(fmt).c_str(),
             TypeUtils::DataTypeToSerialString(dtype).c_str(), preset_size);
      continue;
    }

    int64_t calc_size = 0;
    GeShape out_shape = out_desc.GetShape();
    const bool calc_failed =
        (TensorUtils::CalcTensorMemSize(out_shape, fmt, dtype, calc_size) != GRAPH_SUCCESS) || (calc_size < 0);
    if (calc_failed) {
      GELOGE(FAILED,
             "[Calc][TensorMemSize] fail for op[%s:%s] out[%zu] mem size, mem_size=%ld, format=%s, data_type=%s.",
             node_name.c_str(), node_type.c_str(), idx, calc_size, TypeUtils::FormatToSerialString(fmt).c_str(),
             TypeUtils::DataTypeToSerialString(dtype).c_str());
      REPORT_CALL_ERROR("E19999",
                        "CalcTensorMemSize failed for op[%s:%s] out[%zu] mem size, mem_size=%ld, format=%s, data_type=%s.",
                        node_name.c_str(), node_type.c_str(), idx, calc_size,
                        TypeUtils::FormatToSerialString(fmt).c_str(),
                        TypeUtils::DataTypeToSerialString(dtype).c_str());
      return FAILED;
    }
    GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.",
           node_name.c_str(), node_type.c_str(), idx, calc_size, TypeUtils::FormatToSerialString(fmt).c_str(),
           TypeUtils::DataTypeToSerialString(dtype).c_str());

    // out_desc is a local copy, so the new size must be written back to the op desc.
    TensorUtils::SetSize(out_desc, calc_size);
    const bool update_ok = (desc->UpdateOutputDesc(out_index, out_desc) == GRAPH_SUCCESS);
    if (!update_ok) {
      GELOGE(FAILED,
             "[Update][OutputDesc] fail for op[%s:%s] out[%zu] desc , format=%s, data_type=%s.",
             node_name.c_str(), node_type.c_str(), idx,
             TypeUtils::FormatToSerialString(fmt).c_str(), TypeUtils::DataTypeToSerialString(dtype).c_str());
      REPORT_CALL_ERROR("E19999", "UpdateOutputDesc failed for op[%s:%s] out[%zu] desc , format=%s, data_type=%s.",
                        node_name.c_str(), node_type.c_str(), idx,
                        TypeUtils::FormatToSerialString(fmt).c_str(),
                        TypeUtils::DataTypeToSerialString(dtype).c_str());
      return FAILED;
    }
  }

  GELOGD("Calc op[%s:%s] running param success.", node_name.c_str(), node_type.c_str());
  return SUCCESS;
}

// Task generation hook. Host CPU ops produce no device task, so `tasks` is left
// exactly as it was passed in and SUCCESS is returned.
Status HostCpuOpsKernelBuilder::GenerateTask(const Node &node, RunContext &context, vector<domi::TaskDef> &tasks) {
  return SUCCESS;
}
} // namespace host_cpu
} // namespace ge

+ 0
- 51
ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h View File

@@ -1,51 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_

// GE_FUNC_VISIBILITY marks classes that should be exported from the library.
// It expands to an export annotation only when FUNC_VISIBILITY is defined:
// _declspec(dllexport) under MSVC, the "default" visibility attribute elsewhere.
// Without FUNC_VISIBILITY it expands to nothing.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include "common/opskernel/ops_kernel_builder.h"

namespace ge {
namespace host_cpu {
/**
 * Ops kernel builder of the host CPU engine.
 * Implements the OpsKernelBuilder hooks; the class declares no data members of its own.
 */
class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder {
 public:
  /**
   * Set-up hook of the builder.
   * @param options key/value options handed in by the caller
   * @return status whether this operation success
   */
  // std::map is spelled out so this header does not depend on a using-declaration
  // leaking in from another header; the parameter type itself is unchanged.
  Status Initialize(const std::map<std::string, std::string> &options) override;

  /**
   * Tear-down hook of the builder.
   * @return status whether this operation success
   */
  Status Finalize() override;

  /**
   * Calculate the running parameters of a node.
   * @param node node whose op desc is updated
   * @return status whether this operation success
   */
  Status CalcOpRunningParam(Node &node) override;

  /**
   * Generate the tasks of a node.
   * @param node node to generate tasks for
   * @param context run context
   * @param tasks output list of generated task definitions
   * @return status whether this operation success
   */
  Status GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks) override;
};
} // namespace host_cpu
} // namespace ge

#endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_

+ 0
- 67
ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc View File

@@ -1,67 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h"
#include <memory>
#include "common/constant/constant.h"
#include "ge/ge_api_types.h"
#include "framework/common/debug/ge_log.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "op/op_factory.h"

namespace ge {
namespace host_cpu {
using domi::TaskDef;
using std::map;
using std::string;
using std::vector;

// Builds op_info_map_: every op type known to OpFactory is mapped to the same
// default OpInfo (host CPU engine / kernel lib, zero compute cost, all flags off).
// The `options` argument is not consulted. Always returns SUCCESS.
Status HostCpuOpsKernelInfoStore::Initialize(const map<string, string> &options) {
  GELOGI("HostCpuOpsKernelInfoStore init start.");

  // Value-initialize and assign member by member. Designated initializers are
  // only standard from C++20 on, so this form stays portable for C++11 builds
  // and for compilers that do not offer them as an extension.
  OpInfo default_op_info{};
  default_op_info.engine = kHostCpuEngineName;
  default_op_info.opKernelLib = kHostCpuOpKernelLibName;
  default_op_info.computeCost = 0;
  default_op_info.flagPartial = false;
  default_op_info.flagAsync = false;
  default_op_info.isAtomic = false;

  // Init op_info_map_. GetAllOps() returns a const reference, so bind to it
  // instead of copying the whole list.
  const auto &all_ops = OpFactory::Instance().GetAllOps();
  for (const auto &op : all_ops) {
    op_info_map_[op] = default_op_info;
  }

  GELOGI("HostCpuOpsKernelInfoStore inited success. op num=%zu", op_info_map_.size());

  return SUCCESS;
}

// Drops every cached op info entry and reports SUCCESS.
Status HostCpuOpsKernelInfoStore::Finalize() {
  op_info_map_.clear();

  return SUCCESS;
}

// Copies the complete op type -> OpInfo table into `infos`, replacing whatever
// the caller's map held before.
void HostCpuOpsKernelInfoStore::GetAllOpsKernelInfo(map<string, OpInfo> &infos) const {
  infos = op_info_map_;
}

bool HostCpuOpsKernelInfoStore::CheckSupported(const OpDescPtr &op_desc, std::string &) const {
if (op_desc == nullptr) {
return false;
}
return op_info_map_.count(op_desc->GetType()) > 0;
}
} // namespace host_cpu
} // namespace ge

+ 0
- 86
ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h View File

@@ -1,86 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_

// GE_FUNC_VISIBILITY marks classes that should be exported from the library.
// It expands to an export annotation only when FUNC_VISIBILITY is defined:
// _declspec(dllexport) under MSVC, the "default" visibility attribute elsewhere.
// Without FUNC_VISIBILITY it expands to nothing.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <map>
#include <string>
#include <vector>

#include "common/opskernel/ops_kernel_info_store.h"

namespace ge {
namespace host_cpu {
/**
 * Ops kernel info store of the host cpu engine.
 * Keeps one OpInfo per supported op type; the store can be neither copied nor moved.
 */
class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore {
 public:
  HostCpuOpsKernelInfoStore() = default;
  ~HostCpuOpsKernelInfoStore() override = default;

  /**
   * Initialize related resources of the host cpu kernelinfo store
   * @return status whether this operation success
   */
  Status Initialize(const std::map<std::string, std::string> &options) override;

  /**
   * Release related resources of the host cpu kernel info store
   * @return status whether this operation success
   */
  Status Finalize() override;

  /**
   * Check to see if an operator is fully supported or partially supported.
   * @param op_desc OpDesc information
   * @param reason unsupported reason
   * @return bool value indicate whether the operator is fully supported
   */
  bool CheckSupported(const OpDescPtr &op_desc, std::string &reason) const override;

  /**
   * Returns the full operator information.
   * @param infos reference of a map,
   * contain operator's name and detailed information
   */
  void GetAllOpsKernelInfo(std::map<std::string, ge::OpInfo> &infos) const override;

  // Copying and moving are both disabled. The move constructor takes a plain
  // rvalue reference, the conventional signature and the one that matches the
  // move assignment operator below.
  HostCpuOpsKernelInfoStore(const HostCpuOpsKernelInfoStore &ops_kernel_store) = delete;
  HostCpuOpsKernelInfoStore(HostCpuOpsKernelInfoStore &&ops_kernel_store) = delete;
  HostCpuOpsKernelInfoStore &operator=(const HostCpuOpsKernelInfoStore &ops_kernel_store) = delete;
  HostCpuOpsKernelInfoStore &operator=(HostCpuOpsKernelInfoStore &&ops_kernel_store) = delete;

 private:
  // store op name and OpInfo key-value pair
  std::map<std::string, ge::OpInfo> op_info_map_;
};
} // namespace host_cpu
} // namespace ge

#endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_

+ 0
- 40
ge/host_cpu_engine/ops_kernel_store/op/host_op.cc View File

@@ -1,40 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "host_cpu_engine/ops_kernel_store/op/host_op.h"
#include "framework/common/util.h"
#include "host_cpu_engine/ops_kernel_store/op/op_factory.h"

namespace ge {
namespace host_cpu {
// Host ops have no device task to generate, so running one is a no-op that
// reports SUCCESS.
Status HostOp::Run() {
  return SUCCESS;
}

// Op types served by HostOp. Each line names one op type and registers HostOp
// as the class created for it.
// NOTE(review): the order here is presumably the order in which the types are
// recorded by the factory — confirm before reordering.
REGISTER_OP_CREATOR(NoOp, HostOp);
REGISTER_OP_CREATOR(Variable, HostOp);
REGISTER_OP_CREATOR(Constant, HostOp);
REGISTER_OP_CREATOR(Assign, HostOp);
REGISTER_OP_CREATOR(RandomUniform, HostOp);
REGISTER_OP_CREATOR(Add, HostOp);
REGISTER_OP_CREATOR(Mul, HostOp);
REGISTER_OP_CREATOR(ConcatV2, HostOp);
REGISTER_OP_CREATOR(Data, HostOp);
REGISTER_OP_CREATOR(Fill, HostOp);
REGISTER_OP_CREATOR(NetOutput, HostOp);
} // namespace host_cpu
} // namespace ge

+ 0
- 36
ge/host_cpu_engine/ops_kernel_store/op/host_op.h View File

@@ -1,36 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_

#include "host_cpu_engine/ops_kernel_store/op/op.h"

namespace ge {
namespace host_cpu {
/**
 * Op implementation shared by the host op types; it is not copyable.
 */
class GE_FUNC_VISIBILITY HostOp : public Op {
 public:
  HostOp(const Node &node, RunContext &run_context)
      : Op(node, run_context) {}

  ~HostOp() override = default;

  // copying is disabled
  HostOp(const HostOp &op) = delete;
  HostOp &operator=(const HostOp &op) = delete;

  /**
   * Run the op.
   * @return status whether this operation success
   */
  Status Run() override;
};
} // namespace host_cpu
} // namespace ge

#endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_

+ 0
- 45
ge/host_cpu_engine/ops_kernel_store/op/op.h View File

@@ -1,45 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_

#include <climits>
#include <string>
#include <vector>
#include "common/ge_inner_error_codes.h"
#include "common/opskernel/ops_kernel_info_types.h"
#include "graph/node.h"

namespace ge {
namespace host_cpu {
/**
 * Abstract base class of every op.
 * Holds references to the node it was created for and to the run context;
 * both are borrowed, so they must outlive the op.
 */
class GE_FUNC_VISIBILITY Op {
 public:
  Op(const Node &node, RunContext &run_context)
      : run_context_(run_context),
        node_(node) {}

  virtual ~Op() = default;

  /**
   * Run the op; implemented by the concrete op class.
   * @return status whether this operation success
   */
  virtual Status Run() = 0;

 protected:
  const RunContext &run_context_;
  const Node &node_;
};
} // namespace host_cpu
} // namespace ge

#endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_

+ 0
- 55
ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc View File

@@ -1,55 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "host_cpu_engine/ops_kernel_store/op/op_factory.h"
#include "framework/common/debug/ge_log.h"
#include "common/ge_inner_error_codes.h"
#include "graph/op_desc.h"

namespace ge {
namespace host_cpu {
// Accessor for the single OpFactory object, a function-local static that is
// constructed the first time this function runs.
OpFactory &OpFactory::Instance() {
  static OpFactory factory;
  return factory;
}

// Creates the op for `node` through the creator registered for the node's type.
// Logs an error and returns nullptr when no creator is registered for that type.
std::shared_ptr<Op> OpFactory::CreateOp(const Node &node, RunContext &run_context) {
  const auto creator_it = op_creator_map_.find(node.GetType());
  if (creator_it == op_creator_map_.end()) {
    GELOGE(FAILED, "Not supported OP, type = %s, name = %s", node.GetType().c_str(), node.GetName().c_str());
    return nullptr;
  }

  return creator_it->second(node, run_context);
}

// Registers `func` as the creator for op type `type` and records the type in
// the list of all ops. An empty creator or an already registered type only
// produces a warning; the existing registration is kept.
void OpFactory::RegisterCreator(const std::string &type, const OP_CREATOR_FUNC &func) {
  if (func == nullptr) {
    GELOGW("Func is NULL.");
    return;
  }

  // emplace leaves the map untouched when the key is already present.
  const bool newly_added = op_creator_map_.emplace(type, func).second;
  if (!newly_added) {
    GELOGW("%s creator already exist", type.c_str());
    return;
  }

  all_ops_.emplace_back(type);
}
} // namespace host_cpu
} // namespace ge

+ 0
- 94
ge/host_cpu_engine/ops_kernel_store/op/op_factory.h View File

@@ -1,94 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "common/ge/ge_util.h"
#include "host_cpu_engine/ops_kernel_store/op/op.h"

namespace ge {
namespace host_cpu {
using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunContext &)>;

/**
* manage all the op, support create op.
*/
class GE_FUNC_VISIBILITY OpFactory {
public:
static OpFactory &Instance();

/**
* @brief create Op.
* @param [in] node share ptr of node
* @param [in] run_context run context
* @return not nullptr success
* @return nullptr fail
*/
std::shared_ptr<Op> CreateOp(const Node &node, RunContext &run_context);

/**
* @brief Register Op create function.
* @param [in] type Op type
* @param [in] func Op create func
*/
void RegisterCreator(const std::string &type, const OP_CREATOR_FUNC &func);

const std::vector<std::string> &GetAllOps() const { return all_ops_; }

bool CheckSupported(const std::string &type) { return op_creator_map_.find(type) != op_creator_map_.end(); }

OpFactory(const OpFactory &) = delete;
OpFactory &operator=(const OpFactory &) = delete;
OpFactory(OpFactory &&) = delete;
OpFactory &operator=(OpFactory &&) = delete;

private:
OpFactory() = default;
~OpFactory() = default;

// the op creator function map
std::map<std::string, OP_CREATOR_FUNC> op_creator_map_;
std::vector<std::string> all_ops_;
};

/**
 * Helper whose construction registers an op creator with OpFactory.
 * Instances carry no state and can be neither copied nor moved.
 */
class GE_FUNC_VISIBILITY OpRegistrar {
 public:
  OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) {
    OpFactory &factory = OpFactory::Instance();
    factory.RegisterCreator(type, func);
  }

  ~OpRegistrar() = default;

  // copying is disabled
  OpRegistrar(const OpRegistrar &) = delete;
  OpRegistrar &operator=(const OpRegistrar &) = delete;

  // moving is disabled
  OpRegistrar(OpRegistrar &&) = delete;
  OpRegistrar &operator=(OpRegistrar &&) = delete;
};

// REGISTER_OP_CREATOR(type, clazz) expands to two definitions:
//   - a function Creator_<type>Op that builds a `clazz` from (node, run_context) via MakeShared;
//   - an OpRegistrar object g_<type>Op_creator constructed with the string "<type>" and that function.
// The expansion does not end in a semicolon; the use site supplies it.
#define REGISTER_OP_CREATOR(type, clazz) \
std::shared_ptr<Op> Creator_##type##Op(const Node &node, RunContext &run_context) { \
return MakeShared<clazz>(node, run_context); \
} \
OpRegistrar g_##type##Op_creator(#type, Creator_##type##Op)
} // namespace host_cpu
} // namespace ge

#endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_

+ 0
- 179
ge/host_cpu_engine/proto/task.proto View File

@@ -1,179 +0,0 @@
/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Apache License for more details at
* http://www.apache.org/licenses/LICENSE-2.0
*/
syntax = "proto3";

package domi;

// Model-level container: a version string, an attribute map, the list of TaskDef
// entries, and a set of size / count / address fields.
// NOTE(review): units and exact meaning of the numeric fields are not visible here — confirm with the producer.
message ModelTaskDef {
string version = 1;

map<string, string> attr = 9; // Extended field
repeated TaskDef task = 10;

uint64 memory_size = 11;
uint32 stream_num = 12;
uint32 event_num = 13;
uint64 weight_size = 14;

repeated bytes op = 15; // input/output opdef in bytes

uint64 base_addr = 16; // base addr
uint64 weight_addr = 17; // weight addr
uint32 batch_num = 18;
}


// One task entry. It carries an id, a numeric type, stream/event/label ids and one
// optional sub-message per task flavour (kernel, kernel_ex, kernel_hccl, ...).
// NOTE(review): presumably `type` tells the consumer which sub-message is populated — confirm with the consumer.
message TaskDef {
uint32 id = 1;
uint32 type = 2;

uint32 stream_id = 10;
uint32 event_id = 11;

KernelDef kernel = 20;
KernelExDef kernel_ex = 21;
KernelHcclDef kernel_hccl = 25;
EventExDef event_ex = 26;
LogTimeStampDef log_timestamp = 28;

uint32 label_id = 30;

MemcpyAsyncDef memcpy_async = 31;
StreamSwitchDef stream_switch = 32;
StreamActiveDef stream_active = 33;
bytes private_def = 34;
uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future
StreamSwitchNDef stream_switch_n = 36;

LabelSetDef label_set = 37;
LabelGotoExDef label_goto_ex = 38;
LabelSwitchByIndexDef label_switch_by_index = 39;
KernelDefWithHandle kernel_with_handle = 40;
}

// Kernel task payload: a KernelContext plus the kernel identification strings
// (stub_func, so_name, kernel_name) and raw byte blobs with their sizes.
// NOTE(review): the layout of the byte fields is opaque at this level — confirm with the producer.
message KernelDef {
KernelContext context = 1;

string stub_func = 10;
uint32 block_dim = 11;
uint32 args_size = 12;
bytes args = 13;
bytes sm_desc = 14;
bytes flowtable = 15;
string so_name = 16;
string kernel_name = 17;
bytes kernel_ext_info = 18;
uint32 kernel_ext_info_size = 19;
}

// Kernel task payload that identifies the kernel by a numeric handle and a dev_func
// string; it also carries a KernelContext, args blob and descriptive strings.
// NOTE(review): how `handle` is obtained and resolved is not visible here — confirm.
message KernelDefWithHandle {
KernelContext context = 1;

uint64 handle = 10;
string dev_func = 11;
uint32 block_dim = 12;
uint32 args_size = 13;
bytes args = 14;
bytes sm_desc = 15;
string original_kernel_key = 16;
string node_info = 17;
}

// Context shared by KernelDef and KernelDefWithHandle: kernel type, op identifiers
// and a description of how the args blob is laid out.
message KernelContext {
uint32 kernel_type = 1;
uint32 op_id = 2; // OP type in CCE
uint32 kernel_func_id = 3;
uint32 op_index = 4; // TE/Custom operator
bool is_flowtable = 5; // Identify whether args is a flowtable structure
bytes args_offset = 6; // args offset information
uint32 args_count = 7; // args count
repeated uint32 origin_op_index = 8;
}


// Payload of a kernel_ex task: flags, the op index, and args / task_info /
// kernel_ext_info byte blobs together with their sizes.
message KernelExDef {
uint32 flags = 1;

uint32 op_index = 4;
uint32 args_size = 12;
bytes args = 13;
bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput
uint32 task_info_size = 15;
bytes kernel_ext_info = 16;
uint32 kernel_ext_info_size = 17;
}


// Payload of a kernel_hccl task: the op index and an hccl_type string.
// NOTE(review): the set of valid hccl_type values is not visible here — confirm.
message KernelHcclDef {
uint32 op_index = 8;
string hccl_type = 9;
}


// Payload of an event_ex task: the op index and a numeric event type.
// NOTE(review): the event_type value space is not visible here — confirm.
message EventExDef {
uint32 op_index = 1;
uint32 event_type = 2;
}

// Payload of a log_timestamp task: a log id, a notify switch and a `flat` value.
// NOTE(review): `flat` may be a misspelling of "flag"; it is part of the generated API, so do not rename without checking users.
message LogTimeStampDef {
uint64 logid = 1;
bool notify = 2;
uint32 flat = 3;
}

// Payload of a memcpy_async task: destination, destination limit, source, count,
// copy kind and the op index.
// NOTE(review): presumably dst/src are addresses and count is in bytes — confirm.
message MemcpyAsyncDef {
uint64 dst = 1;
uint64 dst_max = 2;
uint64 src = 3;
uint64 count = 4;
uint32 kind = 5;
uint32 op_index = 6;
}

// Payload of a stream_switch task: the op index, the stream id for the "true"
// case, a comparison value (inline and as value_ptr) and its data type.
// NOTE(review): the comparison semantics are not visible here — confirm.
message StreamSwitchDef {
uint32 op_index = 1;
uint32 true_stream_id = 2;
int64 value = 3;
uint64 value_ptr = 4;
uint32 data_type = 5;
}

// Payload of a stream_active task: the op index and the id of the stream to activate.
message StreamActiveDef {
uint32 op_index = 1;
uint32 active_stream_id = 2;
}

// Payload of a stream_switch_n task: the op index plus parallel lists of target
// values and "true" stream ids, with size, element size and data type.
// NOTE(review): presumably target_value[i] pairs with true_stream_id[i] — confirm.
message StreamSwitchNDef {
uint32 op_index = 1;
uint32 size = 2;
repeated int64 target_value = 3;
repeated uint32 true_stream_id = 4;
uint32 element_size = 5;
uint32 data_type = 6;
}

// Payload of a label_set task: op index, label id and model id.
message LabelSetDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

// Payload of a label_goto_ex task: op index, target label id and model id.
message LabelGotoExDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

// Payload of a label_switch_by_index task: the op index and a label_max bound.
// NOTE(review): whether label_max is inclusive is not visible here — confirm.
message LabelSwitchByIndexDef {
uint32 op_index = 1;
uint32 label_max = 2;
}

+ 13
- 52
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -1278,7 +1278,8 @@ Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const
} }


Status HybridModelBuilder::IndexTaskDefs() { Status HybridModelBuilder::IndexTaskDefs() {
const auto &root_graph = ge_root_model_->GetRootGraph();
const auto root_graph = ge_root_model_->GetRootGraph();
const auto &root_graph_name = root_graph->GetName();
if (SetOutputNameAttr(*root_graph) != SUCCESS) { if (SetOutputNameAttr(*root_graph) != SUCCESS) {
GELOGW("Set output name attr failed."); GELOGW("Set output name attr failed.");
} }
@@ -1288,62 +1289,22 @@ Status HybridModelBuilder::IndexTaskDefs() {
auto &ge_model = it.second; auto &ge_model = it.second;
GE_CHECK_NOTNULL(ge_model); GE_CHECK_NOTNULL(ge_model);


const auto &sub_graph = root_graph->GetSubgraph(name);
if (sub_graph == nullptr) {
continue;
}

bool is_unknown_shape = sub_graph->GetGraphUnknownFlag();
if (!is_unknown_shape) {
GE_CHK_STATUS_RET_NOLOG(LoadGeModel(*sub_graph, ge_model));
continue;
}

// index task defs
GELOGD("To index tasks for subgraph: %s", name.c_str());
std::unordered_map<int64_t, NodePtr> node_map;
for (const auto &node : sub_graph->GetDirectNode()) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
auto node_id = node->GetOpDesc()->GetId();
GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str());
node_map.emplace(node_id, node);
}

auto tasks = ge_model->GetModelTaskDefPtr()->task();
for (int i = 0; i < tasks.size(); ++i) {
const domi::TaskDef &task_def = tasks[i];
GELOGI("Task id = %d, task type = %d", i, task_def.type());
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
uint32_t op_index = -1;
if (task_type == RT_MODEL_TASK_KERNEL) {
op_index = task_def.kernel().context().op_index();
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
op_index = task_def.kernel_ex().op_index();
} else if (task_type == RT_MODEL_TASK_HCCL) {
op_index = task_def.kernel_hccl().op_index();
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
op_index = task_def.kernel_with_handle().context().op_index();
} else {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
auto sub_graph = root_graph->GetSubgraph(name);
if (name != root_graph_name) {
if (sub_graph == nullptr) {
continue; continue;
} }


auto iter = node_map.find(op_index);
if (iter == node_map.end()) {
GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by index = %u.", op_index);
REPORT_INNER_ERROR("E19999", "Failed to get node by index = %u.", op_index);
return INTERNAL_ERROR;
}

auto &node = iter->second;
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
bool is_unknown_shape = sub_graph->GetGraphUnknownFlag();
if (!is_unknown_shape) {
GE_CHK_STATUS_RET_NOLOG(LoadGeModel(*sub_graph, ge_model));
continue;
} }

GELOGD("Task loaded for node: %s, task type = %d, op_index = %u", node->GetName().c_str(), task_type, op_index);
hybrid_model_.task_defs_[node].emplace_back(task_def);
} else {
sub_graph = root_graph;
} }

GE_CHK_STATUS_RET_NOLOG(IndexTaskDefs(sub_graph, ge_model));
} }


return SUCCESS; return SUCCESS;


+ 1
- 1
ge/hybrid/node_executor/aicpu/aicpu_node_executor.h View File

@@ -166,7 +166,7 @@ class AicpuNodeTask : public AicpuNodeTaskBase {


Status UpdateIoAddr(TaskContext &context) override; Status UpdateIoAddr(TaskContext &context) override;


private:
protected:
// host mem // host mem
std::unique_ptr<uint8_t[]> args_; std::unique_ptr<uint8_t[]> args_;




+ 117
- 76
ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc View File

@@ -15,60 +15,38 @@
*/ */


#include "hybrid/node_executor/host_cpu/host_cpu_node_executor.h" #include "hybrid/node_executor/host_cpu/host_cpu_node_executor.h"
#include "hybrid/node_executor/host_cpu/kernel_factory.h"
#include "graph/passes/folding_pass.h" #include "graph/passes/folding_pass.h"
#include "hybrid/model/hybrid_model.h" #include "hybrid/model/hybrid_model.h"
#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_mem_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h" #include "ge_local_engine/engine/host_cpu_engine.h"
#include "aicpu/common/aicpu_task_struct.h"


namespace ge { namespace ge {
namespace hybrid { namespace hybrid {
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HOST_CPU, HostCpuNodeExecutor); REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HOST_CPU, HostCpuNodeExecutor);


Status HostNodeTaskBase::UpdateArgs(TaskContext &) {
// no need update args
return SUCCESS;
}

Status HostNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
GELOGD("[%s] Start execute.", context.GetNodeName());
GE_CHK_STATUS_RET(Execute(context), "[Invoke][Execute] failed for node:%s type:%s.",
node_->GetName().c_str(), node_->GetType().c_str())
if (done_callback) {
GELOGD("[%s] Start invoke callback.", context.GetNodeName());
done_callback();
Status HostAicpuNodeTask::UpdateArgs(TaskContext &context) {
if (context.NumInputs() == 0 && context.NumOutputs() == 0) {
GELOGD("Node[%s] has no input and output, no need to update args.", node_name_.c_str());
return SUCCESS;
} }
GELOGD("[%s] Done execute successfully.", context.GetNodeName());
return SUCCESS;
}


Status CpuKernelNodeTask::Execute(TaskContext &context) {
const auto &op_desc = node_->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);

std::vector<ConstGeTensorPtr> inputs;
vector<uint64_t> io_addrs;
io_addrs.reserve(context.NumInputs() + context.NumOutputs());
for (int32_t i = 0; i < context.NumInputs(); ++i) { for (int32_t i = 0; i < context.NumInputs(); ++i) {
auto input_desc_ptr = context.GetInputDesc(i);
GE_CHECK_NOTNULL(input_desc_ptr);
const auto &input_desc = *input_desc_ptr;
auto tensor = context.GetInput(i); auto tensor = context.GetInput(i);
GE_CHECK_NOTNULL(tensor); GE_CHECK_NOTNULL(tensor);
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second); GE_CHECK_NOTNULL(item.second);
auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first);
GE_CHECK_NOTNULL(in_tensor);
in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType());
in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape());
inputs.emplace_back(in_tensor);
GELOGD("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size());
io_addrs.emplace_back(reinterpret_cast<uintptr_t>(item.second->MutableGet()));
} }


std::vector<GeTensorPtr> outputs;
for (int32_t i = 0; i < context.NumOutputs(); ++i) { for (int32_t i = 0; i < context.NumOutputs(); ++i) {
const auto &output_desc = op_desc->GetOutputDesc(i);
const auto &output_desc = context.GetOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
AllocationAttr attr; AllocationAttr attr;
attr.SetMemType(HOST_DDR); attr.SetMemType(HOST_DDR);
if (context.AllocateOutput(i, output_desc, nullptr, &attr) != SUCCESS) {
if (context.AllocateOutput(i, *output_desc, nullptr, &attr) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "node:%s Failed to allocate output %d", context.GetNodeName(), i); REPORT_CALL_ERROR("E19999", "node:%s Failed to allocate output %d", context.GetNodeName(), i);
GELOGE(FAILED, "[Invoke][AllocateOutput]node:%s Failed to allocate output %d", context.GetNodeName(), i); GELOGE(FAILED, "[Invoke][AllocateOutput]node:%s Failed to allocate output %d", context.GetNodeName(), i);
return FAILED; return FAILED;
@@ -77,37 +55,61 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
GE_CHECK_NOTNULL(tensor); GE_CHECK_NOTNULL(tensor);
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second); GE_CHECK_NOTNULL(item.second);
auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first);
GE_CHECK_NOTNULL(out_tensor);
out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType());
out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape());
outputs.emplace_back(out_tensor);
GELOGD("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size());
io_addrs.emplace_back(reinterpret_cast<uintptr_t>(item.second->MutableGet()));
} }
auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead);


return HostCpuEngine::GetInstance().Run(node_, inputs, outputs);
// if has input and output, need copy to ioaddr
int cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead),
&io_addrs[0], sizeof(uint64_t) * io_addrs.size());
if (cpy_ret != EOK) {
REPORT_INNER_ERROR("E19999", "Node[%s] memcpy io addr to AicpuParamHead failed,"
"ret=%d, args_size=%u, io nums=%zu.",
node_name_.c_str(), cpy_ret, args_size_, io_addrs.size());
GELOGE(INTERNAL_ERROR, "[Update][io_addr]Node[%s] memcpy io addr to AicpuParamHead failed,"
"ret=%d, args_size=%u, io nums=%zu.",
node_name_.c_str(), cpy_ret, args_size_, io_addrs.size());
return INTERNAL_ERROR;
}
return SUCCESS;
} }


Status HostCpuNodeTask::Execute(TaskContext &context) {
RunContext run_context;
auto host_kernel = hybrid::host_cpu::KernelFactory::Instance().CreateKernel(node_);
if (host_kernel == nullptr) {
REPORT_CALL_ERROR("E19999", "CreateKernel failed for node %s type %s is not supported by host kernel.",
node_->GetName().c_str(), node_->GetType().c_str());
GELOGE(UNSUPPORTED, "[Create][Kernel]node %s type %s is not supported by host kernel.",
node_->GetName().c_str(), node_->GetType().c_str());
return UNSUPPORTED;
Status HostAicpuNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
GELOGD("[%s] Start execute.", context.GetNodeName());
GE_CHK_STATUS_RET(Execute(context), "[Invoke][Execute] failed for node:%s.", node_name_.c_str());
if (done_callback) {
GELOGD("[%s] Start invoke callback.", context.GetNodeName());
done_callback();
} }
GELOGD("[%s] Done execute successfully.", context.GetNodeName());
return SUCCESS;
}


Status compute_ret = host_kernel->Compute(context);
if (compute_ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "node %s type %s compute failed.",
node_->GetName().c_str(), node_->GetType().c_str());
GELOGE(compute_ret, "[Invoke][Compute]node %s type %s compute failed or not imply.",
node_->GetName().c_str(), node_->GetType().c_str());
return compute_ret;
Status HostAicpuNodeTask::Execute(TaskContext &context) {
GELOGD("Node[%s] launch task start.", node_name_.c_str());
if (run_cpu_kernel_) {
GE_CHK_STATUS_RET(run_cpu_kernel_(args_.get()), "[Run][CpuKernel] failed for node:%s.", node_name_.c_str());
} else {
REPORT_CALL_ERROR("E19999", "Run cpu kernel failed node:%s, cpu kernel is not initialized.", node_name_.c_str());
GELOGE(INTERNAL_ERROR,
"[Run][Kernel]Run cpu kernel failed node:%s, cpu kernel is not initialized.",node_name_.c_str());
return INTERNAL_ERROR;
} }


GELOGD("Node[%s] launch task successfully.", node_name_.c_str());
return SUCCESS;
}

Status HostAicpuNodeTask::SetHostExtInfo() {
if (aicpu_ext_handle_.GetExtInfoLen() == 0) {
GELOGD("Node[%s] don't have ext info, no need update.", node_name_.c_str());
return SUCCESS;
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_.get());
GE_CHECK_NOTNULL(aicpu_param_head);
aicpu_param_head->extInfoLength = aicpu_ext_handle_.GetExtInfoLen();
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_handle_.GetExtInfo());
return SUCCESS; return SUCCESS;
} }


@@ -115,32 +117,71 @@ Status HostCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) co
return task.UpdateArgs(context); return task.UpdateArgs(context);
} }


/**
 * @brief Check that a task definition can be run by the host cpu executor.
 *
 * The task must be of type RT_MODEL_TASK_KERNEL and its kernel context must
 * declare the AI_CPU kernel type.
 *
 * @param task_def task definition to inspect
 * @return SUCCESS when both checks pass, INTERNAL_ERROR otherwise
 */
Status HostCpuNodeExecutor::ValidateTaskDef(const domi::TaskDef &task_def) {
  // First gate: the task itself has to be a kernel task.
  const auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
  const bool is_kernel_task = (model_task_type == RT_MODEL_TASK_KERNEL);
  if (!is_kernel_task) {
    const int task_type_value = static_cast<int>(model_task_type);
    REPORT_CALL_ERROR("E19999", "[Check][TaskType]Invalid task type (%d) in host cpu excutor.",
                      task_type_value);
    GELOGE(INTERNAL_ERROR,
           "[Check][TaskType]Invalid task type (%d) in host cpu excutor.", task_type_value);
    return INTERNAL_ERROR;
  }

  // Second gate: the kernel carried by the task has to be an AI CPU kernel.
  const auto cc_kernel_type = static_cast<ccKernelType>(task_def.kernel().context().kernel_type());
  const bool is_aicpu_kernel = (cc_kernel_type == ccKernelType::AI_CPU);
  if (!is_aicpu_kernel) {
    const int kernel_type_value = static_cast<int>(cc_kernel_type);
    REPORT_INNER_ERROR("E19999", "Invalid kernel type(%d) in host cpu excutor.",
                       kernel_type_value);
    GELOGE(INTERNAL_ERROR,
           "[Check][TaskType]Invalid kernel type(%d) in host cpu excutor.", kernel_type_value);
    return INTERNAL_ERROR;
  }

  return SUCCESS;
}

Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node,
std::shared_ptr<NodeTask> &task) const { std::shared_ptr<NodeTask> &task) const {
GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
auto mem_type = static_cast<uint32_t>(HOST_DDR);
for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) {
(void)AttrUtils::SetInt(op_desc->MutableOutputDesc(i), ATTR_OUTPUT_MEMORY_TYPE, mem_type);
auto node_item = model.GetNodeItem(node);
GE_CHECK_NOTNULL(node_item);
auto task_defs = model.GetTaskDefs(node);
GE_CHECK_NOTNULL(task_defs);

if ((*task_defs).size() != 1) {
REPORT_CALL_ERROR("E19999", "[Check][Size]Node[%s] task_def num[%zu] != 1",
node->GetName().c_str(), (*task_defs).size());
GELOGE(PARAM_INVALID, "[Check][Size]Node[%s] task_def num[%zu] != 1",
node->GetName().c_str(), (*task_defs).size());
return PARAM_INVALID;
} }
const std::string &name = node->GetName();
const std::string &type = node->GetType();
if (HostCpuEngine::GetInstance().CheckSupported(type)) {
GELOGI("create CpuKernelNodeTask for node %s, type %s.", name.c_str(), type.c_str());
task = MakeShared<CpuKernelNodeTask>(node);
GE_CHECK_NOTNULL(task);
} else if (hybrid::host_cpu::KernelFactory::Instance().CreateKernel(node) != nullptr) {
GELOGI("create HostCpuNodeTask for node %s, type %s.", name.c_str(), type.c_str());
task = MakeShared<HostCpuNodeTask>(node);
GE_CHECK_NOTNULL(task);
const auto &task_def = (*task_defs)[0];
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[Validate][TaskDef] failed for Node[%s].", node->GetName().c_str());
auto host_aicpu_task = MakeShared<HostAicpuNodeTask>(node_item, task_def);
GE_CHK_BOOL_RET_STATUS(host_aicpu_task != nullptr, MEMALLOC_FAILED,
"[Check][State]Load task for node %s failed.", node->GetName().c_str());
GE_CHK_STATUS_RET(host_aicpu_task->Init(model),
"[Init][AicpuNodeTaskBase] failed for Node[%s].", node->GetName().c_str());
GE_CHK_STATUS_RET(host_aicpu_task->SetHostExtInfo(),
"[Set][HostExtInfo] failed for Node[%s].", node->GetName().c_str());

auto handle = HostCpuEngine::GetInstance().GetConstantFoldingHandle();
if (handle == nullptr) {
REPORT_CALL_ERROR("E19999", "Get constant folding handle failed.");
GELOGE(INTERNAL_ERROR, "[Get][Handle]Get constant folding handle failed.");
return INTERNAL_ERROR;
}
auto run_cpu_kernel = (uint32_t (*)(void *))mmDlsym(handle, "RunCpuKernel");
if (run_cpu_kernel != nullptr) {
host_aicpu_task->SetRunKernel(run_cpu_kernel);
} else { } else {
REPORT_INNER_ERROR("E19999", "Create NodeTask failed for node %s type %s.",
name.c_str(), type.c_str());
GELOGE(UNSUPPORTED, "[Create][NodeTask]node %s type %s is not support in HostCpuNodeExecutor now.",
name.c_str(), type.c_str());
return UNSUPPORTED;
REPORT_CALL_ERROR("E19999", "Get run cpu kernel failed.");
GELOGE(INTERNAL_ERROR, "[Get][Kernel]Get run cpu kernel failed.");
return INTERNAL_ERROR;
} }

task = std::move(host_aicpu_task);
GELOGD("Node[%s] load task end.", node->GetName().c_str());

return SUCCESS; return SUCCESS;
} }
} // namespace hybrid } // namespace hybrid


+ 15
- 22
ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h View File

@@ -19,39 +19,29 @@


#include "hybrid/node_executor/node_executor.h" #include "hybrid/node_executor/node_executor.h"
#include "inc/kernel.h" #include "inc/kernel.h"
#include "hybrid/node_executor/aicpu/aicpu_node_executor.h"


namespace ge { namespace ge {
namespace hybrid { namespace hybrid {
class HostNodeTaskBase : public NodeTask {

class HostAicpuNodeTask : public AicpuNodeTask {
public: public:
explicit HostNodeTaskBase(const NodePtr &node) : node_(node) {}
~HostNodeTaskBase() override = default;
Status UpdateArgs(TaskContext &context) override;
HostAicpuNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def)
: AicpuNodeTask(node_item, task_def) {}
~HostAicpuNodeTask() override = default;

Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;


protected:
NodePtr node_;
Status UpdateArgs(TaskContext &context) override;


private:
virtual Status Execute(TaskContext &context) = 0;
};
void SetRunKernel(std::function<uint32_t(void *)> run_cpu_kernel) { run_cpu_kernel_ = run_cpu_kernel; }


class CpuKernelNodeTask : public HostNodeTaskBase {
public:
explicit CpuKernelNodeTask(const NodePtr &node) : HostNodeTaskBase(node) {}
~CpuKernelNodeTask() override = default;
Status SetHostExtInfo();


private: private:
Status Execute(TaskContext &context) override;
};

class HostCpuNodeTask : public HostNodeTaskBase {
public:
explicit HostCpuNodeTask(const NodePtr &node) : HostNodeTaskBase(node) {}
~HostCpuNodeTask() override = default;
Status Execute(TaskContext &context);


private:
Status Execute(TaskContext &context) override;
std::function<uint32_t(void *)> run_cpu_kernel_ = nullptr;
}; };


class HostCpuNodeExecutor : public NodeExecutor { class HostCpuNodeExecutor : public NodeExecutor {
@@ -61,6 +51,9 @@ class HostCpuNodeExecutor : public NodeExecutor {
Status LoadTask(const HybridModel &model, Status LoadTask(const HybridModel &model,
const NodePtr &node, const NodePtr &node,
std::shared_ptr<NodeTask> &task) const override; std::shared_ptr<NodeTask> &task) const override;

private:
static Status ValidateTaskDef(const domi::TaskDef &task_def);
}; };
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge


+ 0
- 60
ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc View File

@@ -1,60 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "hybrid/node_executor/host_cpu/kernel/assign_kernel.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "hybrid/node_executor/host_cpu/kernel_factory.h"

namespace {
// Index of the input holding the reference tensor that is assigned to.
constexpr size_t kAssignRefInputIndex = 0;
// Index of the input holding the value that is copied into the reference.
constexpr size_t kAssignValueInputIndex = 1;
// Index of the output through which the updated reference tensor is published.
constexpr size_t kAssignRefOutputIndex = 0;
}  // namespace

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * @brief Copy the value input into the reference input and expose it as output.
 *
 * Fails with INTERNAL_ERROR when the value tensor is larger than the reference
 * tensor. A zero-sized value skips the copy but still sets the output.
 *
 * @param context task context providing the inputs and receiving the output
 * @return SUCCESS on success, otherwise the status of the failing step
 */
Status AssignKernel::Compute(TaskContext& context) {
  auto ref_tensor = context.MutableInput(kAssignRefInputIndex);
  GE_CHECK_NOTNULL(ref_tensor);
  const auto value_tensor = context.GetInput(kAssignValueInputIndex);
  GE_CHECK_NOTNULL(value_tensor);

  const auto &node_name = node_->GetName();
  const auto ref_size = ref_tensor->GetSize();
  const auto value_size = value_tensor->GetSize();

  // The destination buffer must be able to hold the whole value.
  const bool value_fits = !(value_size > ref_size);
  if (!value_fits) {
    REPORT_INNER_ERROR("E19999", "[%s] value_input_size=%zu bigger than ref_input_size=%zu. check invalid",
                       node_name.c_str(), value_size, ref_size);
    GELOGE(INTERNAL_ERROR, "[Check][Size][%s] value_input_size=%zu, but ref_input_size=%zu.",
           node_name.c_str(), value_size, ref_size);
    return INTERNAL_ERROR;
  }

  // NOTE(review): the label says value_input_data but the pointer logged is the ref tensor's - confirm intent.
  GELOGI("[%s] value_input_data=%p, ref_input_size=%zu, value_input_size=%zu.",
         node_name.c_str(), ref_tensor->GetData(), ref_size, value_size);

  // Nothing to copy for an empty value tensor.
  if (value_size > 0) {
    GE_CHK_RT_RET(rtMemcpy(ref_tensor->MutableData(), ref_tensor->GetSize(), value_tensor->GetData(),
                           value_tensor->GetSize(), RT_MEMCPY_HOST_TO_HOST));
  }

  GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor),
                    "[Set][Output] failed for[%s].", context.GetNodeName());

  GELOGD("[%s] compute success.", node_name.c_str());
  return SUCCESS;
}

REGISTER_KERNEL_CREATOR(Assign, AssignKernel);
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

+ 0
- 42
ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h View File

@@ -1,42 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HYBRID_HOST_CPU_KERNEL_ASSIGN_KERNEL_H_
#define GE_HYBRID_HOST_CPU_KERNEL_ASSIGN_KERNEL_H_

#include "hybrid/node_executor/host_cpu/kernel/kernel.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * Host kernel for the Assign op type; not copyable.
 */
class AssignKernel : public Kernel {
 public:
  AssignKernel(const NodePtr &node) : Kernel(node) {}
  ~AssignKernel() override = default;

  // Copying a kernel is not supported.
  AssignKernel(const AssignKernel &) = delete;
  AssignKernel &operator=(const AssignKernel &) = delete;

  /**
   * @brief Copy input 1 (value) into input 0 (ref) and set the ref tensor as output 0.
   * @param context task context of the node being executed
   * @return SUCCESS, or an error status when the inputs are missing, the value
   *         does not fit into the ref tensor, or the copy / output update fails
   */
  Status Compute(TaskContext& context) override;
};
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

#endif // GE_HYBRID_HOST_CPU_KERNEL_ASSIGN_KERNEL_H_

+ 0
- 42
ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc View File

@@ -1,42 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "hybrid/node_executor/host_cpu/kernel/data_kernel.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "hybrid/node_executor/host_cpu/kernel_factory.h"

namespace {
// The Data kernel forwards its single input ...
constexpr size_t kDataInputIndex{0};
// ... to its single output.
constexpr size_t kDataOutputIndex{0};
}  // namespace

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * @brief Forward the node's input tensor unchanged to its output.
 *
 * @param context task context providing input 0 and receiving output 0
 * @return SUCCESS on success; an error status when the input is null or the
 *         output cannot be set
 */
Status DataKernel::Compute(TaskContext& context) {
  auto input = context.MutableInput(kDataInputIndex);
  GE_CHECK_NOTNULL(input);
  // Terminate the macro invocation explicitly, as the other kernels do, so this
  // statement does not depend on how the macro body happens to end.
  GE_CHK_STATUS_RET(context.SetOutput(kDataOutputIndex, *input),
                    "[Set][Output] failed for [%s].", context.GetNodeName());
  GELOGD("[%s] compute success.", node_->GetName().c_str());
  return SUCCESS;
}

REGISTER_KERNEL_CREATOR(Data, DataKernel);
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

+ 0
- 42
ge/hybrid/node_executor/host_cpu/kernel/data_kernel.h View File

@@ -1,42 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_
#define GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_

#include "hybrid/node_executor/host_cpu/kernel/kernel.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * Host kernel for the Data op type; not copyable.
 */
class DataKernel : public Kernel {
 public:
  DataKernel(const NodePtr &node) : Kernel(node) {}
  ~DataKernel() override = default;

  // Copying a kernel is not supported.
  DataKernel(const DataKernel &) = delete;
  DataKernel &operator=(const DataKernel &) = delete;

  /**
   * @brief Pass input 0 through as output 0.
   * @param context task context of the node being executed
   * @return SUCCESS, or an error status when the input is missing or the output cannot be set
   */
  Status Compute(TaskContext& context) override;
};
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

#endif // GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_

+ 0
- 43
ge/hybrid/node_executor/host_cpu/kernel/kernel.h View File

@@ -1,43 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HYBRID_HOST_CPU_KERNEL_KERNEL_H_
#define GE_HYBRID_HOST_CPU_KERNEL_KERNEL_H_

#include "common/ge_inner_error_codes.h"
#include "graph/node.h"
#include "hybrid/node_executor/task_context.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * The base class for all host_kernel.
 *
 * A kernel is bound to one graph node at construction time and performs that
 * node's work on the host when Compute() is called.
 */
class Kernel {
 public:
  Kernel(const NodePtr &node) : node_(node) {}
  virtual ~Kernel() = default;

  /**
   * @brief Run the kernel for the bound node.
   * @param context task context giving access to the node's inputs and outputs
   * @return SUCCESS on success, an error status otherwise
   */
  virtual Status Compute(TaskContext& context) = 0;

 protected:
  // Held by value (shared ownership) rather than as a reference to the
  // caller's pointer object, so the kernel can never observe a dangling
  // reference if the caller's NodePtr is destroyed before the kernel.
  const NodePtr node_;
};
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

#endif // GE_HYBRID_HOST_CPU_KERNEL_KERNEL_H_

+ 0
- 34
ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc View File

@@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "hybrid/node_executor/host_cpu/kernel/no_op_kernel.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "hybrid/node_executor/host_cpu/kernel_factory.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * @brief Do nothing except trace the call.
 * @param context task context (unused)
 * @return always SUCCESS
 */
Status NoOpKernel::Compute(TaskContext& context) {
  const auto &node_name = node_->GetName();
  GELOGD("[%s] no need to compute.", node_name.c_str());
  return SUCCESS;
}

REGISTER_KERNEL_CREATOR(NoOp, NoOpKernel);
REGISTER_KERNEL_CREATOR(NetOutput, NoOpKernel);
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

+ 0
- 42
ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h View File

@@ -1,42 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HYBRID_HOST_CPU_KERNEL_NO_OP_KERNEL_H_
#define GE_HYBRID_HOST_CPU_KERNEL_NO_OP_KERNEL_H_

#include "hybrid/node_executor/host_cpu/kernel/kernel.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * Host kernel for op types that need no host-side computation; not copyable.
 */
class NoOpKernel : public Kernel {
 public:
  NoOpKernel(const NodePtr &node) : Kernel(node) {}
  ~NoOpKernel() override = default;

  // Copying a kernel is not supported.
  NoOpKernel(const NoOpKernel &) = delete;
  NoOpKernel &operator=(const NoOpKernel &) = delete;

  /**
   * @brief Log the call and return without touching the task context.
   * @param context task context of the node being executed
   * @return SUCCESS
   */
  Status Compute(TaskContext& context) override;
};
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

#endif // GE_HYBRID_HOST_CPU_KERNEL_NO_OP_KERNEL_H_

+ 0
- 157
ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc View File

@@ -1,157 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h"
#include <random>
#include "common/fp16_t.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/utils/type_utils.h"
#include "hybrid/node_executor/host_cpu/kernel_factory.h"

namespace {
// Name of the op attribute that carries the requested output data type.
constexpr const char *kAttrDtype = "dtype";
}  // namespace

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * @brief Fill the node's output with uniformly distributed random values.
 *
 * Reads the optional "seed" / "seed2" attributes (both default to 0) and the
 * mandatory "dtype" attribute, then dispatches to the generator matching the
 * data type. Only DT_FLOAT16, DT_FLOAT and DT_DOUBLE are handled.
 *
 * @param context task context receiving the generated output
 * @return SUCCESS on success; PARAM_INVALID when "dtype" is missing; FAILED
 *         when generation fails; UNSUPPORTED for any other data type
 */
Status RandomUniformKernel::Compute(TaskContext& context) {
  const auto op_desc = node_->GetOpDesc();
  const auto &node_name = node_->GetName();

  // Seeds are optional: a missing attribute leaves the default of 0 in place.
  int64_t seed = 0;
  int64_t seed2 = 0;
  (void)AttrUtils::GetInt(op_desc, "seed", seed);
  (void)AttrUtils::GetInt(op_desc, "seed2", seed2);

  DataType data_type = DT_FLOAT;
  const bool has_dtype = AttrUtils::GetDataType(op_desc, kAttrDtype, data_type);
  if (!has_dtype) {
    REPORT_CALL_ERROR("E19999", "GetDataType failed for [%s].", node_name.c_str());
    GELOGE(PARAM_INVALID, "[Get][DataType] failed for [%s].", node_name.c_str());
    return PARAM_INVALID;
  }

  if (data_type == DT_FLOAT16) {
    if (GenerateFP16(op_desc, seed, seed2, context) != SUCCESS) {
      GELOGE(FAILED, "[Invoke][GenerateFP16]Generate random_distribution failed for %s, data_type=DT_FLOAT16",
             node_name.c_str());
      return FAILED;
    }
  } else if (data_type == DT_FLOAT) {
    if (Generate<float>(op_desc, seed, seed2, context) != SUCCESS) {
      GELOGE(FAILED, "[Invoke][Generate]Generate random_distribution failed for %s, data_type=DT_FLOAT",
             node_name.c_str());
      return FAILED;
    }
  } else if (data_type == DT_DOUBLE) {
    if (Generate<double>(op_desc, seed, seed2, context) != SUCCESS) {
      GELOGE(FAILED, "[Invoke][Generate]Generate random_distribution failed for %s, data_type=DT_DOUBLE",
             node_name.c_str());
      return FAILED;
    }
  } else {
    // Any other data type is rejected.
    const auto type_name = TypeUtils::DataTypeToSerialString(data_type);
    REPORT_INNER_ERROR("E19999", "[Check][DataType]Supported DataType is DT_FLOAT16 / DT_FLOAT / DT_DOUBLE,"
                       "but data_type=%s, node:%s",
                       type_name.c_str(),
                       node_name.c_str());
    GELOGE(UNSUPPORTED, "[Check][DataType]Supported DataType is DT_FLOAT16 / DT_FLOAT / DT_DOUBLE,"
           "but data_type=%s, node:%s",
           type_name.c_str(),
           node_name.c_str());
    return UNSUPPORTED;
  }

  GELOGD("[%s] compute success.", node_name.c_str());
  return SUCCESS;
}

namespace {
/**
 * @brief Choose the seed for the random engine.
 *
 * A non-zero `seed` wins; otherwise a non-zero `seed2` is used; when both are
 * zero a value is drawn from std::random_device.
 */
int64_t SelectRandomSeed(int64_t seed, int64_t seed2) {
  if (seed != 0) {
    return seed;
  }
  if (seed2 != 0) {
    return seed2;
  }
  std::random_device rd;
  return rd();
}

/**
 * @brief Allocate the host buffer for the single output of a RandomUniform node.
 *
 * @param op_desc_ptr  op description the output shape is read from
 * @param element_size size in bytes of one output element
 * @param context      task context used for the allocation
 * @param tensor       [out] receives the allocated tensor
 * @param data_num     [out] receives the number of elements to generate
 * @return SUCCESS on success; PARAM_INVALID for a null op description or a
 *         negative element count; otherwise the allocation status
 */
Status AllocateRandomOutput(const ge::OpDescPtr &op_desc_ptr, size_t element_size, TaskContext &context,
                            TensorValue &tensor, int64_t &data_num) {
  GE_CHECK_NOTNULL(op_desc_ptr);
  // RandomUniformOp has and only has one output
  data_num = op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize();
  // A negative count would wrap to an enormous byte size once multiplied by the
  // unsigned element size, so reject it before asking for memory.
  if (data_num < 0) {
    REPORT_INNER_ERROR("E19999", "[%s] output element count %ld is negative, check invalid.",
                       context.GetNodeName(), data_num);
    GELOGE(PARAM_INVALID, "[Check][Shape][%s] output element count %ld is negative.",
           context.GetNodeName(), data_num);
    return PARAM_INVALID;
  }

  AllocationAttr attr;
  attr.SetMemType(HOST_DDR);
  const auto tensor_size = static_cast<size_t>(data_num) * element_size;
  GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr),
                    "[Invoke][AllocateTensor][%s] Failed to allocate output of size %zu",
                    context.GetNodeName(),
                    tensor_size);
  return SUCCESS;
}
}  // namespace

/**
 * @brief Generate the node's output as values of type T drawn uniformly from [0, 1).
 *
 * @tparam T floating point element type of the output
 * @param op_desc_ptr op description of the node; its output 0 shape fixes the element count
 * @param seed        primary seed; 0 means "not set"
 * @param seed2       secondary seed, used only when `seed` is 0
 * @param context     task context used to allocate and publish the output
 * @return SUCCESS on success, otherwise the status of the failing step
 */
template <typename T>
Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2,
                                     TaskContext& context) {
  TensorValue tensor;
  int64_t data_num = 0;
  const Status alloc_ret = AllocateRandomOutput(op_desc_ptr, sizeof(T), context, tensor, data_num);
  if (alloc_ret != SUCCESS) {
    return alloc_ret;
  }

  auto *buf = reinterpret_cast<T *>(tensor.MutableData());
  if (data_num > 0) {
    // Only dereferenced when there is something to write.
    GE_CHECK_NOTNULL(buf);
  }

  std::mt19937_64 gen(SelectRandomSeed(seed, seed2));
  std::uniform_real_distribution<T> distribution(0, 1);
  for (int64_t i = 0; i < data_num; i++) {
    *(buf + i) = distribution(gen);
  }

  GE_CHK_STATUS_RET(context.SetOutput(0, tensor), "[Set][Output] failed for [%s].", context.GetNodeName());
  return SUCCESS;
}

/**
 * @brief Generate the node's output as fp16 values drawn uniformly from [0, 1).
 *
 * Values are drawn as float and converted to fp16_t element by element.
 *
 * @param op_desc_ptr op description of the node; its output 0 shape fixes the element count
 * @param seed        primary seed; 0 means "not set"
 * @param seed2       secondary seed, used only when `seed` is 0
 * @param context     task context used to allocate and publish the output
 * @return SUCCESS on success, otherwise the status of the failing step
 */
Status RandomUniformKernel::GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2,
                                         TaskContext& context) {
  TensorValue tensor;
  int64_t data_num = 0;
  const Status alloc_ret = AllocateRandomOutput(op_desc_ptr, sizeof(fp16_t), context, tensor, data_num);
  if (alloc_ret != SUCCESS) {
    return alloc_ret;
  }

  auto *buf = reinterpret_cast<fp16_t *>(tensor.MutableData());
  if (data_num > 0) {
    // Only dereferenced when there is something to write.
    GE_CHECK_NOTNULL(buf);
  }

  std::mt19937_64 gen(SelectRandomSeed(seed, seed2));
  std::uniform_real_distribution<float> distribution(0, 1);
  for (int64_t i = 0; i < data_num; i++) {
    *(buf + i) = static_cast<fp16_t>(distribution(gen));
  }

  GE_CHK_STATUS_RET(context.SetOutput(0, tensor), "[Set][Output]failed for [%s].", context.GetNodeName());
  return SUCCESS;
}

REGISTER_KERNEL_CREATOR(RandomUniform, RandomUniformKernel);
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

+ 0
- 48
ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h View File

@@ -1,48 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HYBRID_HOST_CPU_KERNEL_RANDOM_UNIFORM_KERNEL_H_
#define GE_HYBRID_HOST_CPU_KERNEL_RANDOM_UNIFORM_KERNEL_H_

#include "hybrid/node_executor/host_cpu/kernel/kernel.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * Host kernel for the RandomUniform op type; not copyable.
 */
class RandomUniformKernel : public Kernel {
 public:
  RandomUniformKernel(const NodePtr &node) : Kernel(node) {}
  ~RandomUniformKernel() override = default;

  // Copying a kernel is not supported.
  RandomUniformKernel(const RandomUniformKernel &) = delete;
  RandomUniformKernel &operator=(const RandomUniformKernel &) = delete;

  /**
   * @brief Produce the node's output filled with uniformly distributed random values.
   * @param context task context of the node being executed
   * @return SUCCESS, or an error status when the dtype attribute is missing,
   *         the data type is not supported, or generation fails
   */
  Status Compute(TaskContext& context) override;

 private:
  // Generator for the half precision output type.
  static Status GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context);

  // Generator for output element type T.
  template <typename T>
  Status Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context);
};
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

#endif // GE_HYBRID_HOST_CPU_KERNEL_RANDOM_UNIFORM_KERNEL_H_

+ 0
- 42
ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc View File

@@ -1,42 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "hybrid/node_executor/host_cpu/kernel/variable_kernel.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "hybrid/node_executor/host_cpu/kernel_factory.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * @brief Publish the variable tensor registered under the node's name as output 0.
 *
 * @param context task context used to look the variable up and to set the output
 * @return SUCCESS on success; PARAM_INVALID when no variable is found for the
 *         node; otherwise the status of setting the output
 */
Status VariableKernel::Compute(TaskContext& context) {
  const auto &node_name = node_->GetName();
  auto tensor = context.GetVariable(node_name);
  if (tensor != nullptr) {
    // Constant & Variable Op has and only has one output
    GE_CHK_STATUS_RET(context.SetOutput(0, *tensor), "[Set][Output] failed for [%s].", context.GetNodeName());
    GELOGD("[%s] compute success.", node_name.c_str());
    return SUCCESS;
  }

  // The task context holds no variable under this node's name.
  REPORT_INNER_ERROR("E19999", "Get Variable from task context for node:%s failed.", context.GetNodeName());
  GELOGE(PARAM_INVALID, "[Check][Param]Get Variable from task context for node:%s failed.", context.GetNodeName());
  return PARAM_INVALID;
}

REGISTER_KERNEL_CREATOR(Variable, VariableKernel);
REGISTER_KERNEL_CREATOR(Constant, VariableKernel);
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

+ 0
- 42
ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h View File

@@ -1,42 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HYBRID_HOST_CPU_KERNEL_VARIABLE_KERNEL_H_
#define GE_HYBRID_HOST_CPU_KERNEL_VARIABLE_KERNEL_H_

#include "hybrid/node_executor/host_cpu/kernel/kernel.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * Host kernel that exposes a variable tensor as the node's output; not copyable.
 */
class VariableKernel : public Kernel {
 public:
  VariableKernel(const NodePtr &node) : Kernel(node) {}
  ~VariableKernel() override = default;

  // Copying a kernel is not supported.
  VariableKernel(const VariableKernel &) = delete;
  VariableKernel &operator=(const VariableKernel &) = delete;

  /**
   * @brief Look up the variable named after the node and set it as output 0.
   * @param context task context of the node being executed
   * @return SUCCESS, or an error status when the variable is missing or the output cannot be set
   */
  Status Compute(TaskContext& context) override;
};
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

#endif // GE_HYBRID_HOST_CPU_KERNEL_VARIABLE_KERNEL_H_

+ 0
- 58
ge/hybrid/node_executor/host_cpu/kernel_factory.cc View File

@@ -1,58 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "hybrid/node_executor/host_cpu/kernel_factory.h"
#include "framework/common/debug/ge_log.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
/**
 * @brief Access the single KernelFactory object.
 * @return reference to the factory, a function-local static created on first use
 */
KernelFactory &KernelFactory::Instance() {
  static KernelFactory factory;
  return factory;
}

/**
 * @brief Create the host kernel registered for the node's op type.
 *
 * @param node node to create a kernel for
 * @return the kernel produced by the registered creator, or nullptr when the
 *         node is null or no creator is registered for its type
 */
std::shared_ptr<Kernel> KernelFactory::CreateKernel(const NodePtr &node) {
  if (node == nullptr) {
    GELOGW("node is NULL.");
    return nullptr;
  }

  const auto &node_type = node->GetType();
  const auto creator_it = kernel_creator_map_.find(node_type);
  if (creator_it == kernel_creator_map_.end()) {
    // No creator was registered for this op type.
    REPORT_INNER_ERROR("E19999", "Not supported because kernel_creator_map_ not contain type:%s, name = %s",
                       node_type.c_str(), node->GetName().c_str());
    GELOGE(FAILED, "[Find][NodeType]Not supported because kernel_creator_map_ not contain type = %s, name = %s",
           node_type.c_str(), node->GetName().c_str());
    return nullptr;
  }

  return creator_it->second(node);
}

/**
 * @brief record the creator function for a kernel type.
 * @param [in] type: kernel type used as the lookup key
 * @param [in] func: creator to store; an empty function is ignored with a warning
 *
 * The first registration for a type wins: a later one for the same type is
 * dropped with a warning and the stored creator is left untouched.
 */
void KernelFactory::RegisterCreator(const std::string &type, const KERNEL_CREATOR_FUNC &func) {
  if (!func) {
    GELOGW("Func is NULL.");
    return;
  }

  // emplace inserts only when the key is absent and reports whether it did.
  const bool inserted = kernel_creator_map_.emplace(type, func).second;
  if (!inserted) {
    GELOGW("%s creator already exist", type.c_str());
  }
}
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

+ 0
- 88
ge/hybrid/node_executor/host_cpu/kernel_factory.h View File

@@ -1,88 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_HYBRID_NODE_EXECUTOR_HOST_CPU_KERNEL_FACTORY_H_
#define GE_HYBRID_NODE_EXECUTOR_HOST_CPU_KERNEL_FACTORY_H_

#include <functional>
#include <map>
#include <string>
#include "common/ge/ge_util.h"
#include "hybrid/node_executor/host_cpu/kernel/kernel.h"

namespace ge {
namespace hybrid {
namespace host_cpu {
// Callable that builds a kernel for a node: takes a NodePtr and returns a std::shared_ptr<Kernel>.
using KERNEL_CREATOR_FUNC = std::function<std::shared_ptr<Kernel>(const NodePtr &)>;

/**
 * Registry of host_cpu kernel creators, keyed by kernel type string.
 * The only instance is reached through Instance(); construction and
 * destruction are private, and the object can be neither copied nor moved.
 */
class KernelFactory {
 public:
  KernelFactory(const KernelFactory &) = delete;
  KernelFactory(KernelFactory &&) = delete;
  KernelFactory &operator=(const KernelFactory &) = delete;
  KernelFactory &operator=(KernelFactory &&) = delete;

  /**
   * @brief get the factory object.
   * @return reference to the factory
   */
  static KernelFactory &Instance();

  /**
   * @brief create Kernel.
   * @param [in] node
   * @return not nullptr success
   * @return nullptr fail
   */
  std::shared_ptr<Kernel> CreateKernel(const NodePtr &node);

  /**
   * @brief Register Kernel create function.
   * @param [in] type: Kernel type
   * @param [in] func: Kernel create func
   */
  void RegisterCreator(const std::string &type, const KERNEL_CREATOR_FUNC &func);

 private:
  KernelFactory() = default;
  ~KernelFactory() = default;

  // creator functions indexed by kernel type
  std::map<std::string, KERNEL_CREATOR_FUNC> kernel_creator_map_;
};

class KernelRegistrar {
public:
KernelRegistrar(const std::string &type, const KERNEL_CREATOR_FUNC &func) {
KernelFactory::Instance().RegisterCreator(type, func);
}
~KernelRegistrar() = default;

KernelRegistrar(const KernelRegistrar &) = delete;
KernelRegistrar &operator=(const KernelRegistrar &) = delete;
KernelRegistrar(KernelRegistrar &&) = delete;
KernelRegistrar &operator=(KernelRegistrar &&) = delete;
};

// Registers kernel class `clazz` under the type name `type`.
// Expands to two definitions:
//   1. Creator_<type>Kernel(node): returns MakeShared<clazz>(node).
//   2. g_<type>Kernel_creator: a KernelRegistrar constructed with the string "<type>" and the function
//      above; its constructor passes both to KernelFactory::Instance().RegisterCreator.
// The expansion has no trailing semicolon, so the use site must supply one.
#define REGISTER_KERNEL_CREATOR(type, clazz) \
std::shared_ptr<Kernel> Creator_##type##Kernel(const NodePtr &node) { \
return MakeShared<clazz>(node); \
} \
KernelRegistrar g_##type##Kernel_creator(#type, Creator_##type##Kernel)
} // namespace host_cpu
} // namespace hybrid
} // namespace ge

#endif // GE_HYBRID_NODE_EXECUTOR_HOST_CPU_KERNEL_FACTORY_H_

+ 1
- 1
ge/opskernel_manager/optimizer_priority.pbtxt View File

@@ -1 +1 @@
optimizer:["aicpu_tf_optimizer","aicpu_ascend_optimizer","AIcoreEngine","VectorEngine","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE"]
optimizer:["aicpu_tf_optimizer","aicpu_ascend_optimizer","AIcoreEngine","VectorEngine","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE","aicpu_host_cpu_optimizer"]

+ 1
- 6
tests/ut/ge/CMakeLists.txt View File

@@ -613,12 +613,6 @@ set(SINGLE_OP_SRC_FILES
"${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel_factory.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/controlop/control_op_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/controlop/control_op_executor.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc"
@@ -839,6 +833,7 @@ set(HYBRID_TEST_FILES
"hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/executor/worker/execution_engine_unittest.cc"
"hybrid/model/hybrid_model_builder_unittest.cc" "hybrid/model/hybrid_model_builder_unittest.cc"
"hybrid/node_executor/rts/rts_node_task_unittest.cc" "hybrid/node_executor/rts/rts_node_task_unittest.cc"
"hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc"
"hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc"
"hybrid/executor/hybrid_model_async_executor_unittest.cc" "hybrid/executor/hybrid_model_async_executor_unittest.cc"
"hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc"


+ 137
- 0
tests/ut/ge/hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc View File

@@ -0,0 +1,137 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector>

#define private public
#define protected public
#include "hybrid/executor/subgraph_context.h"
#include "hybrid/node_executor/host_cpu/host_cpu_node_executor.h"
#include "model/ge_root_model.h"
#include "graph/passes/graph_builder_utils.h"
#include "aicpu/common/aicpu_task_struct.h"
#include "graph/manager/graph_mem_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h"
#undef private
#undef protected

using namespace std;
using namespace testing;

namespace ge {
using namespace hybrid;

namespace {
// Argument blob used by the tests below: an aicpu::AicpuParamHead followed by two 64-bit slots.
// test_load copies it byte-for-byte into the kernel args of a task definition (with head.ioAddrNum = 2),
// and test_update_args uses its size for the task's argument buffer.
// Declared packed, so no padding is inserted between the members.
struct AicpuTaskStruct {
aicpu::AicpuParamHead head;
uint64_t io_addrp[2];
}__attribute__((packed));
} // namespace

class UtestHostCpuNodeTask : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};

// Drives HostCpuNodeExecutor::LoadTask through a series of rejected configurations. The same task definition
// is mutated step by step, so the order of the statements below matters.
TEST_F(UtestHostCpuNodeTask, test_load) {
  ut::GraphBuilder builder = ut::GraphBuilder("graph");
  auto node = builder.AddNode("Data", "Data", 1, 1);
  auto graph = builder.GetGraph();

  GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph);
  HybridModel hybrid_model(ge_root_model);
  std::unique_ptr<NodeItem> node_item;
  ASSERT_EQ(NodeItem::Create(node, node_item), SUCCESS);
  hybrid_model.node_items_[node] = std::move(node_item);
  hybrid_model.task_defs_[node] = {};

  // Step 1: the node has an empty task definition list.
  NodeTaskPtr task = nullptr;
  HostCpuNodeExecutor node_executor;
  ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), PARAM_INVALID);

  // Value-initialise the blob: it is copied byte-for-byte into the task definition, so every field that is
  // not set explicitly must still hold a defined value.
  AicpuTaskStruct args{};
  args.head.length = sizeof(args);
  args.head.ioAddrNum = 2;

  // Step 2: a task definition of type RT_MODEL_TASK_ALL_KERNEL carrying the args above.
  domi::TaskDef task_def;
  task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
  task_def.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length);
  task_def.mutable_kernel()->set_args_size(args.head.length);
  hybrid_model.task_defs_[node] = {task_def};
  hybrid_model.node_items_[node]->num_inputs = 1;
  hybrid_model.node_items_[node]->num_outputs = 1;
  ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), INTERNAL_ERROR);

  // Step 3: same definition switched to RT_MODEL_TASK_KERNEL, kernel type in the context still unset.
  domi::TaskDef &host_task_def = hybrid_model.task_defs_[node][0];
  host_task_def.set_type(RT_MODEL_TASK_KERNEL);
  ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), INTERNAL_ERROR);

  // Step 4: kernel type set, engine handle as it was when the test started.
  domi::KernelContext *context = host_task_def.mutable_kernel()->mutable_context();
  context->set_kernel_type(6);  // ccKernelType::AI_CPU
  ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), INTERNAL_ERROR);

  // Step 5: install a dummy handle on the engine singleton for one LoadTask call. The previous value is put
  // back before asserting, so the fake pointer cannot leak into later tests even if the assertion fails.
  auto &engine = HostCpuEngine::GetInstance();
  const auto saved_handle = engine.constant_folding_handle_;
  engine.constant_folding_handle_ = reinterpret_cast<void *>(0x01);
  const auto load_ret = node_executor.LoadTask(hybrid_model, node, task);
  engine.constant_folding_handle_ = saved_handle;
  ASSERT_EQ(load_ret, INTERNAL_ERROR);
}

// ExecuteAsync is expected to fail with INTERNAL_ERROR on a freshly built task and to succeed once a
// run-kernel function has been installed through SetRunKernel.
TEST_F(UtestHostCpuNodeTask, test_execute) {
  ut::GraphBuilder graph_builder("graph");
  auto data_node = graph_builder.AddNode("Data", "Data", 1, 1);
  std::unique_ptr<NodeItem> item;
  ASSERT_EQ(NodeItem::Create(data_node, item), SUCCESS);

  domi::TaskDef empty_task_def;
  HostAicpuNodeTask host_task(item.get(), empty_task_def);

  NodeState state(*item, nullptr);
  TaskContext task_context(nullptr, &state, nullptr);
  std::function<void()> done_callback = []() {};

  // Nothing installed yet.
  ASSERT_EQ(host_task.ExecuteAsync(task_context, done_callback), INTERNAL_ERROR);

  // Stub kernel that ignores its argument and returns 0.
  std::function<uint32_t(void *)> stub_kernel = [](void *) { return 0; };
  host_task.SetRunKernel(stub_kernel);
  ASSERT_EQ(host_task.ExecuteAsync(task_context, done_callback), SUCCESS);
}

// UpdateArgs is expected to fail with INTERNAL_ERROR while the task has no argument buffer and to succeed
// once args_ has been allocated with the size of AicpuTaskStruct.
TEST_F(UtestHostCpuNodeTask, test_update_args) {
  ut::GraphBuilder graph_builder("graph");
  auto data_node = graph_builder.AddNode("Data", "Data", 1, 1);
  std::unique_ptr<NodeItem> item;
  ASSERT_EQ(NodeItem::Create(data_node, item), SUCCESS);
  NodeState state(*item, nullptr);
  TaskContext task_context(nullptr, &state, nullptr);

  // One input tensor backed by a 1-byte host allocation.
  auto *input_addr = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(1);
  auto input_holder = TensorBuffer::Create(input_addr, 1);
  std::shared_ptr<TensorBuffer> input_buffer(input_holder.release());
  TensorValue input_values[1] = {TensorValue(input_buffer)};
  task_context.inputs_start_ = input_values;

  // One output tensor, built the same way.
  auto *output_addr = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(1);
  auto output_holder = TensorBuffer::Create(output_addr, 1);
  std::shared_ptr<TensorBuffer> output_buffer(output_holder.release());
  TensorValue output_values[1] = {TensorValue(output_buffer)};
  task_context.outputs_start_ = output_values;

  domi::TaskDef empty_task_def;
  HostAicpuNodeTask host_task(item.get(), empty_task_def);
  ASSERT_EQ(host_task.UpdateArgs(task_context), INTERNAL_ERROR);

  // Give the task a zero-filled argument buffer and try again.
  host_task.args_size_ = sizeof(AicpuTaskStruct);
  host_task.args_.reset(new(std::nothrow) uint8_t[host_task.args_size_]());
  ASSERT_EQ(host_task.UpdateArgs(task_context), SUCCESS);
}
} // namespace ge

Loading…
Cancel
Save