From: @zhao_zhixuan Reviewed-by: Signed-off-by:tags/v1.3.0
| @@ -166,7 +166,7 @@ build_graphengine() | |||||
| echo "execute command: cmake ${CMAKE_ARGS} .. failed." | echo "execute command: cmake ${CMAKE_ARGS} .. failed." | ||||
| return 1 | return 1 | ||||
| fi | fi | ||||
| COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||||
| COMMON_TARGET="ge_local_engine ge_local_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||||
| TARGET=${COMMON_TARGET} | TARGET=${COMMON_TARGET} | ||||
| if [ "x${PLATFORM}" = "xtrain" ] | if [ "x${PLATFORM}" = "xtrain" ] | ||||
| then | then | ||||
| @@ -279,7 +279,7 @@ generate_package() | |||||
| ACL_LIB=("libge_common.so" "libgraph.so" "libregister.so" "liberror_manager.so" "libge_executor.so") | ACL_LIB=("libge_common.so" "libgraph.so" "libregister.so" "liberror_manager.so" "libge_executor.so") | ||||
| ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so") | ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so" "liberror_manager.so") | ||||
| FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so") | FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so" "liberror_manager.so") | ||||
| PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt") | |||||
| PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "optimizer_priority.pbtxt") | |||||
| PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so") | PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so") | ||||
| rm -rf ${OUTPUT_PATH:?}/${FWK_PATH}/ | rm -rf ${OUTPUT_PATH:?}/${FWK_PATH}/ | ||||
| @@ -2,7 +2,6 @@ if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||||
| add_subdirectory(common) | add_subdirectory(common) | ||||
| add_subdirectory(plugin/engine) | add_subdirectory(plugin/engine) | ||||
| add_subdirectory(ge_local_engine) | add_subdirectory(ge_local_engine) | ||||
| add_subdirectory(host_cpu_engine) | |||||
| add_subdirectory(executor) | add_subdirectory(executor) | ||||
| add_subdirectory(offline) | add_subdirectory(offline) | ||||
| elseif (ENABLE_D) | elseif (ENABLE_D) | ||||
| @@ -409,12 +408,6 @@ set(TRAIN_SRC_LIST | |||||
| "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | ||||
| "hybrid/node_executor/ge_local/ge_local_node_executor.cc" | "hybrid/node_executor/ge_local/ge_local_node_executor.cc" | ||||
| "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | ||||
| "hybrid/node_executor/host_cpu/kernel_factory.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||||
| "hybrid/node_executor/controlop/control_op_executor.cc" | "hybrid/node_executor/controlop/control_op_executor.cc" | ||||
| "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | ||||
| "hybrid/node_executor/hccl/hccl_node_executor.cc" | "hybrid/node_executor/hccl/hccl_node_executor.cc" | ||||
| @@ -31,6 +31,7 @@ | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| #include "framework/common/types.h" | |||||
| namespace { | namespace { | ||||
| const char *const kSchedulerUnits = "schedule_units"; | const char *const kSchedulerUnits = "schedule_units"; | ||||
| @@ -50,6 +51,15 @@ const char *const kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE"; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| namespace { | |||||
| const std::set<std::string> kNotCpuOp = {DATA, CONSTANT, CONSTANTOP, VARIABLE, NETOUTPUT}; | |||||
| bool ExecOnHostCpu(const OpDescPtr &op_desc) { | |||||
| bool is_host_cpu_op = (kNotCpuOp.find(op_desc->GetType()) == kNotCpuOp.end()); | |||||
| return ge::GetContext().GetHostExecFlag() && is_host_cpu_op; | |||||
| } | |||||
| } // namespace | |||||
| DNNEngineManager::DNNEngineManager() : init_flag_(false) {} | DNNEngineManager::DNNEngineManager() : init_flag_(false) {} | ||||
| DNNEngineManager::~DNNEngineManager() { | DNNEngineManager::~DNNEngineManager() { | ||||
| engines_attrs_map_.clear(); | engines_attrs_map_.clear(); | ||||
| @@ -206,7 +216,7 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||||
| GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | ||||
| return ""; | return ""; | ||||
| } | } | ||||
| GE_IF_BOOL_EXEC(ge::GetContext().GetHostExecFlag(), return GetHostCpuEngineName(op_infos, op_desc)); | |||||
| GE_IF_BOOL_EXEC(ExecOnHostCpu(op_desc), return GetHostCpuEngineName(op_infos, op_desc)); | |||||
| std::string ge_core_type; | std::string ge_core_type; | ||||
| Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | ||||
| GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); | GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); | ||||
| @@ -92,12 +92,6 @@ set(SRC_LIST | |||||
| "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | ||||
| "../hybrid/node_executor/ge_local/ge_local_node_executor.cc" | "../hybrid/node_executor/ge_local/ge_local_node_executor.cc" | ||||
| "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | ||||
| "../hybrid/node_executor/host_cpu/kernel_factory.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||||
| "../hybrid/node_executor/controlop/control_op_executor.cc" | "../hybrid/node_executor/controlop/control_op_executor.cc" | ||||
| "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | ||||
| "../hybrid/node_executor/rts/rts_node_executor.cc" | "../hybrid/node_executor/rts/rts_node_executor.cc" | ||||
| @@ -57,6 +57,7 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| const char *kEnvKeyOppPath = "ASCEND_OPP_PATH"; | const char *kEnvKeyOppPath = "ASCEND_OPP_PATH"; | ||||
| const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu"; | const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu"; | ||||
| const std::string kConstantFoldingName = "libconstant_folding_ops.so"; | |||||
| } | } | ||||
| Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { | Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { | ||||
| @@ -352,6 +353,9 @@ Status HostCpuEngine::LoadLib(const std::string &lib_path) { | |||||
| } | } | ||||
| GELOGI("Lib: %s has been opened", lib_path.c_str()); | GELOGI("Lib: %s has been opened", lib_path.c_str()); | ||||
| if (lib_path.find(kConstantFoldingName) != lib_path.npos) { | |||||
| constant_folding_handle_ = handle; | |||||
| } | |||||
| lib_handles_.emplace_back(handle); | lib_handles_.emplace_back(handle); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -54,6 +54,8 @@ class GE_FUNC_VISIBILITY HostCpuEngine { | |||||
| ge::Status Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, std::vector<GeTensorPtr> &outputs); | ge::Status Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, std::vector<GeTensorPtr> &outputs); | ||||
| void *GetConstantFoldingHandle() const { return constant_folding_handle_; } | |||||
| private: | private: | ||||
| HostCpuEngine() = default; | HostCpuEngine() = default; | ||||
| @@ -85,6 +87,7 @@ class GE_FUNC_VISIBILITY HostCpuEngine { | |||||
| std::mutex mu_; | std::mutex mu_; | ||||
| std::vector<void *> lib_handles_; | std::vector<void *> lib_handles_; | ||||
| void *constant_folding_handle_ = nullptr; | |||||
| bool initialized_ = false; | bool initialized_ = false; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -491,9 +491,10 @@ Status GraphManager::ModifyDataIndex(const Graph &graph, const std::map<std::str | |||||
| auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); | auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); | ||||
| if (iter != graph_option.end() && !iter->second.empty()) { | if (iter != graph_option.end() && !iter->second.empty()) { | ||||
| // If data inputs shape range is set, user must set valid data index. | // If data inputs shape range is set, user must set valid data index. | ||||
| std::string failed_reason = "Data index must be set continuous from 0 when data shape range enabled!"; | |||||
| REPORT_INPUT_ERROR("E10003", std::vector<std::string>({"parameter", "value", "reason"}), | |||||
| std::vector<std::string>({"--data_index", "-", failed_reason})); | |||||
| std::string situation = "Data op index"; | |||||
| std::string reason = "Data index must be set continuous from 0 when data shape range enabled!"; | |||||
| REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"situation", "reason"}), | |||||
| std::vector<std::string>({situation, reason})); | |||||
| GELOGE(GRAPH_PARAM_INVALID, "[COMP][AddGraph]Input data index is invalid when data shape range enabled."); | GELOGE(GRAPH_PARAM_INVALID, "[COMP][AddGraph]Input data index is invalid when data shape range enabled."); | ||||
| return GRAPH_PARAM_INVALID; | return GRAPH_PARAM_INVALID; | ||||
| } | } | ||||
| @@ -26,6 +26,7 @@ namespace { | |||||
| const char *const kVectorCore = "VectorCore"; | const char *const kVectorCore = "VectorCore"; | ||||
| const char *const kVectorEngine = "VectorEngine"; | const char *const kVectorEngine = "VectorEngine"; | ||||
| const char *const kAicoreEngine = "AIcoreEngine"; | const char *const kAicoreEngine = "AIcoreEngine"; | ||||
| const char *const kHostCpuEngine = "DNN_VM_HOST_CPU"; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| @@ -140,10 +141,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||||
| } | } | ||||
| Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | ||||
| if (GetContext().GetHostExecFlag()) { | |||||
| // graph exec on host, no need OptimizeOriginalGraph | |||||
| return SUCCESS; | |||||
| } | |||||
| if (compute_graph == nullptr) { | if (compute_graph == nullptr) { | ||||
| REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid"); | REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid"); | ||||
| GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Check][Param] compute_graph is nullptr."); | GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Check][Param] compute_graph is nullptr."); | ||||
| @@ -170,6 +167,10 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | |||||
| if (iter->first == exclude_core_Type) { | if (iter->first == exclude_core_Type) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| if (GetContext().GetHostExecFlag() && iter->first != kHostCpuEngine) { | |||||
| // Graph executes on host: skip OptimizeOriginalGraph for every engine other than the host CPU engine. | |||||
| continue; | |||||
| } | |||||
| ret = (iter->second)->OptimizeOriginalGraph(*compute_graph); | ret = (iter->second)->OptimizeOriginalGraph(*compute_graph); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| REPORT_INNER_ERROR("E19999", "Call OptimizeOriginalGraph failed, ret:%d, engine_name:%s, " | REPORT_INNER_ERROR("E19999", "Call OptimizeOriginalGraph failed, ret:%d, engine_name:%s, " | ||||
| @@ -186,10 +187,6 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | |||||
| Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { | Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { | ||||
| GELOGD("OptimizeOriginalGraphJudgeInsert in"); | GELOGD("OptimizeOriginalGraphJudgeInsert in"); | ||||
| if (GetContext().GetHostExecFlag()) { | |||||
| // graph exec on host, no need OptimizeOriginalGraphJudgeInsert | |||||
| return SUCCESS; | |||||
| } | |||||
| GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
| Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
| @@ -212,6 +209,10 @@ Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_ | |||||
| GELOGI("[OptimizeOriginalGraphJudgeInsert]: engine type will exclude: %s", exclude_core_Type.c_str()); | GELOGI("[OptimizeOriginalGraphJudgeInsert]: engine type will exclude: %s", exclude_core_Type.c_str()); | ||||
| continue; | continue; | ||||
| } | } | ||||
| if (GetContext().GetHostExecFlag() && iter->first != kHostCpuEngine) { | |||||
| // Graph executes on host: skip OptimizeOriginalGraphJudgeInsert for every engine other than the host CPU engine. | |||||
| continue; | |||||
| } | |||||
| GELOGI("Begin to refine running format by engine %s", iter->first.c_str()); | GELOGI("Begin to refine running format by engine %s", iter->first.c_str()); | ||||
| ret = (iter->second)->OptimizeOriginalGraphJudgeInsert(*compute_graph); | ret = (iter->second)->OptimizeOriginalGraphJudgeInsert(*compute_graph); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -1,272 +0,0 @@ | |||||
| set(SRC_LIST | |||||
| "engine/host_cpu_engine.cc" | |||||
| "ops_kernel_store/host_cpu_ops_kernel_info.cc" | |||||
| "ops_kernel_store/op/op_factory.cc" | |||||
| "ops_kernel_store/op/host_op.cc" | |||||
| ) | |||||
| set(CPU_OPS_KERNEL_LIST | |||||
| "ops_kernel_store/host_cpu_ops_kernel_builder.cc" | |||||
| ) | |||||
| ############ libhost_cpu_engine.so ############ | |||||
| add_library(host_cpu_engine SHARED ${SRC_LIST}) | |||||
| add_dependencies(host_cpu_engine | |||||
| graphengine_protos | |||||
| ) | |||||
| target_compile_options(host_cpu_engine PRIVATE | |||||
| -Werror | |||||
| -fno-common | |||||
| -fvisibility=hidden | |||||
| ) | |||||
| target_compile_definitions(host_cpu_engine PRIVATE | |||||
| google=ascend_private | |||||
| FUNC_VISIBILITY | |||||
| ) | |||||
| target_include_directories(host_cpu_engine PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${CMAKE_BINARY_DIR} | |||||
| ${CMAKE_BINARY_DIR}/proto/graphengine_protos | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | |||||
| target_link_options(host_cpu_engine PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(host_cpu_engine PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| -Wl,--no-as-needed | |||||
| ascend_protobuf | |||||
| c_sec | |||||
| graph | |||||
| slog | |||||
| -Wl,--as-needed | |||||
| ) | |||||
| ############ atclib/libhost_cpu_engine.so ############ | |||||
| add_library(atc_host_cpu_engine SHARED ${SRC_LIST}) | |||||
| add_dependencies(atc_host_cpu_engine | |||||
| graphengine_protos | |||||
| ) | |||||
| target_compile_options(atc_host_cpu_engine PRIVATE | |||||
| -Werror | |||||
| -fno-common | |||||
| -fvisibility=hidden | |||||
| ) | |||||
| target_compile_definitions(atc_host_cpu_engine PRIVATE | |||||
| google=ascend_private | |||||
| FUNC_VISIBILITY | |||||
| ) | |||||
| target_include_directories(atc_host_cpu_engine PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${CMAKE_BINARY_DIR} | |||||
| ${CMAKE_BINARY_DIR}/proto/graphengine_protos | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | |||||
| target_link_options(atc_host_cpu_engine PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(atc_host_cpu_engine PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| -Wl,--no-as-needed | |||||
| ascend_protobuf | |||||
| c_sec | |||||
| graph | |||||
| slog | |||||
| -Wl,--as-needed | |||||
| ) | |||||
| set_target_properties(atc_host_cpu_engine PROPERTIES | |||||
| OUTPUT_NAME host_cpu_engine | |||||
| LIBRARY_OUTPUT_DIRECTORY atclib | |||||
| ) | |||||
| ############ libhost_cpu_opskernel_builder.so ############ | |||||
| add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||||
| add_dependencies(host_cpu_opskernel_builder | |||||
| graphengine_protos | |||||
| ) | |||||
| target_compile_options(host_cpu_opskernel_builder PRIVATE | |||||
| -Werror | |||||
| -fno-common | |||||
| -fvisibility=hidden | |||||
| ) | |||||
| target_compile_definitions(host_cpu_opskernel_builder PRIVATE | |||||
| google=ascend_private | |||||
| FUNC_VISIBILITY | |||||
| ) | |||||
| target_include_directories(host_cpu_opskernel_builder PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${CMAKE_BINARY_DIR} | |||||
| ${CMAKE_BINARY_DIR}/proto/graphengine_protos | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | |||||
| target_link_options(host_cpu_opskernel_builder PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(host_cpu_opskernel_builder PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| -Wl,--no-as-needed | |||||
| ascend_protobuf | |||||
| c_sec | |||||
| slog | |||||
| graph | |||||
| register | |||||
| -Wl,--as-needed | |||||
| ) | |||||
| ############ atclib/libhost_cpu_opskernel_builder.so ############ | |||||
| add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||||
| add_dependencies(atc_host_cpu_opskernel_builder | |||||
| graphengine_protos | |||||
| ) | |||||
| target_compile_options(atc_host_cpu_opskernel_builder PRIVATE | |||||
| -Werror | |||||
| -fno-common | |||||
| -fvisibility=hidden | |||||
| ) | |||||
| target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | |||||
| google=ascend_private | |||||
| FUNC_VISIBILITY | |||||
| ) | |||||
| target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${CMAKE_BINARY_DIR} | |||||
| ${CMAKE_BINARY_DIR}/proto/graphengine_protos | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | |||||
| target_link_options(atc_host_cpu_opskernel_builder PRIVATE | |||||
| -Wl,-Bsymbolic | |||||
| ) | |||||
| target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| -Wl,--no-as-needed | |||||
| ascend_protobuf | |||||
| c_sec | |||||
| slog | |||||
| graph | |||||
| register | |||||
| -Wl,--as-needed | |||||
| ) | |||||
| set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES | |||||
| OUTPUT_NAME host_cpu_opskernel_builder | |||||
| LIBRARY_OUTPUT_DIRECTORY atclib | |||||
| ) | |||||
| ############ libhost_cpu_opskernel_builder.a ############ | |||||
| add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) | |||||
| add_dependencies(host_cpu_opskernel_builder_static | |||||
| graphengine_protos | |||||
| ) | |||||
| target_compile_options(host_cpu_opskernel_builder_static PRIVATE | |||||
| -Werror | |||||
| -fno-common | |||||
| -fvisibility=hidden | |||||
| ) | |||||
| target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | |||||
| google=ascend_private | |||||
| LOG_CPP | |||||
| FUNC_VISIBILITY | |||||
| ) | |||||
| target_include_directories(host_cpu_opskernel_builder_static PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${CMAKE_BINARY_DIR} | |||||
| ${CMAKE_BINARY_DIR}/proto/graphengine_protos | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | |||||
| target_link_libraries(host_cpu_opskernel_builder_static PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| ascend_protobuf | |||||
| c_sec | |||||
| ) | |||||
| ############ install ############ | |||||
| set(INSTALL_BASE_DIR "") | |||||
| set(INSTALL_LIBRARY_DIR lib) | |||||
| install(TARGETS host_cpu_engine host_cpu_opskernel_builder OPTIONAL | |||||
| LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} | |||||
| ) | |||||
| install(TARGETS atc_host_cpu_engine atc_host_cpu_opskernel_builder OPTIONAL | |||||
| LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/atclib | |||||
| ) | |||||
| @@ -1,30 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HOST_CPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_ | |||||
| #define GE_HOST_CPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_ | |||||
| #include <string> | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
| // engine name | |||||
| const char kHostCpuEngineName[] = "DNN_VM_HOST_CPU"; | |||||
| const char kHostCpuOpKernelLibName[] = "DNN_VM_HOST_CPU_OP_STORE"; | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| #endif // GE_HOST_CPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_ | |||||
| @@ -1,76 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "host_cpu_engine/engine/host_cpu_engine.h" | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <securec.h> | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "host_cpu_engine/common/constant/constant.h" | |||||
| #include "host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
| HostCpuEngine &HostCpuEngine::Instance() { | |||||
| static HostCpuEngine instance; | |||||
| return instance; | |||||
| } | |||||
| Status HostCpuEngine::Initialize(const std::map<string, string> &options) { | |||||
| if (ops_kernel_store_ == nullptr) { | |||||
| ops_kernel_store_ = MakeShared<HostCpuOpsKernelInfoStore>(); | |||||
| if (ops_kernel_store_ == nullptr) { | |||||
| GELOGE(FAILED, "[Create][HostCpuEngine]Make HostCpuOpsKernelInfoStore failed."); | |||||
| REPORT_INNER_ERROR("E19999", "HostCpuEngine::Initialize failed for new HostCpuEngine."); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void HostCpuEngine::GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map) { | |||||
| if (ops_kernel_store_ != nullptr) { | |||||
| // add built-in opsKernel to opsKernelInfoMap | |||||
| ops_kernel_map[kHostCpuOpKernelLibName] = ops_kernel_store_; | |||||
| } | |||||
| } | |||||
| void HostCpuEngine::GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &) { | |||||
| // no optimizer for host cpu engine | |||||
| } | |||||
| Status HostCpuEngine::Finalize() { | |||||
| ops_kernel_store_ = nullptr; | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| ge::Status Initialize(const std::map<string, string> &options) { | |||||
| return ge::host_cpu::HostCpuEngine::Instance().Initialize(options); | |||||
| } | |||||
| void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map) { | |||||
| ge::host_cpu::HostCpuEngine::Instance().GetOpsKernelInfoStores(ops_kernel_map); | |||||
| } | |||||
| void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers) { | |||||
| ge::host_cpu::HostCpuEngine::Instance().GetGraphOptimizerObjs(graph_optimizers); | |||||
| } | |||||
| ge::Status Finalize() { return ge::host_cpu::HostCpuEngine::Instance().Finalize(); } | |||||
| @@ -1,125 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||||
| #define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "common/opskernel/ops_kernel_info_store.h" | |||||
| #include "common/optimizer/graph_optimizer.h" | |||||
| using OpsKernelInfoStorePtr = std::shared_ptr<ge::OpsKernelInfoStore>; | |||||
| using GraphOptimizerPtr = std::shared_ptr<ge::GraphOptimizer>; | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
| /** | |||||
| * host cpu engine. | |||||
| * Used for the ops which execute on host. | |||||
| */ | |||||
| class GE_FUNC_VISIBILITY HostCpuEngine { | |||||
| public: | |||||
| /** | |||||
| * get HostCpuEngine instance. | |||||
| * @return HostCpuEngine instance. | |||||
| */ | |||||
| static HostCpuEngine &Instance(); | |||||
| virtual ~HostCpuEngine() = default; | |||||
| /** | |||||
| * When GE starts, GE will invoke this interface | |||||
| * @return The status indicating whether initialization succeeded | |||||
| */ | |||||
| Status Initialize(const std::map<string, string> &options); | |||||
| /** | |||||
| * After initialization, GE will invoke this interface | |||||
| * to get the Ops kernel Store. | |||||
| * @param ops_kernel_map The host cpu's ops kernel info | |||||
| */ | |||||
| void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
| /** | |||||
| * After initialization, GE will invoke this interface | |||||
| * to get the Graph Optimizer. | |||||
| * @param graph_optimizers The host cpu's Graph Optimizer objs | |||||
| */ | |||||
| void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
| /** | |||||
| * When the graph has finished, GE will invoke this interface | |||||
| * @return The status indicating whether finalization succeeded | |||||
| */ | |||||
| Status Finalize(); | |||||
| HostCpuEngine(const HostCpuEngine &HostCpuEngine) = delete; | |||||
| HostCpuEngine(const HostCpuEngine &&HostCpuEngine) = delete; | |||||
| HostCpuEngine &operator=(const HostCpuEngine &HostCpuEngine) = delete; | |||||
| HostCpuEngine &operator=(HostCpuEngine &&HostCpuEngine) = delete; | |||||
| private: | |||||
| HostCpuEngine() = default; | |||||
| OpsKernelInfoStorePtr ops_kernel_store_ = nullptr; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| extern "C" { | |||||
| /** | |||||
| * When GE starts, GE will invoke this interface | |||||
| * @return The status indicating whether initialization succeeded | |||||
| */ | |||||
| GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options); | |||||
| /** | |||||
| * After initialization, GE will invoke this interface to get the Ops kernel Store | |||||
| * @param ops_kernel_map The host cpu's ops kernel info | |||||
| */ | |||||
| GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
| /** | |||||
| * After initialization, GE will invoke this interface to get the Graph Optimizer | |||||
| * @param graph_optimizers The host cpu's Graph Optimizer objs | |||||
| */ | |||||
| GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
| /** | |||||
| * When the graph has finished, GE will invoke this interface | |||||
| * @return The status indicating whether finalization succeeded | |||||
| */ | |||||
| GE_FUNC_VISIBILITY ge::Status Finalize(); | |||||
| } | |||||
| #endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||||
| @@ -1,161 +0,0 @@ | |||||
# Source and include lists referenced by the module definitions in this makefile.
# The last entry of each list carries no trailing backslash, so the list cannot
# swallow whatever line follows it.
| LOCAL_PATH := $(call my-dir) | |||||
| local_lib_src_files := engine/host_cpu_engine.cc \ | |||||
| ops_kernel_store/host_cpu_ops_kernel_info.cc \ | |||||
| ops_kernel_store/op/op_factory.cc \ | |||||
| ops_kernel_store/op/host_op.cc | |||||
| local_lib_inc_path := proto/task.proto \ | |||||
| ${LOCAL_PATH} \ | |||||
| ${TOPDIR}inc \ | |||||
| ${TOPDIR}metadef/inc \ | |||||
| ${TOPDIR}graphengine/inc \ | |||||
| ${TOPDIR}inc/external \ | |||||
| ${TOPDIR}metadef/inc/external \ | |||||
| ${TOPDIR}graphengine/inc/external \ | |||||
| ${TOPDIR}metadef/inc/external/graph \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| ${TOPDIR}third_party/protobuf/include \ | |||||
| ${TOPDIR}graphengine/inc/framework \ | |||||
| $(TOPDIR)graphengine/ge | |||||
# Host shared library libhost_cpu_engine: compiles the shared source list with
# -Werror and links against libruntime.
| #compiler for host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libhost_cpu_engine | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| libruntime | |||||
| LOCAL_SRC_FILES := $(local_lib_src_files) | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_HOST_SHARED_LIBRARY} | |||||
# ATC variant of libhost_cpu_engine: same sources as the host module, but built
# with -DCOMPILE_OMG_PACKAGE and linked against libruntime_compile.
| #compiler for atc | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := atclib/libhost_cpu_engine | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| libruntime_compile | |||||
| LOCAL_SRC_FILES := $(local_lib_src_files) | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_HOST_SHARED_LIBRARY} | |||||
# Host shared library libhost_cpu_opskernel_builder, built from the single
# ops kernel builder source. The shared-library list ends without a trailing
# backslash so it cannot absorb the following assignment.
| #compiler for host ops kernel builder | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libhost_cpu_opskernel_builder | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libregister | |||||
| LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_HOST_SHARED_LIBRARY} | |||||
# Device shared library libhost_cpu_opskernel_builder (BUILD_SHARED_LIBRARY),
# built from the single ops kernel builder source. The shared-library list ends
# without a trailing backslash so it cannot absorb the following assignment.
| #compiler for device ops kernel builder | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libhost_cpu_opskernel_builder | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libregister | |||||
| LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_SHARED_LIBRARY} | |||||
# Host static library libhost_cpu_opskernel_builder: protobuf, graph and register
# are listed as static dependencies, libc_sec and libslog as shared ones.
# Neither list ends with a trailing backslash, so neither can absorb the next line.
| #compiler for host static lib | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libhost_cpu_opskernel_builder | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := libascend_protobuf \ | |||||
| libgraph \ | |||||
| libregister | |||||
| LOCAL_SHARED_LIBRARIES := libc_sec \ | |||||
| libslog | |||||
| LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_HOST_STATIC_LIBRARY} | |||||
# Device static library libhost_cpu_opskernel_builder (BUILD_STATIC_LIBRARY):
# protobuf, graph and register are listed as static dependencies, libc_sec and
# libslog as shared ones. Neither list ends with a trailing backslash, so
# neither can absorb the next line.
| #compiler for device static lib | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libhost_cpu_opskernel_builder | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := libascend_protobuf \ | |||||
| libgraph \ | |||||
| libregister | |||||
| LOCAL_SHARED_LIBRARIES := libc_sec \ | |||||
| libslog | |||||
| LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_STATIC_LIBRARY} | |||||
# ATC host shared library atclib/libhost_cpu_opskernel_builder, built from the
# single ops kernel builder source. The shared-library list ends without a
# trailing backslash so it cannot absorb the following assignment.
| #compiler for atc ops kernel builder | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := atclib/libhost_cpu_opskernel_builder | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libregister | |||||
| LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| include ${BUILD_HOST_SHARED_LIBRARY} | |||||
| @@ -1,114 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "host_cpu_ops_kernel_builder.h" | |||||
| #include <memory> | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "ge/ge_api_types.h" | |||||
| #include "graph/utils/node_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| #include <securec.h> | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "host_cpu_engine/common/constant/constant.h" | |||||
| #include "register/ops_kernel_builder_registry.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
// Hands HostCpuOpsKernelBuilder to REGISTER_OPS_KERNEL_BUILDER under the name kHostCpuOpKernelLibName.
| REGISTER_OPS_KERNEL_BUILDER(kHostCpuOpKernelLibName, HostCpuOpsKernelBuilder); | |||||
/**
 * Finalize the builder. There is nothing to release, so this always returns SUCCESS.
 * @return SUCCESS
 */
| Status HostCpuOpsKernelBuilder::Finalize() { | |||||
| return SUCCESS; | |||||
| } | |||||
/**
 * Initialize the builder. No state is set up, so this always returns SUCCESS.
 * @param options option key/value pairs; not read by this implementation
 * @return SUCCESS
 */
| Status HostCpuOpsKernelBuilder::Initialize(const std::map<std::string, std::string> &options) { | |||||
| return SUCCESS; | |||||
| } | |||||
/**
 * Make sure every output tensor of the node carries a memory size.
 * Nodes reported as unknown-shape are skipped. An output whose size is already
 * positive is left untouched; otherwise the size is computed from shape, format
 * and data type and written back into the op desc.
 * @param ge_node node whose output descriptions are inspected and updated
 * @return SUCCESS, or FAILED when the op desc is null, the size calculation
 *         fails or yields a negative value, or the output desc cannot be updated
 */
| Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { | |||||
| OpDescPtr desc = ge_node.GetOpDesc(); | |||||
| if (desc == nullptr) { | |||||
| GELOGE(FAILED, "[Get][OpDesc]CalcOpRunningParam failed, as op desc is null"); | |||||
| REPORT_INNER_ERROR("E19999", "GetOpDesc failed."); | |||||
| return FAILED; | |||||
| } | |||||
| bool unknown_shape = false; | |||||
// A failed status query is treated like a known shape: fall through and calculate sizes.
| const bool status_ok = (NodeUtils::GetNodeUnknownShapeStatus(ge_node, unknown_shape) == GRAPH_SUCCESS); | |||||
| if (status_ok && unknown_shape) { | |||||
| GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| const string node_name = ge_node.GetName(); | |||||
| const string node_type = ge_node.GetType(); | |||||
| const size_t output_count = desc->GetOutputsSize(); | |||||
| GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_count); | |||||
| for (size_t idx = 0; idx < output_count; ++idx) { | |||||
| GeTensorDesc out_desc = desc->GetOutputDesc(static_cast<uint32_t>(idx)); | |||||
| Format out_format = out_desc.GetFormat(); | |||||
| DataType out_dtype = out_desc.GetDataType(); | |||||
| int64_t preset_size = 0; | |||||
| // If mem size has been set, no need reset. | |||||
| const bool already_sized = (TensorUtils::GetSize(out_desc, preset_size) == GRAPH_SUCCESS) && (preset_size > 0); | |||||
| if (already_sized) { | |||||
| GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", | |||||
| node_name.c_str(), node_type.c_str(), idx, TypeUtils::FormatToSerialString(out_format).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(out_dtype).c_str(), preset_size); | |||||
| continue; | |||||
| } | |||||
| int64_t calc_size = 0; | |||||
| GeShape out_shape = out_desc.GetShape(); | |||||
| const bool calc_ok = (TensorUtils::CalcTensorMemSize(out_shape, out_format, out_dtype, calc_size) == GRAPH_SUCCESS) && | |||||
| (calc_size >= 0); | |||||
| if (!calc_ok) { | |||||
| GELOGE(FAILED, | |||||
| "[Calc][TensorMemSize] fail for op[%s:%s] out[%zu] mem size, mem_size=%ld, format=%s, data_type=%s.", | |||||
| node_name.c_str(), node_type.c_str(), idx, calc_size, TypeUtils::FormatToSerialString(out_format).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(out_dtype).c_str()); | |||||
| REPORT_CALL_ERROR("E19999", | |||||
| "CalcTensorMemSize failed for op[%s:%s] out[%zu] mem size, mem_size=%ld, format=%s, data_type=%s.", | |||||
| node_name.c_str(), node_type.c_str(), idx, calc_size, TypeUtils::FormatToSerialString(out_format).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(out_dtype).c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", | |||||
| node_name.c_str(), node_type.c_str(), idx, calc_size, TypeUtils::FormatToSerialString(out_format).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(out_dtype).c_str()); | |||||
// out_desc is a local copy, so the new size only takes effect once it is written back below.
| TensorUtils::SetSize(out_desc, calc_size); | |||||
| const bool updated = (desc->UpdateOutputDesc(static_cast<uint32_t>(idx), out_desc) == GRAPH_SUCCESS); | |||||
| if (!updated) { | |||||
| GELOGE(FAILED, | |||||
| "[Update][OutputDesc] fail for op[%s:%s] out[%zu] desc , format=%s, data_type=%s.", | |||||
| node_name.c_str(), node_type.c_str(), idx, | |||||
| TypeUtils::FormatToSerialString(out_format).c_str(), TypeUtils::DataTypeToSerialString(out_dtype).c_str()); | |||||
| REPORT_CALL_ERROR("E19999", "UpdateOutputDesc failed for op[%s:%s] out[%zu] desc , format=%s, data_type=%s.", | |||||
| node_name.c_str(), node_type.c_str(), idx, | |||||
| TypeUtils::FormatToSerialString(out_format).c_str(), TypeUtils::DataTypeToSerialString(out_dtype).c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| GELOGD("Calc op[%s:%s] running param success.", node_name.c_str(), node_type.c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
/**
 * Task generation hook. No task is produced and `tasks` is left untouched.
 * @param node node the task would be generated for; not read
 * @param context run context; not read
 * @param tasks output task list; not modified
 * @return SUCCESS
 */
| Status HostCpuOpsKernelBuilder::GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks) { | |||||
| // no need to generate device task | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| @@ -1,51 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | |||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | |||||
// Export annotation. When FUNC_VISIBILITY is defined it expands to dllexport under
// MSVC and to default symbol visibility otherwise; when FUNC_VISIBILITY is not
// defined it expands to nothing.
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include "common/opskernel/ops_kernel_builder.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
/**
 * Ops kernel builder of the host cpu engine. Overrides the four entry points
 * of the OpsKernelBuilder interface.
 */
| class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder { | |||||
| public: | |||||
// Standard names are fully qualified so this header does not depend on using-declarations made elsewhere.
| Status Initialize(const std::map<std::string, std::string> &options) override; | |||||
| Status Finalize() override; | |||||
| Status CalcOpRunningParam(Node &node) override; | |||||
| Status GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks) override; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| #endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | |||||
| @@ -1,67 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h" | |||||
| #include <memory> | |||||
| #include "common/constant/constant.h" | |||||
| #include "ge/ge_api_types.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "graph/utils/node_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| #include "op/op_factory.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
| using domi::TaskDef; | |||||
| using std::map; | |||||
| using std::string; | |||||
| using std::vector; | |||||
/**
 * Fill op_info_map_ with one default OpInfo entry per op type known to OpFactory.
 * @param options option key/value pairs; not read by this implementation
 * @return SUCCESS
 */
| Status HostCpuOpsKernelInfoStore::Initialize(const map<string, string> &options) { | |||||
| GELOGI("HostCpuOpsKernelInfoStore init start."); | |||||
| OpInfo default_op_info = {.engine = kHostCpuEngineName, | |||||
| .opKernelLib = kHostCpuOpKernelLibName, | |||||
| .computeCost = 0, | |||||
| .flagPartial = false, | |||||
| .flagAsync = false, | |||||
| .isAtomic = false}; | |||||
| // Init op_info_map_ | |||||
// GetAllOps() returns a const reference; bind to it instead of copying the whole list.
| const auto &all_ops = OpFactory::Instance().GetAllOps(); | |||||
| for (const auto &op : all_ops) { | |||||
| op_info_map_[op] = default_op_info; | |||||
| } | |||||
| GELOGI("HostCpuOpsKernelInfoStore inited success. op num=%zu", op_info_map_.size()); | |||||
| return SUCCESS; | |||||
| } | |||||
/**
 * Drop every entry from op_info_map_.
 * @return SUCCESS
 */
| Status HostCpuOpsKernelInfoStore::Finalize() { | |||||
| op_info_map_.clear(); | |||||
| return SUCCESS; | |||||
| } | |||||
// Overwrites `infos` with a copy of op_info_map_; whatever `infos` held before is replaced.
| void HostCpuOpsKernelInfoStore::GetAllOpsKernelInfo(map<string, OpInfo> &infos) const { infos = op_info_map_; } | |||||
/**
 * Report whether the op's type has an entry in op_info_map_.
 * A null op_desc is reported as unsupported. The reason string is never written.
 */
| bool HostCpuOpsKernelInfoStore::CheckSupported(const OpDescPtr &op_desc, std::string &) const { | |||||
| const bool has_desc = (op_desc != nullptr); | |||||
| return has_desc && (op_info_map_.find(op_desc->GetType()) != op_info_map_.end()); | |||||
| } | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| @@ -1,86 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | |||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | |||||
// Export annotation. When FUNC_VISIBILITY is defined it expands to dllexport under
// MSVC and to default symbol visibility otherwise; when FUNC_VISIBILITY is not
// defined it expands to nothing.
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "common/opskernel/ops_kernel_info_store.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
/**
 * Ops kernel info store of the host cpu engine. Keeps a map from op type name
 * to OpInfo and answers support queries from it. Copying and moving are disabled.
 */
| class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
| public: | |||||
| HostCpuOpsKernelInfoStore() = default; | |||||
| ~HostCpuOpsKernelInfoStore() override = default; | |||||
| /** | |||||
| * Initialize related resources of the host cpu kernelinfo store | |||||
| * @param options option key/value pairs | |||||
| * @return status whether this operation success | |||||
| */ | |||||
| Status Initialize(const std::map<std::string, std::string> &options) override; | |||||
| /** | |||||
| * Release related resources of the host cpu kernel info store | |||||
| * @return status whether this operation success | |||||
| */ | |||||
| Status Finalize() override; | |||||
| /** | |||||
| * Check to see if an operator is fully supported or partially supported. | |||||
| * @param op_desc OpDesc information | |||||
| * @param reason unsupported reason | |||||
| * @return bool value indicate whether the operator is fully supported | |||||
| */ | |||||
| bool CheckSupported(const OpDescPtr &op_desc, std::string &reason) const override; | |||||
| /** | |||||
| * Returns the full operator information. | |||||
| * @param infos reference of a map, | |||||
| * contain operator's name and detailed information | |||||
| */ | |||||
| void GetAllOpsKernelInfo(std::map<std::string, ge::OpInfo> &infos) const override; | |||||
// Copy and move are deleted; the move constructor takes a plain rvalue reference like the move assignment.
| HostCpuOpsKernelInfoStore(const HostCpuOpsKernelInfoStore &ops_kernel_store) = delete; | |||||
| HostCpuOpsKernelInfoStore(HostCpuOpsKernelInfoStore &&ops_kernel_store) = delete; | |||||
| HostCpuOpsKernelInfoStore &operator=(const HostCpuOpsKernelInfoStore &ops_kernel_store) = delete; | |||||
| HostCpuOpsKernelInfoStore &operator=(HostCpuOpsKernelInfoStore &&ops_kernel_store) = delete; | |||||
| private: | |||||
| // store op name and OpInfo key-value pair | |||||
| std::map<std::string, ge::OpInfo> op_info_map_; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| #endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | |||||
| @@ -1,40 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "host_cpu_engine/ops_kernel_store/op/host_op.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "host_cpu_engine/ops_kernel_store/op/op_factory.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
/**
 * Run the host op. The body does no work and always returns SUCCESS.
 */
| Status HostOp::Run() { | |||||
| // no need to generate device task | |||||
| return SUCCESS; | |||||
| } | |||||
// Register HostOp as the creator for each of the op types below.
// The first macro argument is stringified and used as the op type key.
| REGISTER_OP_CREATOR(NoOp, HostOp); | |||||
| REGISTER_OP_CREATOR(Variable, HostOp); | |||||
| REGISTER_OP_CREATOR(Constant, HostOp); | |||||
| REGISTER_OP_CREATOR(Assign, HostOp); | |||||
| REGISTER_OP_CREATOR(RandomUniform, HostOp); | |||||
| REGISTER_OP_CREATOR(Add, HostOp); | |||||
| REGISTER_OP_CREATOR(Mul, HostOp); | |||||
| REGISTER_OP_CREATOR(ConcatV2, HostOp); | |||||
| REGISTER_OP_CREATOR(Data, HostOp); | |||||
| REGISTER_OP_CREATOR(Fill, HostOp); | |||||
| REGISTER_OP_CREATOR(NetOutput, HostOp); | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| @@ -1,36 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||||
| #include "host_cpu_engine/ops_kernel_store/op/op.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
/**
 * Concrete Op used for host cpu ops. Forwards the node and run context to the
 * Op base class and overrides Run(). Copy construction and copy assignment are deleted.
 */
| class GE_FUNC_VISIBILITY HostOp : public Op { | |||||
| public: | |||||
| HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | |||||
| ~HostOp() override = default; | |||||
| HostOp &operator=(const HostOp &op) = delete; | |||||
| HostOp(const HostOp &op) = delete; | |||||
| Status Run() override; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| #endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_HOST_OP_H_ | |||||
| @@ -1,45 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_ | |||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_ | |||||
| #include <climits> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "common/opskernel/ops_kernel_info_types.h" | |||||
| #include "graph/node.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
| /** | |||||
| * The base class for all op. | |||||
| */ | |||||
// Abstract base: subclasses implement Run().
// NOTE(review): both members are references, so the Node and RunContext passed to the
// constructor must outlive this object - confirm that callers guarantee this.
| class GE_FUNC_VISIBILITY Op { | |||||
| public: | |||||
| Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} | |||||
| virtual ~Op() = default; | |||||
| virtual Status Run() = 0; | |||||
| protected: | |||||
| const RunContext &run_context_; | |||||
| const Node &node_; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| #endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_ | |||||
| @@ -1,55 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "host_cpu_engine/ops_kernel_store/op/op_factory.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "graph/op_desc.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
/**
 * Access the single OpFactory object, held as a function-local static.
 * @return reference to the shared factory
 */
| OpFactory &OpFactory::Instance() { | |||||
| static OpFactory instance; | |||||
| return instance; | |||||
| } | |||||
/**
 * Build an Op for the node by calling the creator registered for the node's type.
 * @param node node to create the op for
 * @param run_context run context forwarded to the creator
 * @return whatever the creator returns, or nullptr (after logging an error) when
 *         no creator is registered for the node's type
 */
| std::shared_ptr<Op> OpFactory::CreateOp(const Node &node, RunContext &run_context) { | |||||
| const auto creator_it = op_creator_map_.find(node.GetType()); | |||||
| if (creator_it == op_creator_map_.end()) { | |||||
| GELOGE(FAILED, "Not supported OP, type = %s, name = %s", node.GetType().c_str(), node.GetName().c_str()); | |||||
| return nullptr; | |||||
| } | |||||
| return creator_it->second(node, run_context); | |||||
| } | |||||
/**
 * Record a creator function for an op type.
 * An empty function, or a type that already has a creator, is ignored with a
 * warning. On success the type is also appended to the list returned by GetAllOps().
 * @param type op type name
 * @param func creator function for that type
 */
| void OpFactory::RegisterCreator(const std::string &type, const OP_CREATOR_FUNC &func) { | |||||
| if (func == nullptr) { | |||||
| GELOGW("Func is NULL."); | |||||
| return; | |||||
| } | |||||
| const bool already_registered = (op_creator_map_.count(type) > 0); | |||||
| if (already_registered) { | |||||
| GELOGW("%s creator already exist", type.c_str()); | |||||
| return; | |||||
| } | |||||
| op_creator_map_[type] = func; | |||||
| all_ops_.emplace_back(type); | |||||
| } | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| @@ -1,94 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_ | |||||
| #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_ | |||||
| #include <functional> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "host_cpu_engine/ops_kernel_store/op/op.h" | |||||
| namespace ge { | |||||
| namespace host_cpu { | |||||
// Callable that builds an Op from a node and a run context and returns it as a shared_ptr.
| using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunContext &)>; | |||||
| /** | |||||
| * manage all the op, support create op. | |||||
| */ | |||||
/**
 * Singleton registry that maps op type names to creator functions and builds
 * Op objects on request. Copying and moving are disabled.
 */
| class GE_FUNC_VISIBILITY OpFactory { | |||||
| public: | |||||
| static OpFactory &Instance(); | |||||
| /** | |||||
| * @brief create Op. | |||||
| * @param [in] node node the op is created for | |||||
| * @param [in] run_context run context | |||||
| * @return not nullptr success | |||||
| * @return nullptr fail | |||||
| */ | |||||
| std::shared_ptr<Op> CreateOp(const Node &node, RunContext &run_context); | |||||
| /** | |||||
| * @brief Register Op create function. | |||||
| * @param [in] type Op type | |||||
| * @param [in] func Op create func | |||||
| */ | |||||
| void RegisterCreator(const std::string &type, const OP_CREATOR_FUNC &func); | |||||
| /** | |||||
| * @brief Names of all registered op types, in registration order. | |||||
| */ | |||||
| const std::vector<std::string> &GetAllOps() const { return all_ops_; } | |||||
| /** | |||||
| * @brief Whether a creator is registered for the given op type. Read-only, hence const. | |||||
| */ | |||||
| bool CheckSupported(const std::string &type) const { return op_creator_map_.find(type) != op_creator_map_.end(); } | |||||
| OpFactory(const OpFactory &) = delete; | |||||
| OpFactory &operator=(const OpFactory &) = delete; | |||||
| OpFactory(OpFactory &&) = delete; | |||||
| OpFactory &operator=(OpFactory &&) = delete; | |||||
| private: | |||||
| OpFactory() = default; | |||||
| ~OpFactory() = default; | |||||
| // the op creator function map | |||||
| std::map<std::string, OP_CREATOR_FUNC> op_creator_map_; | |||||
| // op type names, appended as each creator is registered | |||||
| std::vector<std::string> all_ops_; | |||||
| }; | |||||
/**
 * Helper whose constructor registers a creator with the OpFactory singleton.
 * It holds no state; copying and moving are disabled.
 */
| class GE_FUNC_VISIBILITY OpRegistrar { | |||||
| public: | |||||
| OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | |||||
| OpFactory::Instance().RegisterCreator(type, func); | |||||
| } | |||||
| ~OpRegistrar() = default; | |||||
| OpRegistrar(const OpRegistrar &) = delete; | |||||
| OpRegistrar &operator=(const OpRegistrar &) = delete; | |||||
| OpRegistrar(OpRegistrar &&) = delete; | |||||
| OpRegistrar &operator=(OpRegistrar &&) = delete; | |||||
| }; | |||||
// Defines a function Creator_<type>Op that builds a `clazz` via MakeShared, plus a
// namespace-scope OpRegistrar object g_<type>Op_creator that registers that function
// under the stringified type name.
| #define REGISTER_OP_CREATOR(type, clazz) \ | |||||
| std::shared_ptr<Op> Creator_##type##Op(const Node &node, RunContext &run_context) { \ | |||||
| return MakeShared<clazz>(node, run_context); \ | |||||
| } \ | |||||
| OpRegistrar g_##type##Op_creator(#type, Creator_##type##Op) | |||||
| } // namespace host_cpu | |||||
| } // namespace ge | |||||
| #endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_ | |||||
| @@ -1,179 +0,0 @@ | |||||
| /* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. | |||||
| * | |||||
| * This program is free software; you can redistribute it and/or modify | |||||
| * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
| * Apache License for more details at | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| package domi; | |||||
// Top-level message: a version string, an extension attribute map, the repeated
// TaskDef list, and size, count and address fields.
| message ModelTaskDef { | |||||
| string version = 1; | |||||
| map<string, string> attr = 9; // Extended field | |||||
| repeated TaskDef task = 10; | |||||
| uint64 memory_size = 11; | |||||
| uint32 stream_num = 12; | |||||
| uint32 event_num = 13; | |||||
| uint64 weight_size = 14; | |||||
| repeated bytes op = 15; // input/output opdef in bytes | |||||
| uint64 base_addr = 16; // base addr | |||||
| uint64 weight_addr = 17; // weight addr | |||||
| uint32 batch_num = 18; | |||||
| } | |||||
// One task entry: id, type, stream/event/label ids, and one sub-message field per task flavour.
// NOTE(review): whether exactly one sub-message is populated per task is not visible here - confirm.
| message TaskDef { | |||||
| uint32 id = 1; | |||||
| uint32 type = 2; | |||||
| uint32 stream_id = 10; | |||||
| uint32 event_id = 11; | |||||
| KernelDef kernel = 20; | |||||
| KernelExDef kernel_ex = 21; | |||||
| KernelHcclDef kernel_hccl = 25; | |||||
| EventExDef event_ex = 26; | |||||
| LogTimeStampDef log_timestamp = 28; | |||||
| uint32 label_id = 30; | |||||
| MemcpyAsyncDef memcpy_async = 31; | |||||
| StreamSwitchDef stream_switch = 32; | |||||
| StreamActiveDef stream_active = 33; | |||||
| bytes private_def = 34; | |||||
| uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future | |||||
| StreamSwitchNDef stream_switch_n = 36; | |||||
| LabelSetDef label_set = 37; | |||||
| LabelGotoExDef label_goto_ex = 38; | |||||
| LabelSwitchByIndexDef label_switch_by_index = 39; | |||||
| KernelDefWithHandle kernel_with_handle = 40; | |||||
| } | |||||
// Kernel task payload: a KernelContext, stub function name, block dimension, raw
// argument bytes with their size, and so/kernel names plus extension info.
| message KernelDef { | |||||
| KernelContext context = 1; | |||||
| string stub_func = 10; | |||||
| uint32 block_dim = 11; | |||||
| uint32 args_size = 12; | |||||
| bytes args = 13; | |||||
| bytes sm_desc = 14; | |||||
| bytes flowtable = 15; | |||||
| string so_name = 16; | |||||
| string kernel_name = 17; | |||||
| bytes kernel_ext_info = 18; | |||||
| uint32 kernel_ext_info_size = 19; | |||||
| } | |||||
| message KernelDefWithHandle { | |||||
| KernelContext context = 1; | |||||
| uint64 handle = 10; | |||||
| string dev_func = 11; | |||||
| uint32 block_dim = 12; | |||||
| uint32 args_size = 13; | |||||
| bytes args = 14; | |||||
| bytes sm_desc = 15; | |||||
| string original_kernel_key = 16; | |||||
| string node_info = 17; | |||||
| } | |||||
| message KernelContext { | |||||
| uint32 kernel_type = 1; | |||||
| uint32 op_id = 2; // OP type in CCE | |||||
| uint32 kernel_func_id = 3; | |||||
| uint32 op_index = 4; // TE/Custom operator | |||||
| bool is_flowtable = 5; // Identify whether args is a flowtable structure | |||||
| bytes args_offset = 6; // args offset information | |||||
| uint32 args_count = 7; // args count | |||||
| repeated uint32 origin_op_index = 8; | |||||
| } | |||||
| message KernelExDef { | |||||
| uint32 flags = 1; | |||||
| uint32 op_index = 4; | |||||
| uint32 args_size = 12; | |||||
| bytes args = 13; | |||||
| bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput | |||||
| uint32 task_info_size = 15; | |||||
| bytes kernel_ext_info = 16; | |||||
| uint32 kernel_ext_info_size = 17; | |||||
| } | |||||
| message KernelHcclDef { | |||||
| uint32 op_index = 8; | |||||
| string hccl_type = 9; | |||||
| } | |||||
| message EventExDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 event_type = 2; | |||||
| } | |||||
| message LogTimeStampDef { | |||||
| uint64 logid = 1; | |||||
| bool notify = 2; | |||||
| uint32 flat = 3; | |||||
| } | |||||
| message MemcpyAsyncDef { | |||||
| uint64 dst = 1; | |||||
| uint64 dst_max = 2; | |||||
| uint64 src = 3; | |||||
| uint64 count = 4; | |||||
| uint32 kind = 5; | |||||
| uint32 op_index = 6; | |||||
| } | |||||
| message StreamSwitchDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 true_stream_id = 2; | |||||
| int64 value = 3; | |||||
| uint64 value_ptr = 4; | |||||
| uint32 data_type = 5; | |||||
| } | |||||
| message StreamActiveDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 active_stream_id = 2; | |||||
| } | |||||
| message StreamSwitchNDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 size = 2; | |||||
| repeated int64 target_value = 3; | |||||
| repeated uint32 true_stream_id = 4; | |||||
| uint32 element_size = 5; | |||||
| uint32 data_type = 6; | |||||
| } | |||||
| message LabelSetDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 label_id = 2; | |||||
| uint32 model_id = 3; | |||||
| } | |||||
| message LabelGotoExDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 label_id = 2; | |||||
| uint32 model_id = 3; | |||||
| } | |||||
| message LabelSwitchByIndexDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 label_max = 2; | |||||
| } | |||||
| @@ -1282,7 +1282,8 @@ Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const | |||||
| } | } | ||||
| Status HybridModelBuilder::IndexTaskDefs() { | Status HybridModelBuilder::IndexTaskDefs() { | ||||
| const auto &root_graph = ge_root_model_->GetRootGraph(); | |||||
| const auto root_graph = ge_root_model_->GetRootGraph(); | |||||
| const auto &root_graph_name = root_graph->GetName(); | |||||
| if (SetOutputNameAttr(*root_graph) != SUCCESS) { | if (SetOutputNameAttr(*root_graph) != SUCCESS) { | ||||
| GELOGW("Set output name attr failed."); | GELOGW("Set output name attr failed."); | ||||
| } | } | ||||
| @@ -1292,62 +1293,22 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||||
| auto &ge_model = it.second; | auto &ge_model = it.second; | ||||
| GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
| const auto &sub_graph = root_graph->GetSubgraph(name); | |||||
| if (sub_graph == nullptr) { | |||||
| continue; | |||||
| } | |||||
| bool is_unknown_shape = sub_graph->GetGraphUnknownFlag(); | |||||
| if (!is_unknown_shape) { | |||||
| GE_CHK_STATUS_RET_NOLOG(LoadGeModel(*sub_graph, ge_model)); | |||||
| continue; | |||||
| } | |||||
| // index task defs | |||||
| GELOGD("To index tasks for subgraph: %s", name.c_str()); | |||||
| std::unordered_map<int64_t, NodePtr> node_map; | |||||
| for (const auto &node : sub_graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| auto node_id = node->GetOpDesc()->GetId(); | |||||
| GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str()); | |||||
| node_map.emplace(node_id, node); | |||||
| } | |||||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||||
| for (int i = 0; i < tasks.size(); ++i) { | |||||
| const domi::TaskDef &task_def = tasks[i]; | |||||
| GELOGI("Task id = %d, task type = %d", i, task_def.type()); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| uint32_t op_index = -1; | |||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| op_index = task_def.kernel().context().op_index(); | |||||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
| op_index = task_def.kernel_ex().op_index(); | |||||
| } else if (task_type == RT_MODEL_TASK_HCCL) { | |||||
| op_index = task_def.kernel_hccl().op_index(); | |||||
| } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| op_index = task_def.kernel_with_handle().context().op_index(); | |||||
| } else { | |||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||||
| auto sub_graph = root_graph->GetSubgraph(name); | |||||
| if (name != root_graph_name) { | |||||
| if (sub_graph == nullptr) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| auto iter = node_map.find(op_index); | |||||
| if (iter == node_map.end()) { | |||||
| GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by index = %u.", op_index); | |||||
| REPORT_INNER_ERROR("E19999", "Failed to get node by index = %u.", op_index); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto &node = iter->second; | |||||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | |||||
| bool is_unknown_shape = sub_graph->GetGraphUnknownFlag(); | |||||
| if (!is_unknown_shape) { | |||||
| GE_CHK_STATUS_RET_NOLOG(LoadGeModel(*sub_graph, ge_model)); | |||||
| continue; | |||||
| } | } | ||||
| GELOGD("Task loaded for node: %s, task type = %d, op_index = %u", node->GetName().c_str(), task_type, op_index); | |||||
| hybrid_model_.task_defs_[node].emplace_back(task_def); | |||||
| } else { | |||||
| sub_graph = root_graph; | |||||
| } | } | ||||
| GE_CHK_STATUS_RET_NOLOG(IndexTaskDefs(sub_graph, ge_model)); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -166,7 +166,7 @@ class AicpuNodeTask : public AicpuNodeTaskBase { | |||||
| Status UpdateIoAddr(TaskContext &context) override; | Status UpdateIoAddr(TaskContext &context) override; | ||||
| private: | |||||
| protected: | |||||
| // host mem | // host mem | ||||
| std::unique_ptr<uint8_t[]> args_; | std::unique_ptr<uint8_t[]> args_; | ||||
| @@ -15,60 +15,38 @@ | |||||
| */ | */ | ||||
| #include "hybrid/node_executor/host_cpu/host_cpu_node_executor.h" | #include "hybrid/node_executor/host_cpu/host_cpu_node_executor.h" | ||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||||
| #include "graph/passes/folding_pass.h" | #include "graph/passes/folding_pass.h" | ||||
| #include "hybrid/model/hybrid_model.h" | #include "hybrid/model/hybrid_model.h" | ||||
| #include "graph/manager/graph_mem_manager.h" | #include "graph/manager/graph_mem_manager.h" | ||||
| #include "ge_local_engine/engine/host_cpu_engine.h" | #include "ge_local_engine/engine/host_cpu_engine.h" | ||||
| #include "aicpu/common/aicpu_task_struct.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HOST_CPU, HostCpuNodeExecutor); | REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HOST_CPU, HostCpuNodeExecutor); | ||||
| Status HostNodeTaskBase::UpdateArgs(TaskContext &) { | |||||
| // no need update args | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HostNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) { | |||||
| GELOGD("[%s] Start execute.", context.GetNodeName()); | |||||
| GE_CHK_STATUS_RET(Execute(context), "[Invoke][Execute] failed for node:%s type:%s.", | |||||
| node_->GetName().c_str(), node_->GetType().c_str()) | |||||
| if (done_callback) { | |||||
| GELOGD("[%s] Start invoke callback.", context.GetNodeName()); | |||||
| done_callback(); | |||||
| Status HostAicpuNodeTask::UpdateArgs(TaskContext &context) { | |||||
| if (context.NumInputs() == 0 && context.NumOutputs() == 0) { | |||||
| GELOGD("Node[%s] has no input and output, no need to update args.", node_name_.c_str()); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| GELOGD("[%s] Done execute successfully.", context.GetNodeName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||||
| const auto &op_desc = node_->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| std::vector<ConstGeTensorPtr> inputs; | |||||
| vector<uint64_t> io_addrs; | |||||
| io_addrs.reserve(context.NumInputs() + context.NumOutputs()); | |||||
| for (int32_t i = 0; i < context.NumInputs(); ++i) { | for (int32_t i = 0; i < context.NumInputs(); ++i) { | ||||
| auto input_desc_ptr = context.GetInputDesc(i); | |||||
| GE_CHECK_NOTNULL(input_desc_ptr); | |||||
| const auto &input_desc = *input_desc_ptr; | |||||
| auto tensor = context.GetInput(i); | auto tensor = context.GetInput(i); | ||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | ||||
| GE_CHECK_NOTNULL(item.second); | GE_CHECK_NOTNULL(item.second); | ||||
| auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first); | |||||
| GE_CHECK_NOTNULL(in_tensor); | |||||
| in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); | |||||
| in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); | |||||
| inputs.emplace_back(in_tensor); | |||||
| GELOGD("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size()); | |||||
| io_addrs.emplace_back(reinterpret_cast<uintptr_t>(item.second->MutableGet())); | |||||
| } | } | ||||
| std::vector<GeTensorPtr> outputs; | |||||
| for (int32_t i = 0; i < context.NumOutputs(); ++i) { | for (int32_t i = 0; i < context.NumOutputs(); ++i) { | ||||
| const auto &output_desc = op_desc->GetOutputDesc(i); | |||||
| const auto &output_desc = context.GetOutputDesc(i); | |||||
| GE_CHECK_NOTNULL(output_desc); | |||||
| AllocationAttr attr; | AllocationAttr attr; | ||||
| attr.SetMemType(HOST_DDR); | attr.SetMemType(HOST_DDR); | ||||
| if (context.AllocateOutput(i, output_desc, nullptr, &attr) != SUCCESS) { | |||||
| if (context.AllocateOutput(i, *output_desc, nullptr, &attr) != SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "node:%s Failed to allocate output %d", context.GetNodeName(), i); | REPORT_CALL_ERROR("E19999", "node:%s Failed to allocate output %d", context.GetNodeName(), i); | ||||
| GELOGE(FAILED, "[Invoke][AllocateOutput]node:%s Failed to allocate output %d", context.GetNodeName(), i); | GELOGE(FAILED, "[Invoke][AllocateOutput]node:%s Failed to allocate output %d", context.GetNodeName(), i); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -77,37 +55,61 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | ||||
| GE_CHECK_NOTNULL(item.second); | GE_CHECK_NOTNULL(item.second); | ||||
| auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first); | |||||
| GE_CHECK_NOTNULL(out_tensor); | |||||
| out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); | |||||
| out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); | |||||
| outputs.emplace_back(out_tensor); | |||||
| GELOGD("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size()); | |||||
| io_addrs.emplace_back(reinterpret_cast<uintptr_t>(item.second->MutableGet())); | |||||
| } | } | ||||
| auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead); | |||||
| return HostCpuEngine::GetInstance().Run(node_, inputs, outputs); | |||||
| // if has input and output, need copy to ioaddr | |||||
| int cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), | |||||
| &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); | |||||
| if (cpy_ret != EOK) { | |||||
| REPORT_INNER_ERROR("E19999", "Node[%s] memcpy io addr to AicpuParamHead failed," | |||||
| "ret=%d, args_size=%u, io nums=%zu.", | |||||
| node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); | |||||
| GELOGE(INTERNAL_ERROR, "[Update][io_addr]Node[%s] memcpy io addr to AicpuParamHead failed," | |||||
| "ret=%d, args_size=%u, io nums=%zu.", | |||||
| node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | } | ||||
| Status HostCpuNodeTask::Execute(TaskContext &context) { | |||||
| RunContext run_context; | |||||
| auto host_kernel = hybrid::host_cpu::KernelFactory::Instance().CreateKernel(node_); | |||||
| if (host_kernel == nullptr) { | |||||
| REPORT_CALL_ERROR("E19999", "CreateKernel failed for node %s type %s is not supported by host kernel.", | |||||
| node_->GetName().c_str(), node_->GetType().c_str()); | |||||
| GELOGE(UNSUPPORTED, "[Create][Kernel]node %s type %s is not supported by host kernel.", | |||||
| node_->GetName().c_str(), node_->GetType().c_str()); | |||||
| return UNSUPPORTED; | |||||
| Status HostAicpuNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) { | |||||
| GELOGD("[%s] Start execute.", context.GetNodeName()); | |||||
| GE_CHK_STATUS_RET(Execute(context), "[Invoke][Execute] failed for node:%s.", node_name_.c_str()); | |||||
| if (done_callback) { | |||||
| GELOGD("[%s] Start invoke callback.", context.GetNodeName()); | |||||
| done_callback(); | |||||
| } | } | ||||
| GELOGD("[%s] Done execute successfully.", context.GetNodeName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status compute_ret = host_kernel->Compute(context); | |||||
| if (compute_ret != SUCCESS) { | |||||
| REPORT_CALL_ERROR("E19999", "node %s type %s compute failed.", | |||||
| node_->GetName().c_str(), node_->GetType().c_str()); | |||||
| GELOGE(compute_ret, "[Invoke][Compute]node %s type %s compute failed or not imply.", | |||||
| node_->GetName().c_str(), node_->GetType().c_str()); | |||||
| return compute_ret; | |||||
| Status HostAicpuNodeTask::Execute(TaskContext &context) { | |||||
| GELOGD("Node[%s] launch task start.", node_name_.c_str()); | |||||
| if (run_cpu_kernel_) { | |||||
| GE_CHK_STATUS_RET(run_cpu_kernel_(args_.get()), "[Run][CpuKernel] failed for node:%s.", node_name_.c_str()); | |||||
| } else { | |||||
| REPORT_CALL_ERROR("E19999", "Run cpu kernel failed node:%s, cpu kernel is not initialized.", node_name_.c_str()); | |||||
| GELOGE(INTERNAL_ERROR, | |||||
| "[Run][Kernel]Run cpu kernel failed node:%s, cpu kernel is not initialized.", node_name_.c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | } | ||||
| GELOGD("Node[%s] launch task successfully.", node_name_.c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HostAicpuNodeTask::SetHostExtInfo() { | |||||
| if (aicpu_ext_handle_.GetExtInfoLen() == 0) { | |||||
| GELOGD("Node[%s] don't have ext info, no need update.", node_name_.c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_.get()); | |||||
| GE_CHECK_NOTNULL(aicpu_param_head); | |||||
| aicpu_param_head->extInfoLength = aicpu_ext_handle_.GetExtInfoLen(); | |||||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_handle_.GetExtInfo()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -115,32 +117,71 @@ Status HostCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) co | |||||
| return task.UpdateArgs(context); | return task.UpdateArgs(context); | ||||
| } | } | ||||
| Status HostCpuNodeExecutor::ValidateTaskDef(const domi::TaskDef &task_def) { | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| if (task_type != RT_MODEL_TASK_KERNEL) { | |||||
| REPORT_CALL_ERROR("E19999", "[Check][TaskType]Invalid task type (%d) in host cpu excutor.", | |||||
| static_cast<int>(task_type)); | |||||
| GELOGE(INTERNAL_ERROR, | |||||
| "[Check][TaskType]Invalid task type (%d) in host cpu excutor.", static_cast<int>(task_type)); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto kernel_type = static_cast<ccKernelType>(task_def.kernel().context().kernel_type()); | |||||
| if (kernel_type != ccKernelType::HOST_CPU) { | |||||
| REPORT_INNER_ERROR("E19999", "Invalid kernel type(%d) in host cpu excutor.", | |||||
| static_cast<int>(kernel_type)); | |||||
| GELOGE(INTERNAL_ERROR, | |||||
| "[Check][TaskType]Invalid kernel type(%d) in host cpu excutor.", static_cast<int>(kernel_type)); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, | Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, | ||||
| std::shared_ptr<NodeTask> &task) const { | std::shared_ptr<NodeTask> &task) const { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| auto mem_type = static_cast<uint32_t>(HOST_DDR); | |||||
| for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) { | |||||
| (void)AttrUtils::SetInt(op_desc->MutableOutputDesc(i), ATTR_OUTPUT_MEMORY_TYPE, mem_type); | |||||
| auto node_item = model.GetNodeItem(node); | |||||
| GE_CHECK_NOTNULL(node_item); | |||||
| auto task_defs = model.GetTaskDefs(node); | |||||
| GE_CHECK_NOTNULL(task_defs); | |||||
| if ((*task_defs).size() != 1) { | |||||
| REPORT_CALL_ERROR("E19999", "[Check][Size]Node[%s] task_def num[%zu] != 1", | |||||
| node->GetName().c_str(), (*task_defs).size()); | |||||
| GELOGE(PARAM_INVALID, "[Check][Size]Node[%s] task_def num[%zu] != 1", | |||||
| node->GetName().c_str(), (*task_defs).size()); | |||||
| return PARAM_INVALID; | |||||
| } | } | ||||
| const std::string &name = node->GetName(); | |||||
| const std::string &type = node->GetType(); | |||||
| if (HostCpuEngine::GetInstance().CheckSupported(type)) { | |||||
| GELOGI("create CpuKernelNodeTask for node %s, type %s.", name.c_str(), type.c_str()); | |||||
| task = MakeShared<CpuKernelNodeTask>(node); | |||||
| GE_CHECK_NOTNULL(task); | |||||
| } else if (hybrid::host_cpu::KernelFactory::Instance().CreateKernel(node) != nullptr) { | |||||
| GELOGI("create HostCpuNodeTask for node %s, type %s.", name.c_str(), type.c_str()); | |||||
| task = MakeShared<HostCpuNodeTask>(node); | |||||
| GE_CHECK_NOTNULL(task); | |||||
| const auto &task_def = (*task_defs)[0]; | |||||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||||
| "[Validate][TaskDef] failed for Node[%s].", node->GetName().c_str()); | |||||
| auto host_aicpu_task = MakeShared<HostAicpuNodeTask>(node_item, task_def); | |||||
| GE_CHK_BOOL_RET_STATUS(host_aicpu_task != nullptr, MEMALLOC_FAILED, | |||||
| "[Check][State]Load task for node %s failed.", node->GetName().c_str()); | |||||
| GE_CHK_STATUS_RET(host_aicpu_task->Init(model), | |||||
| "[Init][AicpuNodeTaskBase] failed for Node[%s].", node->GetName().c_str()); | |||||
| GE_CHK_STATUS_RET(host_aicpu_task->SetHostExtInfo(), | |||||
| "[Set][HostExtInfo] failed for Node[%s].", node->GetName().c_str()); | |||||
| auto handle = HostCpuEngine::GetInstance().GetConstantFoldingHandle(); | |||||
| if (handle == nullptr) { | |||||
| REPORT_CALL_ERROR("E19999", "Get constant folding handle failed."); | |||||
| GELOGE(INTERNAL_ERROR, "[Get][Handle]Get constant folding handle failed."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto run_cpu_kernel = (uint32_t (*)(void *))mmDlsym(handle, "RunHostCpuKernel"); | |||||
| if (run_cpu_kernel != nullptr) { | |||||
| host_aicpu_task->SetRunKernel(run_cpu_kernel); | |||||
| } else { | } else { | ||||
| REPORT_INNER_ERROR("E19999", "Create NodeTask failed for node %s type %s.", | |||||
| name.c_str(), type.c_str()); | |||||
| GELOGE(UNSUPPORTED, "[Create][NodeTask]node %s type %s is not support in HostCpuNodeExecutor now.", | |||||
| name.c_str(), type.c_str()); | |||||
| return UNSUPPORTED; | |||||
| REPORT_CALL_ERROR("E19999", "Get run cpu kernel failed."); | |||||
| GELOGE(INTERNAL_ERROR, "[Get][Kernel]Get run cpu kernel failed."); | |||||
| return INTERNAL_ERROR; | |||||
| } | } | ||||
| task = std::move(host_aicpu_task); | |||||
| GELOGD("Node[%s] load task end.", node->GetName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| @@ -19,39 +19,29 @@ | |||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "inc/kernel.h" | #include "inc/kernel.h" | ||||
| #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| class HostNodeTaskBase : public NodeTask { | |||||
| class HostAicpuNodeTask : public AicpuNodeTask { | |||||
| public: | public: | ||||
| explicit HostNodeTaskBase(const NodePtr &node) : node_(node) {} | |||||
| ~HostNodeTaskBase() override = default; | |||||
| Status UpdateArgs(TaskContext &context) override; | |||||
| HostAicpuNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) | |||||
| : AicpuNodeTask(node_item, task_def) {} | |||||
| ~HostAicpuNodeTask() override = default; | |||||
| Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | ||||
| protected: | |||||
| NodePtr node_; | |||||
| Status UpdateArgs(TaskContext &context) override; | |||||
| private: | |||||
| virtual Status Execute(TaskContext &context) = 0; | |||||
| }; | |||||
| void SetRunKernel(std::function<uint32_t(void *)> run_cpu_kernel) { run_cpu_kernel_ = run_cpu_kernel; } | |||||
| class CpuKernelNodeTask : public HostNodeTaskBase { | |||||
| public: | |||||
| explicit CpuKernelNodeTask(const NodePtr &node) : HostNodeTaskBase(node) {} | |||||
| ~CpuKernelNodeTask() override = default; | |||||
| Status SetHostExtInfo(); | |||||
| private: | private: | ||||
| Status Execute(TaskContext &context) override; | |||||
| }; | |||||
| class HostCpuNodeTask : public HostNodeTaskBase { | |||||
| public: | |||||
| explicit HostCpuNodeTask(const NodePtr &node) : HostNodeTaskBase(node) {} | |||||
| ~HostCpuNodeTask() override = default; | |||||
| Status Execute(TaskContext &context); | |||||
| private: | |||||
| Status Execute(TaskContext &context) override; | |||||
| std::function<uint32_t(void *)> run_cpu_kernel_ = nullptr; | |||||
| }; | }; | ||||
| class HostCpuNodeExecutor : public NodeExecutor { | class HostCpuNodeExecutor : public NodeExecutor { | ||||
| @@ -61,6 +51,9 @@ class HostCpuNodeExecutor : public NodeExecutor { | |||||
| Status LoadTask(const HybridModel &model, | Status LoadTask(const HybridModel &model, | ||||
| const NodePtr &node, | const NodePtr &node, | ||||
| std::shared_ptr<NodeTask> &task) const override; | std::shared_ptr<NodeTask> &task) const override; | ||||
| private: | |||||
| static Status ValidateTaskDef(const domi::TaskDef &task_def); | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -1,60 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/assign_kernel.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||||
| namespace { | |||||
| const size_t kAssignRefInputIndex = 0; | |||||
| const size_t kAssignValueInputIndex = 1; | |||||
| const size_t kAssignRefOutputIndex = 0; | |||||
| } | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| Status AssignKernel::Compute(TaskContext& context) { | |||||
| auto ref_tensor = context.MutableInput(kAssignRefInputIndex); | |||||
| GE_CHECK_NOTNULL(ref_tensor); | |||||
| const auto value_tensor = context.GetInput(kAssignValueInputIndex); | |||||
| GE_CHECK_NOTNULL(value_tensor); | |||||
| if (value_tensor->GetSize() > ref_tensor->GetSize()) { | |||||
| REPORT_INNER_ERROR("E19999", "[%s] value_input_size=%zu bigger than ref_input_size=%zu. check invalid", | |||||
| node_->GetName().c_str(), value_tensor->GetSize(), ref_tensor->GetSize()); | |||||
| GELOGE(INTERNAL_ERROR, "[Check][Size][%s] value_input_size=%zu, but ref_input_size=%zu.", | |||||
| node_->GetName().c_str(), value_tensor->GetSize(), ref_tensor->GetSize()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| GELOGI("[%s] value_input_data=%p, ref_input_size=%zu, value_input_size=%zu.", | |||||
| node_->GetName().c_str(), ref_tensor->GetData(), ref_tensor->GetSize(), value_tensor->GetSize()); | |||||
| if (value_tensor->GetSize() > 0) { | |||||
| GE_CHK_RT_RET(rtMemcpy(ref_tensor->MutableData(), ref_tensor->GetSize(), value_tensor->GetData(), | |||||
| value_tensor->GetSize(), RT_MEMCPY_HOST_TO_HOST)); | |||||
| } | |||||
| GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), | |||||
| "[Set][Output] failed for[%s].", context.GetNodeName()); | |||||
| GELOGD("[%s] compute success.", node_->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| REGISTER_KERNEL_CREATOR(Assign, AssignKernel); | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| @@ -1,42 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HYBRID_HOST_CPU_KERNEL_ASSIGN_KERNEL_H_ | |||||
| #define GE_HYBRID_HOST_CPU_KERNEL_ASSIGN_KERNEL_H_ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/kernel.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| class AssignKernel : public Kernel { | |||||
| public: | |||||
| AssignKernel(const NodePtr &node) : Kernel(node) {} | |||||
| ~AssignKernel() override = default; | |||||
| AssignKernel &operator=(const AssignKernel &op) = delete; | |||||
| AssignKernel(const AssignKernel &op) = delete; | |||||
| /** | |||||
| * @brief compute for node_task. | |||||
| * @return result | |||||
| */ | |||||
| Status Compute(TaskContext& context) override; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| #endif // GE_HYBRID_HOST_CPU_KERNEL_ASSIGN_KERNEL_H_ | |||||
| @@ -1,42 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/data_kernel.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||||
| namespace { | |||||
| constexpr size_t kDataInputIndex = 0; | |||||
| constexpr size_t kDataOutputIndex = 0; | |||||
| } | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| Status DataKernel::Compute(TaskContext& context) { | |||||
| auto input = context.MutableInput(kDataInputIndex); | |||||
| GE_CHECK_NOTNULL(input); | |||||
| GE_CHK_STATUS_RET(context.SetOutput(kDataOutputIndex, *input), | |||||
| "[Set][Output] failed for [%s].", context.GetNodeName()) | |||||
| GELOGD("[%s] compute success.", node_->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| REGISTER_KERNEL_CREATOR(Data, DataKernel); | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| @@ -1,42 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ | |||||
| #define GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/kernel.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| class DataKernel : public Kernel { | |||||
| public: | |||||
| DataKernel(const NodePtr &node) : Kernel(node) {} | |||||
| ~DataKernel() override = default; | |||||
| DataKernel &operator=(const DataKernel &op) = delete; | |||||
| DataKernel(const DataKernel &op) = delete; | |||||
| /** | |||||
| * @brief compute for node_task. | |||||
| * @return result | |||||
| */ | |||||
| Status Compute(TaskContext& context) override; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| #endif // GE_HYBRID_HOST_CPU_KERNEL_DATA_KERNEL_H_ | |||||
| @@ -1,43 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HYBRID_HOST_CPU_KERNEL_KERNEL_H_ | |||||
| #define GE_HYBRID_HOST_CPU_KERNEL_KERNEL_H_ | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "graph/node.h" | |||||
| #include "hybrid/node_executor/task_context.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| /** | |||||
| * The base class for all host_kernel. | |||||
| */ | |||||
| class Kernel { | |||||
| public: | |||||
| Kernel(const NodePtr &node) : node_(node) {} | |||||
| virtual ~Kernel() = default; | |||||
| virtual Status Compute(TaskContext& context) = 0; | |||||
| protected: | |||||
| const NodePtr &node_; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| #endif // GE_HYBRID_HOST_CPU_KERNEL_KERNEL_H_ | |||||
| @@ -1,34 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/no_op_kernel.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| Status NoOpKernel::Compute(TaskContext& context) { | |||||
| GELOGD("[%s] no need to compute.", node_->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| REGISTER_KERNEL_CREATOR(NoOp, NoOpKernel); | |||||
| REGISTER_KERNEL_CREATOR(NetOutput, NoOpKernel); | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| @@ -1,42 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HYBRID_HOST_CPU_KERNEL_NO_OP_KERNEL_H_ | |||||
| #define GE_HYBRID_HOST_CPU_KERNEL_NO_OP_KERNEL_H_ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/kernel.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| class NoOpKernel : public Kernel { | |||||
| public: | |||||
| NoOpKernel(const NodePtr &node) : Kernel(node) {} | |||||
| ~NoOpKernel() override = default; | |||||
| NoOpKernel &operator=(const NoOpKernel &op) = delete; | |||||
| NoOpKernel(const NoOpKernel &op) = delete; | |||||
| /** | |||||
| * @brief compute for node_task. | |||||
| * @return result | |||||
| */ | |||||
| Status Compute(TaskContext& context) override; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| #endif // GE_HYBRID_HOST_CPU_KERNEL_NO_OP_KERNEL_H_ | |||||
| @@ -1,157 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h" | |||||
| #include <random> | |||||
| #include "common/fp16_t.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||||
| namespace { | |||||
| const char *const kAttrDtype = "dtype"; | |||||
| } | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| Status RandomUniformKernel::Compute(TaskContext& context) { | |||||
| int64_t seed = 0; | |||||
| int64_t seed2 = 0; | |||||
| (void)AttrUtils::GetInt(node_->GetOpDesc(), "seed", seed); | |||||
| (void)AttrUtils::GetInt(node_->GetOpDesc(), "seed2", seed2); | |||||
| DataType data_type = DT_FLOAT; | |||||
| if (!AttrUtils::GetDataType(node_->GetOpDesc(), kAttrDtype, data_type)) { | |||||
| REPORT_CALL_ERROR("E19999", "GetDataType failed for [%s].", node_->GetName().c_str()); | |||||
| GELOGE(PARAM_INVALID, "[Get][DataType] failed for [%s].", node_->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| switch (data_type) { | |||||
| case DT_FLOAT16: | |||||
| if (GenerateFP16(node_->GetOpDesc(), seed, seed2, context) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Invoke][GenerateFP16]Generate random_distribution failed for %s, data_type=DT_FLOAT16", | |||||
| node_->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| break; | |||||
| case DT_FLOAT: | |||||
| if (Generate<float>(node_->GetOpDesc(), seed, seed2, context) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Invoke][Generate]Generate random_distribution failed for %s, data_type=DT_FLOAT", | |||||
| node_->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| break; | |||||
| case DT_DOUBLE: | |||||
| if (Generate<double>(node_->GetOpDesc(), seed, seed2, context) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Invoke][Generate]Generate random_distribution failed for %s, data_type=DT_DOUBLE", | |||||
| node_->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| break; | |||||
| default: | |||||
| REPORT_INNER_ERROR("E19999", "[Check][DataType]Supported DataType is DT_FLOAT16 / DT_FLOAT / DT_DOUBLE," | |||||
| "but data_type=%s, node:%s", | |||||
| TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||||
| node_->GetName().c_str()); | |||||
| GELOGE(UNSUPPORTED, "[Check][DataType]Supported DataType is DT_FLOAT16 / DT_FLOAT / DT_DOUBLE," | |||||
| "but data_type=%s, node:%s", | |||||
| TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||||
| node_->GetName().c_str()); | |||||
| return UNSUPPORTED; | |||||
| } | |||||
| GELOGD("[%s] compute success.", node_->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| template <typename T> | |||||
| Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, | |||||
| TaskContext& context) { | |||||
| GE_CHECK_NOTNULL(op_desc_ptr); | |||||
| // RandomUniformOp has and only has one output | |||||
| int64_t data_num = op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize(); | |||||
| AllocationAttr attr; | |||||
| attr.SetMemType(HOST_DDR); | |||||
| auto tensor_size = data_num * sizeof(T); | |||||
| TensorValue tensor; | |||||
| GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), | |||||
| "[Invoke][AllocateTensor][%s] Failed to allocate output of size %zu", | |||||
| context.GetNodeName(), | |||||
| tensor_size); | |||||
| auto *buf = reinterpret_cast<T *>(tensor.MutableData()); | |||||
| int64_t final_seed; | |||||
| if (seed == 0) { | |||||
| if (seed2 == 0) { | |||||
| std::random_device rd; | |||||
| final_seed = rd(); | |||||
| } else { | |||||
| final_seed = seed2; | |||||
| } | |||||
| } else { | |||||
| final_seed = seed; | |||||
| } | |||||
| std::mt19937_64 gen(final_seed); | |||||
| std::uniform_real_distribution<T> distribution(0, 1); | |||||
| for (int64_t i = 0; i < data_num; i++) { | |||||
| *(buf + i) = distribution(gen); | |||||
| } | |||||
| GE_CHK_STATUS_RET(context.SetOutput(0, tensor), "[Set][Output] failed for [%s].", context.GetNodeName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status RandomUniformKernel::GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, | |||||
| TaskContext& context) { | |||||
| GE_CHECK_NOTNULL(op_desc_ptr); | |||||
| // RandomUniformOp has and only has one output | |||||
| int64_t data_num = op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize(); | |||||
| AllocationAttr attr; | |||||
| attr.SetMemType(HOST_DDR); | |||||
| auto tensor_size = data_num * sizeof(fp16_t); | |||||
| TensorValue tensor; | |||||
| GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), | |||||
| "[Invoke][AllocateTensor][%s] Failed to allocate output of size %zu", | |||||
| context.GetNodeName(), | |||||
| tensor_size); | |||||
| auto *buf = reinterpret_cast<fp16_t *>(tensor.MutableData()); | |||||
| int64_t final_seed; | |||||
| if (seed == 0) { | |||||
| if (seed2 == 0) { | |||||
| std::random_device rd; | |||||
| final_seed = rd(); | |||||
| } else { | |||||
| final_seed = seed2; | |||||
| } | |||||
| } else { | |||||
| final_seed = seed; | |||||
| } | |||||
| std::mt19937_64 gen(final_seed); | |||||
| std::uniform_real_distribution<float> distribution(0, 1); | |||||
| for (int64_t i = 0; i < data_num; i++) { | |||||
| *(buf + i) = static_cast<fp16_t>(distribution(gen)); | |||||
| } | |||||
| GE_CHK_STATUS_RET(context.SetOutput(0, tensor), "[Set][Output]failed for [%s].", context.GetNodeName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| REGISTER_KERNEL_CREATOR(RandomUniform, RandomUniformKernel); | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| @@ -1,48 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HYBRID_HOST_CPU_KERNEL_RANDOM_UNIFORM_KERNEL_H_ | |||||
| #define GE_HYBRID_HOST_CPU_KERNEL_RANDOM_UNIFORM_KERNEL_H_ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/kernel.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| class RandomUniformKernel : public Kernel { | |||||
| public: | |||||
| RandomUniformKernel(const NodePtr &node) : Kernel(node) {} | |||||
| ~RandomUniformKernel() override = default; | |||||
| RandomUniformKernel &operator=(const RandomUniformKernel &op) = delete; | |||||
| RandomUniformKernel(const RandomUniformKernel &op) = delete; | |||||
| /** | |||||
| * @brief compute for node_task. | |||||
| * @return result | |||||
| */ | |||||
| Status Compute(TaskContext& context) override; | |||||
| private: | |||||
| template <typename T> | |||||
| Status Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context); | |||||
| static Status GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context); | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| #endif // GE_HYBRID_HOST_CPU_KERNEL_RANDOM_UNIFORM_KERNEL_H_ | |||||
| @@ -1,42 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/variable_kernel.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| Status VariableKernel::Compute(TaskContext& context) { | |||||
| auto tensor = context.GetVariable(node_->GetName()); | |||||
| if (tensor == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "Get Variable from task context for node:%s failed.", context.GetNodeName()); | |||||
| GELOGE(PARAM_INVALID, "[Check][Param]Get Variable from task context for node:%s failed.", context.GetNodeName()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| // Constant & Variable Op has and only has one output | |||||
| GE_CHK_STATUS_RET(context.SetOutput(0, *tensor), "[Set][Output] failed for [%s].", context.GetNodeName()); | |||||
| GELOGD("[%s] compute success.", node_->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| REGISTER_KERNEL_CREATOR(Variable, VariableKernel); | |||||
| REGISTER_KERNEL_CREATOR(Constant, VariableKernel); | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| @@ -1,42 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HYBRID_HOST_CPU_KERNEL_VARIABLE_KERNEL_H_ | |||||
| #define GE_HYBRID_HOST_CPU_KERNEL_VARIABLE_KERNEL_H_ | |||||
| #include "hybrid/node_executor/host_cpu/kernel/kernel.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| class VariableKernel : public Kernel { | |||||
| public: | |||||
| VariableKernel(const NodePtr &node) : Kernel(node) {} | |||||
| ~VariableKernel() override = default; | |||||
| VariableKernel &operator=(const VariableKernel &op) = delete; | |||||
| VariableKernel(const VariableKernel &op) = delete; | |||||
| /** | |||||
| * @brief compute for node_task. | |||||
| * @return result | |||||
| */ | |||||
| Status Compute(TaskContext& context) override; | |||||
| }; | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| #endif // GE_HYBRID_HOST_CPU_KERNEL_VARIABLE_KERNEL_H_ | |||||
| @@ -1,58 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| KernelFactory &KernelFactory::Instance() { | |||||
| static KernelFactory instance; | |||||
| return instance; | |||||
| } | |||||
| std::shared_ptr<Kernel> KernelFactory::CreateKernel(const NodePtr &node) { | |||||
| if (node == nullptr) { | |||||
| GELOGW("node is NULL."); | |||||
| return nullptr; | |||||
| } | |||||
| auto iter = kernel_creator_map_.find(node->GetType()); | |||||
| if (iter != kernel_creator_map_.end()) { | |||||
| return iter->second(node); | |||||
| } | |||||
| REPORT_INNER_ERROR("E19999", "Not supported because kernel_creator_map_ not contain type:%s, name = %s", | |||||
| node->GetType().c_str(), node->GetName().c_str()); | |||||
| GELOGE(FAILED, "[Find][NodeType]Not supported because kernel_creator_map_ not contain type = %s, name = %s", | |||||
| node->GetType().c_str(), node->GetName().c_str()); | |||||
| return nullptr; | |||||
| } | |||||
| void KernelFactory::RegisterCreator(const std::string &type, const KERNEL_CREATOR_FUNC &func) { | |||||
| if (func == nullptr) { | |||||
| GELOGW("Func is NULL."); | |||||
| return; | |||||
| } | |||||
| auto iter = kernel_creator_map_.find(type); | |||||
| if (iter != kernel_creator_map_.end()) { | |||||
| GELOGW("%s creator already exist", type.c_str()); | |||||
| return; | |||||
| } | |||||
| kernel_creator_map_[type] = func; | |||||
| } | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| @@ -1,88 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_HYBRID_NODE_EXECUTOR_HOST_CPU_KERNEL_FACTORY_H_ | |||||
| #define GE_HYBRID_NODE_EXECUTOR_HOST_CPU_KERNEL_FACTORY_H_ | |||||
| #include <functional> | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "hybrid/node_executor/host_cpu/kernel/kernel.h" | |||||
| namespace ge { | |||||
| namespace hybrid { | |||||
| namespace host_cpu { | |||||
| using KERNEL_CREATOR_FUNC = std::function<std::shared_ptr<Kernel>(const NodePtr &)>; | |||||
| /** | |||||
| * manage all the host_cpu_kernel, support create kernel. | |||||
| */ | |||||
| class KernelFactory { | |||||
| public: | |||||
| static KernelFactory &Instance(); | |||||
| /** | |||||
| * @brief create Kernel. | |||||
| * @param [in] node | |||||
| * @return not nullptr success | |||||
| * @return nullptr fail | |||||
| */ | |||||
| std::shared_ptr<Kernel> CreateKernel(const NodePtr &node); | |||||
| /** | |||||
| * @brief Register Kernel create function. | |||||
| * @param [in] type: Kernel type | |||||
| * @param [in] func: Kernel create func | |||||
| */ | |||||
| void RegisterCreator(const std::string &type, const KERNEL_CREATOR_FUNC &func); | |||||
| KernelFactory(const KernelFactory &) = delete; | |||||
| KernelFactory &operator=(const KernelFactory &) = delete; | |||||
| KernelFactory(KernelFactory &&) = delete; | |||||
| KernelFactory &operator=(KernelFactory &&) = delete; | |||||
| private: | |||||
| KernelFactory() = default; | |||||
| ~KernelFactory() = default; | |||||
| // the kernel creator function map | |||||
| std::map<std::string, KERNEL_CREATOR_FUNC> kernel_creator_map_; | |||||
| }; | |||||
| class KernelRegistrar { | |||||
| public: | |||||
| KernelRegistrar(const std::string &type, const KERNEL_CREATOR_FUNC &func) { | |||||
| KernelFactory::Instance().RegisterCreator(type, func); | |||||
| } | |||||
| ~KernelRegistrar() = default; | |||||
| KernelRegistrar(const KernelRegistrar &) = delete; | |||||
| KernelRegistrar &operator=(const KernelRegistrar &) = delete; | |||||
| KernelRegistrar(KernelRegistrar &&) = delete; | |||||
| KernelRegistrar &operator=(KernelRegistrar &&) = delete; | |||||
| }; | |||||
| #define REGISTER_KERNEL_CREATOR(type, clazz) \ | |||||
| std::shared_ptr<Kernel> Creator_##type##Kernel(const NodePtr &node) { \ | |||||
| return MakeShared<clazz>(node); \ | |||||
| } \ | |||||
| KernelRegistrar g_##type##Kernel_creator(#type, Creator_##type##Kernel) | |||||
| } // namespace host_cpu | |||||
| } // namespace hybrid | |||||
| } // namespace ge | |||||
| #endif // GE_HYBRID_NODE_EXECUTOR_HOST_CPU_KERNEL_FACTORY_H_ | |||||
| @@ -1 +1 @@ | |||||
| optimizer:["aicpu_tf_optimizer","aicpu_ascend_optimizer","AIcoreEngine","VectorEngine","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE"] | |||||
| optimizer:["aicpu_tf_optimizer","aicpu_ascend_optimizer","AIcoreEngine","VectorEngine","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE", "DNN_VM_HOST_CPU_OPTIMIZER"] | |||||
| @@ -45,7 +45,8 @@ typedef enum tagccKernelType { | |||||
| TE_AI_CPU = 5, /* te aicpu operator */ | TE_AI_CPU = 5, /* te aicpu operator */ | ||||
| AI_CPU = 6, /* aicpu */ | AI_CPU = 6, /* aicpu */ | ||||
| CUST_AI_CPU = 7, /* custom aicpu*/ | CUST_AI_CPU = 7, /* custom aicpu*/ | ||||
| INVALID = 8, /* unknown kernel type */ | |||||
| HOST_CPU = 8, /* host cpu */ | |||||
| INVALID = 10000 /* unknown kernel type */ | |||||
| } ccKernelType; | } ccKernelType; | ||||
| typedef struct tagOpContext { | typedef struct tagOpContext { | ||||
| @@ -615,12 +615,6 @@ set(SINGLE_OP_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel_factory.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/controlop/control_op_executor.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/controlop/control_op_executor.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc" | ||||
| @@ -842,6 +836,7 @@ set(HYBRID_TEST_FILES | |||||
| "hybrid/executor/worker/execution_engine_unittest.cc" | "hybrid/executor/worker/execution_engine_unittest.cc" | ||||
| "hybrid/model/hybrid_model_builder_unittest.cc" | "hybrid/model/hybrid_model_builder_unittest.cc" | ||||
| "hybrid/node_executor/rts/rts_node_task_unittest.cc" | "hybrid/node_executor/rts/rts_node_task_unittest.cc" | ||||
| "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" | |||||
| "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" | "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" | ||||
| "hybrid/executor/hybrid_model_async_executor_unittest.cc" | "hybrid/executor/hybrid_model_async_executor_unittest.cc" | ||||
| "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" | "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" | ||||
| @@ -0,0 +1,137 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <gmock/gmock.h> | |||||
| #include <vector> | |||||
| #define private public | |||||
| #define protected public | |||||
| #include "hybrid/executor/subgraph_context.h" | |||||
| #include "hybrid/node_executor/host_cpu/host_cpu_node_executor.h" | |||||
| #include "model/ge_root_model.h" | |||||
| #include "graph/passes/graph_builder_utils.h" | |||||
| #include "aicpu/common/aicpu_task_struct.h" | |||||
| #include "graph/manager/graph_mem_manager.h" | |||||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| namespace ge { | |||||
| using namespace hybrid; | |||||
| namespace { | |||||
| struct AicpuTaskStruct { | |||||
| aicpu::AicpuParamHead head; | |||||
| uint64_t io_addrp[2]; | |||||
| }__attribute__((packed)); | |||||
| } // namespace | |||||
| class UtestHostCpuNodeTask : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestHostCpuNodeTask, test_load) { | |||||
| ut::GraphBuilder builder = ut::GraphBuilder("graph"); | |||||
| auto node = builder.AddNode("Data", "Data", 1, 1); | |||||
| auto graph = builder.GetGraph(); | |||||
| GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||||
| HybridModel hybrid_model(ge_root_model); | |||||
| std::unique_ptr<NodeItem> node_item; | |||||
| ASSERT_EQ(NodeItem::Create(node, node_item), SUCCESS); | |||||
| hybrid_model.node_items_[node] = std::move(node_item); | |||||
| hybrid_model.task_defs_[node] = {}; | |||||
| NodeTaskPtr task = nullptr; | |||||
| HostCpuNodeExecutor node_executor; | |||||
| ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), PARAM_INVALID); | |||||
| AicpuTaskStruct args; | |||||
| args.head.length = sizeof(args); | |||||
| args.head.ioAddrNum = 2; | |||||
| domi::TaskDef task_def; | |||||
| task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||||
| task_def.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||||
| task_def.mutable_kernel()->set_args_size(args.head.length); | |||||
| hybrid_model.task_defs_[node] = {task_def}; | |||||
| hybrid_model.node_items_[node]->num_inputs = 1; | |||||
| hybrid_model.node_items_[node]->num_outputs = 1; | |||||
| ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), INTERNAL_ERROR); | |||||
| domi::TaskDef &host_task_def = hybrid_model.task_defs_[node][0]; | |||||
| host_task_def.set_type(RT_MODEL_TASK_KERNEL); | |||||
| ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), INTERNAL_ERROR); | |||||
| domi::KernelContext *context = host_task_def.mutable_kernel()->mutable_context(); | |||||
| context->set_kernel_type(8); // ccKernelType::HOST_CPU | |||||
| ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), INTERNAL_ERROR); | |||||
| HostCpuEngine::GetInstance().constant_folding_handle_ = (void *)0x01; | |||||
| ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), INTERNAL_ERROR); | |||||
| } | |||||
| TEST_F(UtestHostCpuNodeTask, test_execute) { | |||||
| ut::GraphBuilder builder = ut::GraphBuilder("graph"); | |||||
| auto node = builder.AddNode("Data", "Data", 1, 1); | |||||
| std::unique_ptr<NodeItem> node_item; | |||||
| ASSERT_EQ(NodeItem::Create(node, node_item), SUCCESS); | |||||
| domi::TaskDef task_def; | |||||
| HostAicpuNodeTask task(node_item.get(), task_def); | |||||
| std::function<void()> call_back = []{}; | |||||
| NodeState node_state(*node_item, nullptr); | |||||
| TaskContext context(nullptr, &node_state, nullptr); | |||||
| ASSERT_EQ(task.ExecuteAsync(context, call_back), INTERNAL_ERROR); | |||||
| std::function<uint32_t (void *)> run_cpu_kernel = [](void *){ return 0; }; | |||||
| task.SetRunKernel(run_cpu_kernel); | |||||
| ASSERT_EQ(task.ExecuteAsync(context, call_back), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestHostCpuNodeTask, test_update_args) { | |||||
| ut::GraphBuilder builder = ut::GraphBuilder("graph"); | |||||
| auto node = builder.AddNode("Data", "Data", 1, 1); | |||||
| std::unique_ptr<NodeItem> node_item; | |||||
| ASSERT_EQ(NodeItem::Create(node, node_item), SUCCESS); | |||||
| NodeState node_state(*node_item, nullptr); | |||||
| TaskContext context(nullptr, &node_state, nullptr); | |||||
| auto *in_addr = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(1); | |||||
| auto tmp = TensorBuffer::Create(in_addr, 1); | |||||
| std::shared_ptr<TensorBuffer> input_buffer(tmp.release()); | |||||
| TensorValue input_start[1] = {TensorValue(input_buffer)}; | |||||
| context.inputs_start_ = input_start; | |||||
| auto *out_addr = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(1); | |||||
| tmp = TensorBuffer::Create(out_addr, 1); | |||||
| std::shared_ptr<TensorBuffer> output_buffer(tmp.release()); | |||||
| TensorValue output_start[1] = {TensorValue(output_buffer)}; | |||||
| context.outputs_start_ = output_start; | |||||
| domi::TaskDef task_def; | |||||
| HostAicpuNodeTask task(node_item.get(), task_def); | |||||
| ASSERT_EQ(task.UpdateArgs(context), INTERNAL_ERROR); | |||||
| task.args_size_ = sizeof(AicpuTaskStruct); | |||||
| task.args_.reset(new(std::nothrow) uint8_t[task.args_size_]()); | |||||
| ASSERT_EQ(task.UpdateArgs(context), SUCCESS); | |||||
| } | |||||
| } // namespace ge | |||||