@@ -88,4 +88,8 @@ void OpTilingManager::LoadSo() {
}
}
OpTilingManager &OpTilingManager::GetInstance() {
  static OpTilingManager instance;
  return instance;
}
}  // namespace ge
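// Editor's note: the GetInstance() accessor above is the function-local-static
// (Meyers) singleton; C++11 guarantees the static is initialized exactly once,
// even under concurrent first use. A minimal standalone sketch of the same idiom,
// with illustrative names that are not part of this change:
#include <iostream>
class Registry {
 public:
  Registry() = default;
  static Registry &GetInstance() {
    static Registry instance;  // constructed once, thread-safe since C++11
    return instance;
  }
  void LoadSo() { std::cout << "load tiling libraries" << std::endl; }
};
int main() {
  Registry::GetInstance().LoadSo();  // mirrors OpTilingManager::GetInstance().LoadSo()
  return 0;
}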
@@ -25,6 +25,7 @@ using SoToHandleMap = std::map<std::string, void *>;
class OpTilingManager {
 public:
  OpTilingManager() = default;
  static OpTilingManager &GetInstance();
  ~OpTilingManager();
  void LoadSo();
@@ -72,7 +72,89 @@ set(SRC_LIST
    "../single_op/task/tbe_task_builder.cc"
    "../single_op/task/aicpu_task_builder.cc"
    "../single_op/task/aicpu_kernel_task_builder.cc"
    "../hybrid/hybrid_davinci_model_stub.cc"
    "../hybrid/common/tensor_value.cc"
    "../hybrid/common/npu_memory_allocator.cc"
    "../hybrid/executor/rt_callback_manager.cc"
    "../hybrid/executor/node_state.cc"
    "../hybrid/executor/node_done_manager.cc"
    "../hybrid/executor/hybrid_profiler.cc"
    "../hybrid/executor/hybrid_model_executor.cc"
    "../hybrid/executor/hybrid_model_async_executor.cc"
    "../hybrid/executor/hybrid_execution_context.cc"
    "../hybrid/executor/subgraph_context.cc"
    "../hybrid/executor/subgraph_executor.cc"
    "../hybrid/executor/worker/task_compile_engine.cc"
    "../hybrid/executor/worker/shape_inference_engine.cc"
    "../hybrid/executor/worker/execution_engine.cc"
    "../hybrid/model/hybrid_model.cc"
    "../hybrid/model/hybrid_model_builder.cc"
    "../hybrid/model/node_item.cc"
    "../hybrid/model/graph_item.cc"
    "../hybrid/node_executor/aicore/aicore_node_executor.cc"
    "../hybrid/node_executor/aicore/aicore_op_task.cc"
    "../hybrid/node_executor/aicore/aicore_task_builder.cc"
    "../hybrid/node_executor/aicpu/aicpu_node_executor.cc"
    "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
    "../hybrid/node_executor/ge_local/ge_local_node_executor.cc"
    "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
    "../hybrid/node_executor/host_cpu/kernel_factory.cc"
    "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
    "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
    "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
    "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
    "../hybrid/node_executor/controlop/control_op_executor.cc"
    "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
    "../hybrid/node_executor/rts/rts_node_executor.cc"
    "../hybrid/node_executor/node_executor.cc"
    "../hybrid/node_executor/task_context.cc"
    "../hybrid/hybrid_davinci_model.cc"
    "../ge_local_engine/engine/host_cpu_engine.cc"
    "../graph/common/omg_util.cc"
    "../graph/manager/host_mem_manager.cc"
    "../graph/build/memory/var_mem_assign_util.cc"
    "../host_kernels/transpose_kernel.cc"
    "../host_kernels/add_kernel.cc"
    "../host_kernels/broadcast_args_kernel.cc"
    "../host_kernels/broadcast_gradient_args_kernel.cc"
    "../host_kernels/cast_kernel.cc"
    "../host_kernels/concat_offset_kernel.cc"
    "../host_kernels/concat_v2_kernel.cc"
    "../host_kernels/dynamic_stitch_kernel.cc"
    "../host_kernels/identity_kernel.cc"
    "../host_kernels/empty_kernel.cc"
    "../host_kernels/expanddims_kernel.cc"
    "../host_kernels/fill_kernel.cc"
    "../host_kernels/floordiv_kernel.cc"
    "../host_kernels/floormod_kernel.cc"
    "../host_kernels/gather_v2_kernel.cc"
    "../host_kernels/greater_kernel.cc"
    "../host_kernels/kernel_utils.cc"
    "../host_kernels/maximum_kernel.cc"
    "../host_kernels/mul_kernel.cc"
    "../host_kernels/pack_kernel.cc"
    "../host_kernels/permute_kernel.cc"
    "../host_kernels/range_kernel.cc"
    "../host_kernels/rank_kernel.cc"
    "../host_kernels/reduce_prod_kernel.cc"
    "../host_kernels/reshape_kernel.cc"
    "../host_kernels/rsqrt_kernel.cc"
    "../host_kernels/shape_kernel.cc"
    "../host_kernels/shape_n_kernel.cc"
    "../host_kernels/size_kernel.cc"
    "../host_kernels/slice_d_kernel.cc"
    "../host_kernels/slice_kernel.cc"
    "../host_kernels/squeeze_kernel.cc"
    "../host_kernels/unsqueeze_kernel.cc"
    "../host_kernels/ssd_prior_box_kernel.cc"
    "../host_kernels/strided_slice_kernel.cc"
    "../host_kernels/sub_kernel.cc"
    "../host_kernels/transdata_kernel.cc"
    "../host_kernels/unpack_kernel.cc"
    "../graph/passes/pass_utils.cc"
    "../graph/common/bcast.cc"
    "../common/fp16_t.cc"
    "../common/formats/format_transfers/format_transfer_transpose.cc"
    "../common/formats/utils/formats_trans_utils.cc"
)
######## libge_executor.a ########
@@ -103,9 +185,9 @@ target_include_directories(ge_executor PRIVATE
    ${CMAKE_BINARY_DIR}/proto/ge
    #### yellow zone ####
    ${GE_CODE_DIR}/../inc
    ${GE_CODE_DIR}/../inc/cce
    #### blue zone ####
    ${GE_CODE_DIR}/third_party/fwkacllib/inc
)
target_link_libraries(ge_executor PRIVATE
@@ -145,9 +227,9 @@ target_include_directories(ge_executor_shared PRIVATE
    ${CMAKE_BINARY_DIR}/proto/ge
    #### yellow zone ####
    ${GE_CODE_DIR}/../inc
    ${GE_CODE_DIR}/../inc/cce
| ${GE_CODE_DIR}/../inc/cce | |||
| #### blue zone #### | |||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||
)
target_link_libraries(ge_executor_shared PRIVATE
@@ -156,7 +238,7 @@ target_link_libraries(ge_executor_shared PRIVATE
    -Wl,--no-as-needed
    ge_common
    runtime
    slog
    mmpa
    graph
    register
@@ -39,6 +39,8 @@
#include "graph/manager/graph_var_manager.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "graph/opsproto_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h"
using std::string;
using std::vector;
@@ -221,6 +223,33 @@ class ModelListenerAdapter : public ModelListener {
  std::shared_ptr<ge::ModelListener> listener;
};
static void InitOpsProtoManager() {
  string opsproto_path;
  const char *path_env = std::getenv("ASCEND_OPP_PATH");
  if (path_env != nullptr) {
    string path = path_env;
    string file_path = RealPath(path.c_str());
    if (file_path.empty()) {
      GELOGE(FAILED, "File path %s is invalid.", path.c_str());
      return;
    }
    opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
    GELOGI("Got opsproto so path from env: %s", path.c_str());
  } else {
    string path_base = PluginManager::GetPath();
    GELOGI("path_base is %s", path_base.c_str());
    path_base = path_base.substr(0, path_base.rfind('/'));
    path_base = path_base.substr(0, path_base.rfind('/') + 1);
    opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
  }
  GELOGI("Got opsproto path: %s", opsproto_path.c_str());
  OpsProtoManager *manager = OpsProtoManager::Instance();
  map<string, string> option_tmp;
  option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
  (void)manager->Initialize(option_tmp);
}
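// Editor's note: a worked example of the path resolution above, using a
// hypothetical install prefix. With ASCEND_OPP_PATH=/usr/local/Ascend/opp the
// function produces
//   /usr/local/Ascend/opp/op_proto/custom/:/usr/local/Ascend/opp/op_proto/built-in/
// Without the env var, it trims the last two path components of
// PluginManager::GetPath() and appends the "ops/op_proto/..." suffixes. A
// self-contained sketch of that trimming (the input value is hypothetical):
#include <iostream>
#include <string>
int main() {
  std::string path_base = "/opt/ge/lib64/libge_executor.so";  // hypothetical plugin path
  path_base = path_base.substr(0, path_base.rfind('/'));      // -> "/opt/ge/lib64"
  path_base = path_base.substr(0, path_base.rfind('/') + 1);  // -> "/opt/ge/"
  std::cout << path_base + "ops/op_proto/custom/:" + path_base + "ops/op_proto/built-in/" << std::endl;
  return 0;
}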
GeExecutor::GeExecutor() {}
Status GeExecutor::Initialize() {
@@ -230,6 +259,16 @@ Status GeExecutor::Initialize() {
    return ge::SUCCESS;
  }
  OpTilingManager::GetInstance().LoadSo();
  Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
  if (initHostCpuEngineStatus != SUCCESS) {
    GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine.");
    return initHostCpuEngineStatus;
  }
  InitOpsProtoManager();
  std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
  mem_type.push_back(RT_MEMORY_P2P_DDR);
  auto ret = MemManager::Instance().Initialize(mem_type);
@@ -599,10 +638,16 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
  if (davinci_model != nullptr) {
    uint64_t session_id = davinci_model->GetSessionId();
  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id);
  if (hybrid_davinci_model != nullptr) {
    uint64_t session_id = hybrid_davinci_model->GetSessionId();
    VarManagerPool::Instance().RemoveVarManager(session_id);
  } else {
    std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
    if (davinci_model != nullptr) {
      uint64_t session_id = davinci_model->GetSessionId();
      VarManagerPool::Instance().RemoveVarManager(session_id);
    }
  }
  ret = GraphLoader::UnloadModel(model_id);
  if (ret != SUCCESS) {
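// Editor's note: the UnloadModel change above makes session cleanup hybrid-aware:
// the hybrid (dynamic-shape) model is consulted first, and only if it is absent does
// the classic DavinciModel supply the session id whose VarManager is removed. The
// lookup order, sketched with a hypothetical helper name:
//   static bool TryGetSessionId(uint32_t model_id, uint64_t &session_id) {
//     auto hybrid = ModelManager::GetInstance()->GetHybridModel(model_id);
//     if (hybrid != nullptr) { session_id = hybrid->GetSessionId(); return true; }
//     auto davinci = ModelManager::GetInstance()->GetModel(model_id);
//     if (davinci != nullptr) { session_id = davinci->GetSessionId(); return true; }
//     return false;
//   }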
@@ -932,6 +977,26 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
 */
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
                             ge::RunModelData &run_output_data, bool async_mode) {
  std::vector<GeTensorDesc> input_desc = {};
  std::vector<GeTensorDesc> output_desc = {};
  return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode);
}
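// Editor's note: the overload above keeps the pre-existing ExecModel signature and
// forwards with empty descriptor vectors, while the desc-aware overload that follows
// carries GeTensorDesc through to the dynamic-shape executor. The delegation pattern
// in isolation, as a compilable sketch with stand-in types (not the GE API):
#include <cstdint>
#include <vector>
struct TensorDesc {};
using Status = int;
// new, descriptor-aware entry point
Status ExecModel(uint32_t id, void *stream, const std::vector<TensorDesc> &in_desc,
                 std::vector<TensorDesc> &out_desc) {
  (void)id; (void)stream; (void)in_desc; (void)out_desc;
  return 0;
}
// legacy overload: same behaviour as before, now a thin forwarder
Status ExecModel(uint32_t id, void *stream) {
  std::vector<TensorDesc> in_desc;   // callers without shape info pass empty descs
  std::vector<TensorDesc> out_desc;
  return ExecModel(id, stream, in_desc, out_desc);
}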
/**
 * @ingroup ge
 * @brief Synchronous execution of offline model (do not create thread)
 * @param [in] uint32_t model_id: Model ID to execute
 *        void* stream: stream to execute
 *        const domi::InputData *input_data: Model input data
 *        const std::vector<GeTensorDesc> &input_desc: Description of model input data
 *        bool async_mode: is asynchronous mode
 * @param [out] domi::OutputData *output_data: Model output data
 * @param [out] std::vector<GeTensorDesc> &output_desc: Description of model output data
 * @return SUCCESS handle successfully / others handle failed
 */
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
                             const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
                             std::vector<GeTensorDesc> &output_desc, bool async_mode) {
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -956,7 +1021,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
    }
  }
  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data);
  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
}
/**
@@ -61,9 +61,91 @@ local_ge_executor_src_files := \
    ../single_op/task/tbe_task_builder.cc \
    ../single_op/task/aicpu_task_builder.cc \
    ../single_op/task/aicpu_kernel_task_builder.cc \
    ../hybrid/hybrid_davinci_model_stub.cc \
    ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \
    ../graph/common/local_context.cc \
    ../hybrid/common/tensor_value.cc \
    ../hybrid/common/npu_memory_allocator.cc \
    ../hybrid/executor/rt_callback_manager.cc \
    ../hybrid/executor/node_state.cc \
    ../hybrid/executor/node_done_manager.cc \
    ../hybrid/executor/hybrid_profiler.cc \
    ../hybrid/executor/hybrid_model_executor.cc \
    ../hybrid/executor/hybrid_model_async_executor.cc \
    ../hybrid/executor/hybrid_execution_context.cc \
    ../hybrid/executor/subgraph_context.cc \
    ../hybrid/executor/subgraph_executor.cc \
    ../hybrid/executor/worker/task_compile_engine.cc \
    ../hybrid/executor/worker/shape_inference_engine.cc \
    ../hybrid/executor/worker/execution_engine.cc \
    ../hybrid/model/hybrid_model.cc \
    ../hybrid/model/hybrid_model_builder.cc \
    ../hybrid/model/node_item.cc \
    ../hybrid/model/graph_item.cc \
    ../hybrid/node_executor/aicore/aicore_node_executor.cc \
    ../hybrid/node_executor/aicore/aicore_op_task.cc \
    ../hybrid/node_executor/aicore/aicore_task_builder.cc \
    ../hybrid/node_executor/aicpu/aicpu_node_executor.cc \
    ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \
    ../hybrid/node_executor/ge_local/ge_local_node_executor.cc \
    ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \
    ../hybrid/node_executor/host_cpu/kernel_factory.cc \
    ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \
    ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \
    ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \
    ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \
    ../hybrid/node_executor/controlop/control_op_executor.cc \
    ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
    ../hybrid/node_executor/rts/rts_node_executor.cc \
    ../hybrid/node_executor/node_executor.cc \
    ../hybrid/node_executor/task_context.cc \
    ../hybrid/hybrid_davinci_model.cc \
    ../ge_local_engine/engine/host_cpu_engine.cc \
    ../graph/common/omg_util.cc \
    ../graph/manager/host_mem_manager.cc \
    ../graph/build/memory/var_mem_assign_util.cc \
    ../host_kernels/transpose_kernel.cc \
    ../host_kernels/add_kernel.cc \
    ../host_kernels/broadcast_args_kernel.cc \
    ../host_kernels/broadcast_gradient_args_kernel.cc \
    ../host_kernels/cast_kernel.cc \
    ../host_kernels/concat_offset_kernel.cc \
    ../host_kernels/concat_v2_kernel.cc \
    ../host_kernels/dynamic_stitch_kernel.cc \
    ../host_kernels/identity_kernel.cc \
    ../host_kernels/empty_kernel.cc \
    ../host_kernels/expanddims_kernel.cc \
    ../host_kernels/fill_kernel.cc \
    ../host_kernels/floordiv_kernel.cc \
    ../host_kernels/floormod_kernel.cc \
    ../host_kernels/gather_v2_kernel.cc \
    ../host_kernels/greater_kernel.cc \
    ../host_kernels/kernel_utils.cc \
    ../host_kernels/maximum_kernel.cc \
    ../host_kernels/mul_kernel.cc \
    ../host_kernels/pack_kernel.cc \
    ../host_kernels/permute_kernel.cc \
    ../host_kernels/range_kernel.cc \
    ../host_kernels/rank_kernel.cc \
    ../host_kernels/reduce_prod_kernel.cc \
    ../host_kernels/reshape_kernel.cc \
    ../host_kernels/rsqrt_kernel.cc \
    ../host_kernels/shape_kernel.cc \
    ../host_kernels/shape_n_kernel.cc \
    ../host_kernels/size_kernel.cc \
    ../host_kernels/slice_d_kernel.cc \
    ../host_kernels/slice_kernel.cc \
    ../host_kernels/squeeze_kernel.cc \
    ../host_kernels/unsqueeze_kernel.cc \
    ../host_kernels/ssd_prior_box_kernel.cc \
    ../host_kernels/strided_slice_kernel.cc \
    ../host_kernels/sub_kernel.cc \
    ../host_kernels/transdata_kernel.cc \
    ../host_kernels/unpack_kernel.cc \
    ../graph/passes/pass_utils.cc \
    ../graph/common/bcast.cc \
    ../common/fp16_t.cc \
    ../common/formats/format_transfers/format_transfer_transpose.cc \
    ../common/formats/utils/formats_trans_utils.cc \
local_ge_executor_c_include := \
    proto/insert_op.proto \
@@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
)
############ libge_local_opskernel_builder.a ############
add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
target_compile_options(ge_local_opskernel_builder_static PRIVATE
    -Werror
@@ -20,7 +20,7 @@
#include "framework/common/ge_inner_error_codes.h"
#include "graph/node.h"
#include "graph/operator.h"
#include "register/register.h"
#include "external/../register/register.h"
namespace ge {
class HostCpuEngine {
@@ -30,6 +30,7 @@
#include "model/ge_model.h"
#include "graph/ge_context.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "graph/utils/op_desc_utils.h"
using domi::BuildMode;
@@ -311,6 +312,53 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
  return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
}
static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
                               const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
  GE_CHECK_NOTNULL(out_anchor);
  NodePtr in_node = out_anchor->GetOwnerNode();
  GE_CHECK_NOTNULL(in_node);
  OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
  OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
                                     .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
                                     .Build();
  (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
  if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Insert MemcpyAddrAsync node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
  for (auto &node : graph->GetDirectNode()) {
    // CONSTANT does not generate a task, so insert a MemcpyAddrAsync node between CONSTANT and NETOUTPUT
    auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr) {
      continue;
    }
    auto op_type = op_desc->GetType();
    if (op_type == NETOUTPUT) {
      for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
        const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        NodePtr in_node = peer_out_anchor->GetOwnerNode();
        GE_CHECK_NOTNULL(in_node);
        std::string in_node_op_type = in_node->GetType();
        if (in_node_op_type == CONSTANT) {
          GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
          std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
          if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
            GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str());
            return FAILED;
          }
        }
      }
    }
  }
  return SUCCESS;
}
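// Editor's note: the net effect of GenerateTaskForConstant on a matching subgraph,
// sketched as a before/after topology (data edges only):
//   before: Const -----------------------> NetOutput
//   after:  Const ---> MemcpyAddrAsync ---> NetOutput
// The inserted node gets a runtime task, which the CONSTANT node itself never does,
// so NETOUTPUT can still read the constant's value during execution.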
Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
                                               std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                                               GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
@@ -332,6 +380,9 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
    if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
      continue;
    }
    GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task for constant node in subgraph failed.");
    if (sub_graph->GetGraphUnknownFlag()) {
      // unknown shape build flow
      GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
@@ -274,13 +274,16 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
/// @param [in] stream stream to execute model on
/// @param [in] async_mode is asynchronous mode.
/// @param [in] input_data model input data
/// @param [in] input_desc description of model input data
/// @param [out] output_data model output data
/// @param [out] output_desc description of model output data
///
Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
                                 OutputData &output_data) {
                                 const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
                                 std::vector<GeTensorDesc> &output_desc) {
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data);
  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
  if (ret != SUCCESS) {
    GELOGE(ret, "Execute model failed, model_id:%u.", model_id);
    return ret;
@@ -65,7 +65,8 @@ class GraphLoader {
                                  const std::vector<uint32_t> &output_queue_ids);
  static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
                             OutputData &output_data);
                             const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
                             std::vector<GeTensorDesc> &output_desc);
  static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);
@@ -118,7 +118,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
      load_end_time_(0),
      time_info_(),
      dataInputTid(0),
      is_model_has_inited_(false),
      is_weight_mem_has_inited_(false),
      is_feature_map_mem_has_inited_(false),
      model_id_(0),
      runtime_model_id_(0),
      version_(0),
@@ -264,34 +265,65 @@ void DavinciModel::Shrink() {
  ge_model_.reset();  // delete object.
}
Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
  if (is_model_has_inited_) {
    GELOGE(FAILED, "call InitModelMem more than once.");
Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
  if (is_weight_mem_has_inited_) {
    GELOGE(FAILED, "call InitWeightMem more than once.");
    return FAILED;
  }
  is_model_has_inited_ = true;
  is_weight_mem_has_inited_ = true;
  std::size_t data_size = TotalMemSize();
  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
  const Buffer &weights = ge_model_->GetWeight();
  std::size_t weights_size = weights.GetSize();
  GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE);
  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
    return FAILED;
  }
  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
  is_inner_weight_base_ = false;
  if (weights_size != 0) {
    weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
    is_inner_weight_base_ = false;
    if (weight_ptr == nullptr) {
      weights_mem_base_ = MallocWeightsMem(weights_size);
      if (weights_mem_base_ == nullptr) {
        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
      }
      is_inner_weight_base_ = true;
    }
    GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
           weights_mem_base_, weights_size);
    GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
    GELOGI("copy weights data to device");
  }
  runtime_param_.weight_base = weights_mem_base_;
  return SUCCESS;
}
Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
  if (is_feature_map_mem_has_inited_) {
    GELOGE(FAILED, "call InitFeatureMapAndP2PMem more than once.");
    return FAILED;
  }
  is_feature_map_mem_has_inited_ = true;
  std::size_t data_size = TotalMemSize();
  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
    return FAILED;
  }
  mem_base_ = static_cast<uint8_t *>(dev_ptr);
  p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr);
  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
  is_inner_mem_base_ = false;
  is_inner_weight_base_ = false;
  if (TotalMemSize() && mem_base_ == nullptr) {
    mem_base_ = MallocFeatureMapMem(data_size);
@@ -299,12 +331,14 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
    GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
    return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
  }
  GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
  GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
          mem_base_, data_size);
  weights_mem_base_ = mem_base_;
  if (!is_inner_weight_base_) {
    weights_mem_base_ = mem_base_;
    is_inner_weight_base_ = true;
  }
  is_inner_mem_base_ = true;
  is_inner_weight_base_ = true;
}
if (p2p_data_size != 0) {
@@ -313,27 +347,11 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
    GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed, size: %zu", p2p_data_size);
    return GE_EXEC_ALLOC_P2P_MEM_FAILED;
  }
  GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
  GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
         p2p_mem_base_, p2p_data_size);
  is_inner_p2p_mem_base_ = true;
}
if (weights_size != 0) {
  weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
  is_inner_weight_base_ = false;
  if (weight_ptr == nullptr) {
    weights_mem_base_ = MallocWeightsMem(weights_size);
    if (weights_mem_base_ == nullptr) {
      GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
      return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
    }
    is_inner_weight_base_ = true;
  }
  GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
         weights_mem_base_, weights_size);
  GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
}
GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
runtime_param_.mem_base = mem_base_;
runtime_param_.weight_base = weights_mem_base_;
@@ -643,8 +661,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
  GE_TIMESTAMP_START(InitModelMem);
  GELOGD("Known node is %d", known_node_);
  GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size));
  if (!known_node_) {
    GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size));
    GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
    data_inputer_ = new (std::nothrow) DataInputer();
    GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
  }
@@ -1141,6 +1160,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
    GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
                    GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;);
  }
  return SUCCESS;
}
@@ -584,7 +584,8 @@ class DavinciModel {
  Status SyncVarData();
  Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize);
  Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
  Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);
  void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);
@@ -850,7 +851,9 @@ class DavinciModel {
  Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);
  Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);
  bool is_model_has_inited_;
  bool is_weight_mem_has_inited_;
  bool is_feature_map_mem_has_inited_;
  uint32_t model_id_;
  uint32_t runtime_model_id_;
  string name_;
@@ -31,6 +31,7 @@
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"
namespace ge {
thread_local uint32_t device_count = 0;
@@ -204,6 +205,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
  std::lock_guard<std::mutex> lock(map_mutex_);
  auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
  if (hybrid_davinci_model != hybrid_model_map_.end()) {
    uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
    DestroyAicpuSession(session_id);
    return SUCCESS;
  }
  auto it = model_map_.find(model_id);
  if (it == model_map_.end()) {
    GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exist.", model_id);
@@ -925,6 +933,12 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
                                            vector<InputOutputDescInfo> &output_desc,
                                            std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats,
                                            bool new_model_desc) {
  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
  if (hybrid_davinci_model != nullptr) {
    hybrid_davinci_model->SetModelDescVersion(new_model_desc);
    return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats);
  }
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
                         "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
@@ -943,6 +957,11 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
///
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
                                         int32_t &dynamic_type) {
  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
  if (hybrid_davinci_model != nullptr) {
    return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type);
  }
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetDynamicBatchInfo failed, Invalid model id %u!", model_id);
@@ -975,6 +994,12 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect
///
Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
                                                std::vector<std::string> &user_input_shape_order) {
  auto hybrid_davinci_model = GetHybridModel(model_id);
  if (hybrid_davinci_model != nullptr) {
    hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order);
    return SUCCESS;
  }
  auto davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id)
@@ -990,6 +1015,12 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &
}
Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
  if (hybrid_davinci_model != nullptr) {
    hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info);
    return SUCCESS;
  }
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHECK_NOTNULL(davinci_model);
  davinci_model->GetModelAttr(dynamic_output_shape_info);
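// Editor's note: the ModelManager getters above all follow one dispatch pattern,
// which this diff repeats verbatim: query the hybrid (dynamic-shape) model first,
// then fall back to the classic DavinciModel. Sketched generically (placeholder
// Query() stands for whichever getter is being forwarded):
//   auto hybrid = GetHybridModel(model_id);
//   if (hybrid != nullptr) {
//     return hybrid->Query(...);        // hybrid path wins when present
//   }
//   auto davinci = GetModel(model_id);
//   // validate davinci, else report *_MODEL_ID_INVALID
//   return davinci->Query(...);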
@@ -1201,10 +1232,25 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
/// @param [in] stream model stream
/// @param [in] async_mode is asynchronous mode.
/// @param [in] input_data input data
/// @param [in] input_desc description of input data
/// @param [out] output_data output data
/// @param [out] output_desc description of output data
///
Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
                                  OutputData &output_data) {
                                  const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
                                  std::vector<GeTensorDesc> &output_desc) {
  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
  if (hybrid_davinci_model != nullptr) {
    auto inputs = input_data.blobs;
    auto outputs = output_data.blobs;
    Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream);
    if (status == SUCCESS) {
      GELOGI("Execute model %u success.", model_id);
    }
    return status;
  }
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id);
@@ -148,10 +148,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
/// @param [in] stream model stream
/// @param [in] async_mode is asynchronous mode.
/// @param [in] input_data model input data
/// @param [in] input_desc description of model input data
/// @param [out] output_data model output data
/// @param [out] output_desc description of model output data
///
ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
                        OutputData &output_data);
                        const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
                        std::vector<GeTensorDesc> &output_desc);
ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs);
@@ -26,6 +26,7 @@
#include <vector>
#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/types.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/graph_utils.h"
@@ -72,7 +73,7 @@ Status DynamicShapePartitioner::Partition() {
  }
  REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true),
          "Failed to set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
  REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed to do ctrl edge transfer!");
  DumpGraph("_Before_DSP");
  auto status = PartitionImpl();
  GELOGD("%s.", DebugString().c_str());
@@ -86,6 +87,50 @@ Status DynamicShapePartitioner::Partition() {
  return status;
}
Status DynamicShapePartitioner::CtrlEdgeTransfer() {
  GELOGD("Do ctrl edge transfer start!");
  GE_CHECK_NOTNULL(root_graph_);
  bool is_dynamic_shape = false;
  (void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape);
  if (!is_dynamic_shape) {
    return SUCCESS;
  }
  for (auto &subgraph : root_graph_->GetAllSubgraphs()) {
    for (ge::NodePtr &n : subgraph->GetDirectNode()) {
      auto op_desc = n->GetOpDesc();
      if (op_desc == nullptr) {
        continue;
      }
      auto op_type = op_desc->GetType();
      if (op_type == CONSTANT || op_type == CONSTANTOP) {
        if (n->GetInAllNodes().empty()) {
          GELOGD("[CtrlEdgeTransferPass] node [%s] has no in nodes", n->GetName().c_str());
          continue;
        }
        GELOGD("start to transfer ctrl edge for const node [%s]", n->GetName().c_str());
        for (auto &in_control_node : n->GetInControlNodes()) {
          GE_CHECK_NOTNULL(in_control_node);
          GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(),
                                                       n->GetInControlAnchor()), "remove edge failed");
          for (auto &out_node : n->GetOutNodes()) {
            if (out_node == nullptr) {
              continue;
            }
            GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(),
                                                      out_node->GetInControlAnchor()), "add edge failed.");
          }
        }
      }
    }
  }
  GELOGD("Do ctrl edge transfer end!");
  return SUCCESS;
}
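// Editor's note: CtrlEdgeTransfer rewires incoming control edges around Const /
// Constant nodes inside subgraphs of a dynamic-shape-partitioned graph:
//   before: Pred --ctrl--> Const --data--> Consumer
//   after:  Pred --ctrl--> Consumer,  Const --data--> Consumer
// Each in-control edge on the constant is removed and re-attached to every consumer
// of the constant, so execution ordering is preserved while the constant itself is
// left free of control dependencies.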
Status DynamicShapePartitioner::PartitionImpl() {
  REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed.");
  REQUIRE_SUCCESS(InitClusters(), "Failed to init cluster nodes.");
@@ -151,6 +151,7 @@ class DynamicShapePartitioner {
  Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow);
  Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow);
  bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor);
  Status CtrlEdgeTransfer();
  ge::ComputeGraphPtr root_graph_;  // The original graph to partition
  std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_;  // Record each node and the cluster it belongs to
  // topological sorted clusters, this field will change with the splitting.
@@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No
  GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return "");
  if (node->GetType() == CAST) {
    trans_data_type = true;
  } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) {
  } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) {
    trans_format = true;
    trans_shape = true;
  } else if (node->GetType() == TRANSDATA) {
@@ -8,7 +8,7 @@ set(SRC_LIST
    "engine/host_cpu_engine.cc"
    "ops_kernel_store/host_cpu_ops_kernel_info.cc"
    "ops_kernel_store/op/op_factory.cc"
    "ops_kernel_store/op/host_op.cc"
| "ops_kernel_store/op/host_op.cc" | |||
)
set(CPU_OPS_KERNEL_LIST
@@ -98,7 +98,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE
set_target_properties(atc_host_cpu_engine PROPERTIES
    OUTPUT_NAME host_cpu_engine
    LIBRARY_OUTPUT_DIRECTORY atclib
)
############ libhost_cpu_opskernel_builder.so ############
@@ -185,7 +185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES
)
############ libhost_cpu_opskernel_builder.a ############
add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST})
add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})
target_compile_options(host_cpu_opskernel_builder_static PRIVATE
    -Werror
@@ -353,6 +353,44 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
  return SUCCESS;
}
Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
                                         const std::vector<GeTensorDesc> &input_desc,
                                         std::vector<DataBuffer> &outputs,
                                         std::vector<GeTensorDesc> &output_desc) {
  GELOGI("Start to execute model.");
  HybridModelExecutor::ExecuteArgs args;
  args.inputs.resize(inputs.size());
  for (size_t i = 0; i < inputs.size(); ++i) {
    TensorValue tensor_value(inputs[i].data, inputs[i].length);
    args.inputs[i] = tensor_value;
  }
  GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
  for (const auto &output_tensor_desc : args.output_desc) {
    output_desc.emplace_back(*output_tensor_desc);
  }
  for (size_t i = 0; i < args.outputs.size(); ++i) {
    int64_t output_real_size = 0;
    ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size);
    if (graph_status != GRAPH_SUCCESS) {
      GELOGE(FAILED, "Get tensor size in bytes failed.");
      return FAILED;
    }
    if (output_real_size > 0) {
      if (outputs[i].length < static_cast<uint64_t>(output_real_size)) {
        GELOGE(FAILED,
               "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the "
               "real size of output[%ld]",
               i, outputs[i].length, output_real_size);
        return FAILED;
      }
      GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size,
                             RT_MEMCPY_DEVICE_TO_DEVICE));
    }
    outputs[i].length = output_real_size;
  }
  return SUCCESS;
}
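// Editor's note: a hedged caller-side sketch of the new Execute overload. DataBuffer
// carries (data, length) as used above; the device pointers, lengths and executor
// handle below are assumptions for illustration only:
//   std::vector<DataBuffer> inputs(1), outputs(1);
//   inputs[0].data = dev_in;        // device buffer, hypothetical
//   inputs[0].length = in_len;
//   outputs[0].data = dev_out;      // capacity must be >= the real output size,
//   outputs[0].length = out_cap;    // otherwise Execute fails the length check
//   std::vector<GeTensorDesc> in_desc(1);
//   std::vector<GeTensorDesc> out_desc;
//   Status ret = async_executor.Execute(inputs, in_desc, outputs, out_desc);
//   // on success, outputs[0].length is overwritten with the real output size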
Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
  GELOGD("Start to execute model.");
  // prepare inputs
@@ -35,6 +35,11 @@ class HybridModelAsyncExecutor {
  Status Init();
  Status Execute(const std::vector<DataBuffer> &inputs,
                 const std::vector<GeTensorDesc> &input_desc,
                 std::vector<DataBuffer> &outputs,
                 std::vector<GeTensorDesc> &output_desc);
  Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);
  Status Start(const std::shared_ptr<ModelListener> &listener);
@@ -38,6 +38,14 @@ class HybridDavinciModel::Impl {
    return SUCCESS;
  }
  Status Execute(const std::vector<DataBuffer> &inputs,
                 const std::vector<GeTensorDesc> &input_desc,
                 std::vector<DataBuffer> &outputs,
                 std::vector<GeTensorDesc> &output_desc,
                 rtStream_t stream) {
    return executor_.Execute(inputs, input_desc, outputs, output_desc);
  }
  Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
    return executor_.Execute(inputs, outputs);
  }
@@ -68,6 +76,33 @@ class HybridDavinciModel::Impl {
    executor_.SetDeviceId(device_id);
  }
  uint64_t GetSessionId() {
    return model_.GetSessionId();
  }
  Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
    return model_.GetDynamicBatchInfo(batch_info, dynamic_type);
  }
  void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
    model_.GetUserDesignateShapeOrder(user_input_shape_order);
  }
  void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
    model_.GetModelAttr(dynamic_output_shape_info);
  }
  Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
                                vector<InputOutputDescInfo> &output_desc,
                                std::vector<uint32_t> &input_formats,
                                std::vector<uint32_t> &output_formats) {
    return model_.GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats);
  }
  void SetModelDescVersion(bool is_new_model_desc) {
    model_.SetModelDescVersion(is_new_model_desc);
  }
 private:
  std::shared_ptr<ModelListener> listener_;
  HybridModel model_;
@@ -95,6 +130,14 @@ Status HybridDavinciModel::Init() {
  return impl_->Init();
}
Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs,
                                   const std::vector<GeTensorDesc> &input_desc,
                                   std::vector<DataBuffer> &outputs,
                                   std::vector<GeTensorDesc> &output_desc, rtStream_t stream) {
  GE_CHECK_NOTNULL(impl_);
  return impl_->Execute(inputs, input_desc, outputs, output_desc, stream);
}
Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
  GE_CHECK_NOTNULL(impl_);
  return impl_->Execute(inputs, outputs);
@@ -132,5 +175,41 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
    impl_->SetDeviceId(device_id);
  }
}
Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
  GE_CHECK_NOTNULL(impl_);
  return impl_->GetDynamicBatchInfo(batch_info, dynamic_type);
}
void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
  if (impl_ != nullptr) {
    impl_->GetUserDesignateShapeOrder(user_input_shape_order);
  }
}
void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
  if (impl_ != nullptr) {
    impl_->GetModelAttr(dynamic_output_shape_info);
  }
}
Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
                                                  vector<InputOutputDescInfo> &output_desc,
                                                  std::vector<uint32_t> &input_formats,
                                                  std::vector<uint32_t> &output_formats) {
  GE_CHECK_NOTNULL(impl_);
  return impl_->GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats);
}
void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
  if (impl_ != nullptr) {
    impl_->SetModelDescVersion(is_new_model_desc);
  }
}
uint64_t HybridDavinciModel::GetSessionId() {
  GE_CHECK_NOTNULL(impl_);
  return impl_->GetSessionId();
}
}  // namespace hybrid
}  // namespace ge
@@ -37,6 +37,12 @@ class HybridDavinciModel {
  Status Init();
  Status Execute(const std::vector<DataBuffer> &inputs,
                 const std::vector<GeTensorDesc> &input_desc,
                 std::vector<DataBuffer> &outputs,
                 std::vector<GeTensorDesc> &output_desc,
                 rtStream_t stream);
  Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);
  Status ModelRunStart();
@@ -51,6 +57,21 @@ class HybridDavinciModel {
  void SetDeviceId(uint32_t device_id);
  uint64_t GetSessionId();
  Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
  void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order);
  void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);
  Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
                                vector<InputOutputDescInfo> &output_desc,
                                std::vector<uint32_t> &input_formats,
                                std::vector<uint32_t> &output_formats);
  void SetModelDescVersion(bool is_new_model_desc);
 private:
  HybridDavinciModel() = default;
  class Impl;
@@ -28,6 +28,14 @@ Status HybridDavinciModel::Init() {
  return UNSUPPORTED;
}
Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs,
                                   const std::vector<GeTensorDesc> &input_desc,
                                   std::vector<DataBuffer> &outputs,
                                   std::vector<GeTensorDesc> &output_desc,
                                   rtStream_t stream) {
  return UNSUPPORTED;
}
Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
  return UNSUPPORTED;
}
@@ -52,5 +60,29 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) {
void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
}
uint64_t HybridDavinciModel::GetSessionId() {
  return 0;
}
Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
  return UNSUPPORTED;
}
void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
}
void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
}
Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
                                                  vector<InputOutputDescInfo> &output_desc,
                                                  std::vector<uint32_t> &input_formats,
                                                  std::vector<uint32_t> &output_formats) {
  return UNSUPPORTED;
}
void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
}
}  // namespace hybrid
}  // namespace ge
@@ -21,12 +21,18 @@
#include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/node_executor/node_executor.h"
#include "common/op/ge_op_utils.h"
namespace ge {
namespace hybrid {
namespace {
const int64_t kMemSizeUnknownShape = -1;  // Unknown shape mem size
}
HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) {
}
@@ -128,7 +134,214 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c
}
const string &HybridModel::GetModelName() const {
  return model_name_;
}
Status HybridModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
  // dynamic shape does not need dynamic batch
  batch_info = {};
  dynamic_type = -1;
  return SUCCESS;
}
void HybridModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
  // dynamic shape does not need dynamic batch
  user_input_shape_order = {};
}
void HybridModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
  dynamic_output_shape_info = {};
}
Status HybridModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
                                           vector<InputOutputDescInfo> &output_desc,
                                           std::vector<uint32_t> &input_formats,
                                           std::vector<uint32_t> &output_formats) {
  auto node_item_list = root_graph_item_->GetInputNodes();
  if (node_item_list.empty()) {
    GELOGE(FAILED, "node item list is empty!");
    return FAILED;
  }
  GE_CHECK_NOTNULL(node_item_list[0]->node);
  GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc());
  if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) {
    GELOGE(FAILED, "input size of op is not 1!");
    return FAILED;
  }
  GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");
  GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed");
  return SUCCESS;
}
void HybridModel::SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims,
                                                 std::vector<std::pair<int64_t, int64_t>> &shape_ranges,
                                                 Format &format, InputOutputDescInfo &input) {
  uint32_t n, c, h, w;
  n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N;
  c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C;
  h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H;
  w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W;
  if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) {
    input.shape_info.num = model_input_dims[n];
    input.shape_info.height = model_input_dims[h];
    input.shape_info.width = model_input_dims[w];
    input.shape_info.channel = model_input_dims[c];
  }
  for (auto model_input_dim : model_input_dims) {
    input.shape_info.dims.push_back(model_input_dim);
  }
  input.shape_info.shape_ranges = shape_ranges;
  return;
}
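// Editor's note: a worked example of the index mapping above. For an NCHW input of
// [8, 3, 224, 224] the selectors resolve to n=0, c=1, h=2, w=3, so shape_info gets
// num=8, channel=3, height=224, width=224; for an NHWC input of [8, 224, 224, 3]
// they resolve to n=0, h=1, w=2, c=3 and yield the same logical values. The raw
// dims vector is copied into shape_info.dims unchanged in both cases.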
void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) {
  std::vector<std::pair<int64_t, int64_t>> shape_ranges;
  if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) {
    // When static aipp is set, need to get the model input dims which were processed by aipp
    vector<int64_t> model_input_dims;
    (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims);
    SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, format, input);
    return;
  }
  // judge if this data is linked to dynamic aipp first; multi-batch has been considered
  if (op_desc->HasAttr("_dynamic_aipp_input_dims")) {
    vector<int64_t> dynamic_aipp_input_dims;
    (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims);
    SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, format, input);
    return;
  } else {
    vector<int64_t> input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims();
    op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges);
    SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, format, input);
    return;
  }
}
Status HybridModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
  auto node_item_list = root_graph_item_->GetInputNodes();
  for (auto &node_item : node_item_list) {
    InputOutputDescInfo input;
    GE_CHECK_NOTNULL(node_item->node);
    auto op_desc = node_item->node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0));
    Format format = op_desc->GetInputDescPtr(0)->GetFormat();
    input.data_type = op_desc->GetInputDescPtr(0)->GetDataType();
    input.name = op_desc->GetName();
    int64_t input_size = 0;
    GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed.");
    // support dynamic shape: an unknown-shape input reports its size as kMemSizeUnknownShape (-1)
    if (input_size < 0) {
      GELOGD("dynamic shape scene, input size is unknown. format=%d, data_type=%d, input_size=%ld",
             format, input.data_type, input_size);
      input_size = kMemSizeUnknownShape;  // -1
    }
    input.size = input_size;
    CreateInputDimsInfo(op_desc, format, input);
    formats.push_back(format);
    input_desc.push_back(input);
  }
  is_new_model_desc_ = false;
  return SUCCESS;
}
| void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) { | |||
| GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return ); | |||
| Format format = output_desc->GetFormat(); | |||
| GeShape shape = output_desc->GetShape(); | |||
| std::vector<std::pair<int64_t, int64_t>> shape_ranges; | |||
| output_desc->GetShapeRange(shape_ranges); | |||
| DataType data_type = output_desc->GetDataType(); | |||
| int64_t dims[] = {1, 1, 1, 1}; | |||
| format_result = format; | |||
| if (format == FORMAT_ND) { // for ND tensor | |||
| for (size_t i = 0; i < shape.GetDimNum() && i < (sizeof(dims) / sizeof(dims[0])); i++) { | |||
| dims[i] = shape.GetDim(i); | |||
| } | |||
| } else { // for FORMAT_NHWC or FORMAT_NCHW | |||
| dims[0] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N); // 0: first dim | |||
| dims[1] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C); // 1: second dim | |||
| dims[2] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H); // 2: third dim | |||
| dims[3] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W); // 3: fourth dim | |||
| } | |||
| output_desc_info.shape_info.num = dims[0]; // 0: first dim | |||
| output_desc_info.shape_info.channel = dims[1]; // 1: second dim | |||
| output_desc_info.shape_info.height = dims[2]; // 2: third dim | |||
| output_desc_info.shape_info.width = dims[3]; // 3: fourth dim | |||
| if (format == FORMAT_FRACTAL_Z) { // FraczToHWCK | |||
| int64_t k = shape.GetDim(0); // 0: first dim | |||
| int64_t c = shape.GetDim(1); // 1: second dim | |||
| int64_t h = shape.GetDim(2); // 2: third dim | |||
| int64_t w = shape.GetDim(3); // 3: fourth dim | |||
| output_desc_info.shape_info.dims.push_back(h); | |||
| output_desc_info.shape_info.dims.push_back(w); | |||
| output_desc_info.shape_info.dims.push_back(c); | |||
| output_desc_info.shape_info.dims.push_back(k); | |||
| if (shape_ranges.size() == 4) { // 4 dims | |||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[2]); // h:2 | |||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[3]); // w:3 | |||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[1]); // c:1 | |||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[0]); // k:0 | |||
| } | |||
| format_result = FORMAT_HWCN; | |||
| } else { | |||
| for (size_t j = 0; j < shape.GetDimNum(); j++) { | |||
| output_desc_info.shape_info.dims.push_back(shape.GetDim(j)); | |||
| } | |||
| output_desc_info.shape_info.shape_ranges = shape_ranges; | |||
| } | |||
| int64_t tensor_size = 0; | |||
| (void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | |||
| output_desc_info.size = static_cast<uint64_t>(tensor_size); | |||
| output_desc_info.data_type = output_desc->GetDataType(); | |||
| } | |||
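A worked illustration of the `FRACTAL_Z` branch: a weight tensor stored as (K, C, H, W) is reported in HWCK order, with `FORMAT_HWCN` as the resulting format (values are illustrative only):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int64_t> kchw = {64, 3, 7, 7};  // FRACTAL_Z dims as (K, C, H, W)
  // CreateOutput pushes h, w, c, k -- i.e. HWCK -- and reports FORMAT_HWCN.
  const std::vector<int64_t> hwck = {kchw[2], kchw[3], kchw[1], kchw[0]};
  for (int64_t d : hwck) {
    std::printf("%lld ", static_cast<long long>(d));
  }
  std::printf("\n");  // prints: 7 7 3 64
  return 0;
}
```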
| Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) { | |||
| std::vector<ConstGeTensorDescPtr> output_desc_list; | |||
| GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list holds the output node's valid input descs, which describe the model outputs | |||
| vector<std::string> out_node_names; | |||
| (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names); | |||
| GE_CHECK_NOTNULL(root_graph_item_->GetOutputNode()); | |||
| auto op_desc = root_graph_item_->GetOutputNode()->op_desc; | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| auto out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||
| GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] does not match output_desc_list size[%zu]", out_size, output_desc_list.size()); | |||
| for (uint32_t index = 0; index < out_size; ++index) { | |||
| string output_name; | |||
| std::vector<std::string> src_name = op_desc->GetSrcName(); | |||
| std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||
| if (out_size == out_node_names.size()) { | |||
| bool contains_colon = out_node_names[index].find(":") != std::string::npos; | |||
| output_name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]); | |||
| } else { | |||
| output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); | |||
| } | |||
| InputOutputDescInfo output_desc_info; | |||
| output_desc_info.name = output_name; | |||
| uint32_t format_result; | |||
| CreateOutput(output_desc_list[index], output_desc_info, format_result); | |||
| output_desc.push_back(output_desc_info); | |||
| formats.push_back(format_result); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
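The naming rule reduces to two branches; a small stand-alone sketch (the function name is hypothetical):

```cpp
#include <cstdint>
#include <string>

// Hypothetical mirror of the output-name construction in GetOutputDescInfo.
std::string MakeOutputName(bool has_user_names, const std::string &user_name,
                           uint32_t index, const std::string &src_name, int64_t src_index) {
  if (has_user_names) {
    // User-designated names get a ":<src_index>" suffix unless one is already present.
    return user_name.find(':') != std::string::npos
               ? user_name
               : user_name + ":" + std::to_string(src_index);
  }
  // Default pattern: output_<index>_<src_name>_<src_index>
  return "output_" + std::to_string(index) + "_" + src_name + "_" + std::to_string(src_index);
}
```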
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -83,6 +83,30 @@ class HybridModel { | |||
| const string &GetModelName() const; | |||
| Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type); | |||
| void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order); | |||
| void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||
| Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||
| vector<InputOutputDescInfo> &output_desc, | |||
| std::vector<uint32_t> &input_formats, | |||
| std::vector<uint32_t> &output_formats); | |||
| Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | |||
| void CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||
| Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats); | |||
| void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | |||
| void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | |||
| void SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims, std::vector<std::pair<int64_t, int64_t>> &shape_ranges, | |||
| Format &format, InputOutputDescInfo &input); | |||
| private: | |||
| friend class HybridModelBuilder; | |||
| friend class HybridModelAsyncExecutor; | |||
| @@ -101,6 +125,8 @@ class HybridModel { | |||
| std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; | |||
| std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | |||
| bool is_new_model_desc_ = false; // true when AIPP-processed input dims should be reported | |||
| // runtime fields | |||
| uint32_t device_id_ = 0; | |||
| uint32_t model_id_ = 0; | |||
| @@ -27,6 +27,8 @@ | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "hybrid/common/npu_memory_allocator.h" | |||
| #include "hybrid/node_executor/node_executor.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| namespace ge { | |||
| namespace hybrid { | |||
| @@ -37,6 +39,30 @@ const uint32_t kAlignment = 32; | |||
| const int kBytes = 8; | |||
| const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | |||
| Status SetOutputNameAttr(ComputeGraph &graph) { | |||
| vector<string> output_names; | |||
| for (const auto &node : graph.GetDirectNode()) { | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| continue; | |||
| } | |||
| auto op_type = op_desc->GetType(); | |||
| if (op_type == NETOUTPUT) { | |||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | |||
| const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
| NodePtr in_node = peer_out_anchor->GetOwnerNode(); | |||
| GE_CHECK_NOTNULL(in_node); | |||
| output_names.push_back(in_node->GetName()); | |||
| } | |||
| } | |||
| } | |||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names), | |||
| GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); | |||
| return FAILED); | |||
| return SUCCESS; | |||
| } | |||
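After this pass, `ATTR_MODEL_OUT_NODES_NAME` holds the names of the producers feeding `NETOUTPUT`, in input-anchor order. A hedged read-back sketch using the same `AttrUtils` accessor seen elsewhere in this change (the graph pointer and node names are illustrative):

```cpp
// Sketch: reading back the attribute written by SetOutputNameAttr.
std::vector<std::string> output_names;
(void)ge::AttrUtils::GetListStr(root_graph, ATTR_MODEL_OUT_NODES_NAME, output_names);
// e.g. output_names == {"conv_out", "softmax"} for a graph whose NETOUTPUT
// consumes those two nodes on its data anchors.
```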
| int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { | |||
| int64_t var_size = 0; | |||
| auto data_type = desc.GetDataType(); | |||
| @@ -939,6 +965,10 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr | |||
| Status HybridModelBuilder::IndexTaskDefs() { | |||
| const auto &root_graph = ge_root_model_->GetRootGraph(); | |||
| if (SetOutputNameAttr(*root_graph) != SUCCESS) { | |||
| GELOGW("Set output name attr failed."); | |||
| } | |||
| for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { | |||
| auto &name = it.first; | |||
| auto &ge_model = it.second; | |||
| @@ -19,6 +19,7 @@ | |||
| #include "framework/common/debug/log.h" | |||
| #include "hybrid/executor/hybrid_execution_context.h" | |||
| #include "hybrid/node_executor/aicore/aicore_task_builder.h" | |||
| #include "graph/load/new_model_manager/tbe_handle_store.h" | |||
| using optiling::OpRunInfo; | |||
| @@ -36,6 +37,58 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) | |||
| return SUCCESS; | |||
| } | |||
| Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||
| auto op_desc_ptr = make_shared<OpDesc>(op_desc); | |||
| GE_CHECK_NOTNULL(op_desc_ptr); | |||
| auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||
| if (tbe_kernel == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||
| rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| void *bin_handle = nullptr; | |||
| if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | |||
| GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||
| rtDevBinary_t binary; | |||
| std::string json_string; | |||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | |||
| GELOGI("Get original type of session_graph_id.")); | |||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | |||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | |||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF; | |||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | |||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||
| } else { | |||
| GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| binary.version = 0; | |||
| binary.data = tbe_kernel->GetBinData(); | |||
| binary.length = tbe_kernel->GetBinDataSize(); | |||
| GELOGI("TBE: binary.length: %lu", binary.length); | |||
| GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | |||
| std::string meta_data; | |||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), | |||
| GELOGI("Get original type of json_string")); | |||
| GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | |||
| GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | |||
| kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); | |||
| } else { | |||
| GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||
| kernel_store.ReferTBEHandle(stub_name_.c_str()); | |||
| } | |||
| std::string kernel_name; | |||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), | |||
| GELOGI("Get original type of kernel_name")); | |||
| GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); | |||
| GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
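The magic-string dispatch in `RegisterTbeHandle` is a plain table lookup; an equivalent stand-alone sketch (the enum stands in for the runtime's `RT_DEV_BINARY_MAGIC_*` constants):

```cpp
#include <cstdint>
#include <map>
#include <string>

// Stand-ins for the rtDevBinary_t magic constants used above.
enum class BinMagic : uint32_t { kElfAicpu, kElf, kElfAivec, kInvalid };

BinMagic MagicFromJson(const std::string &json_string) {
  static const std::map<std::string, BinMagic> kMagics = {
      {"RT_DEV_BINARY_MAGIC_ELF_AICPU", BinMagic::kElfAicpu},
      {"RT_DEV_BINARY_MAGIC_ELF", BinMagic::kElf},
      {"RT_DEV_BINARY_MAGIC_ELF_AIVEC", BinMagic::kElfAivec},
  };
  const auto it = kMagics.find(json_string);
  return it == kMagics.end() ? BinMagic::kInvalid : it->second;  // kInvalid maps to PARAM_INVALID above
}
```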
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||
| "[%s] Failed to validate task def: [%s]", | |||
| @@ -45,6 +98,9 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const domi::KernelContext &context = kernel_def.context(); | |||
| stub_name_ = kernel_def.stub_func(); | |||
| GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); | |||
| GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | |||
| args_size_ = kernel_def.args_size(); | |||
| block_dim_ = kernel_def.block_dim(); | |||
| @@ -62,6 +62,7 @@ class AiCoreOpTask { | |||
| static Status ValidateTaskDef(const domi::TaskDef &task_def); | |||
| Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | |||
| Status InitTilingInfo(const OpDesc &op_desc); | |||
| Status RegisterTbeHandle(const OpDesc &op_desc); | |||
| std::string stub_name_; | |||
| void *stub_func_ = nullptr; | |||
| @@ -234,6 +234,22 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
| ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, | |||
| ge::RunModelData &output_data, bool async_mode = false); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Synchronous execution of offline model (does not create a thread) | |||
| /// @param [in] uint32_t model_id: model ID to execute | |||
| /// @param [in] void *stream: stream to execute on | |||
| /// @param [in] const ge::RunModelData &run_input_data: model input data | |||
| /// @param [in] const std::vector<GeTensorDesc> &input_desc: description of model input data | |||
| /// @param [out] ge::RunModelData &run_output_data: model output data | |||
| /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data | |||
| /// @param [in] bool async_mode: whether to execute asynchronously (defaults to false) | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | |||
| const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data, | |||
| std::vector<GeTensorDesc> &output_desc, bool async_mode = false); | |||
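A hedged call sketch for this new overload (model loading, stream creation, and buffer filling are elided; all values are illustrative):

```cpp
// Sketch: synchronous execution with explicit tensor descriptions.
GeExecutor executor;
uint32_t model_id = 0;         // obtained from a prior load call (illustrative)
void *stream = nullptr;        // a runtime stream created by the caller (illustrative)
ge::RunModelData input_data;   // input buffers filled by the caller
ge::RunModelData output_data;  // output buffers sized in advance
std::vector<GeTensorDesc> input_desc;   // actual (possibly dynamic) input shapes
std::vector<GeTensorDesc> output_desc;  // populated on return
ge::Status ret = executor.ExecModel(model_id, stream, input_data, input_desc,
                                    output_data, output_desc, /*async_mode=*/false);
```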
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get weight memory size from model file | |||