| @@ -639,15 +639,6 @@ set(INFER_SRC_LIST | |||||
| "graph/load/model_manager/task_info/model_exit_task_info.cc" | "graph/load/model_manager/task_info/model_exit_task_info.cc" | ||||
| "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | ||||
| "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | ||||
| "single_op/task/op_task.cc" | |||||
| "single_op/task/build_task_utils.cc" | |||||
| "single_op/task/tbe_task_builder.cc" | |||||
| "single_op/task/aicpu_task_builder.cc" | |||||
| "single_op/task/aicpu_kernel_task_builder.cc" | |||||
| "single_op/single_op.cc" | |||||
| "single_op/single_op_model.cc" | |||||
| "single_op/stream_resource.cc" | |||||
| "single_op/single_op_manager.cc" | |||||
| "hybrid/hybrid_davinci_model_stub.cc" | "hybrid/hybrid_davinci_model_stub.cc" | ||||
| "ir_build/ge_ir_build.cc" | "ir_build/ge_ir_build.cc" | ||||
| "ir_build/atc_ir_common.cc" | "ir_build/atc_ir_common.cc" | ||||
| @@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s | |||||
| TensorValue::~TensorValue() { Destroy(); } | TensorValue::~TensorValue() { Destroy(); } | ||||
| void TensorValue::Destroy() { | void TensorValue::Destroy() { | ||||
| if (buffer_ != nullptr || ref_buffer_ != nullptr) { | |||||
| if (buffer_ != nullptr) { | |||||
| GELOGD("Unref tensor: %s", DebugString().c_str()); | GELOGD("Unref tensor: %s", DebugString().c_str()); | ||||
| buffer_.reset(); | buffer_.reset(); | ||||
| } | } | ||||
| @@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
| GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | ||||
| HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call."); | |||||
| HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), | |||||
| "Failed to execute partitioned call."); | |||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | ||||
| HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | ||||
| args.outputs.clear(); | |||||
| HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | ||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue> | |||||
| } | } | ||||
| Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | ||||
| const std::vector<ConstGeTensorDescPtr> &input_desc) { | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc, | |||||
| const std::vector<TensorValue> &outputs) { | |||||
| GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); | GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); | ||||
| GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); | GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); | ||||
| if (!outputs.empty()) { | |||||
| GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs), | |||||
| "Failed to enable output zero copy by user provided outputs."); | |||||
| } | |||||
| if (!graph_item_->IsDynamic()) { | if (!graph_item_->IsDynamic()) { | ||||
| return ExecuteAsyncForKnownShape(inputs); | return ExecuteAsyncForKnownShape(inputs); | ||||
| } | } | ||||
| @@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc) { | |||||
| return ExecuteAsync(inputs, input_desc, {}); | |||||
| } | |||||
| Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) { | Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) { | ||||
| GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); | GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); | ||||
| if (graph_item_->GetAllNodes().size() != 1) { | if (graph_item_->GetAllNodes().size() != 1) { | ||||
| @@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) { | |||||
| GELOGD("To enable zero copy, output number = %zu", outputs.size()); | |||||
| const auto &output_edges = graph_item_->GetOutputEdges(); | |||||
| // Op -> NetOutput, set the output tensor of Op that output to the NetOutput node | |||||
| if (outputs.size() != output_edges.size()) { | |||||
| GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu", | |||||
| output_edges.size(), | |||||
| outputs.size()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| for (size_t i = 0; i < outputs.size(); ++i) { | |||||
| auto &output_tensor = outputs[i]; | |||||
| auto &output_node = output_edges[i].first; | |||||
| int output_idx = output_edges[i].second; | |||||
| GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s", | |||||
| graph_item_->GetName().c_str(), | |||||
| i, | |||||
| output_node->NodeName().c_str(), | |||||
| output_idx, | |||||
| output_tensor.DebugString().c_str()); | |||||
| GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor), | |||||
| "[%s] Failed to set input tensor[%zu]", | |||||
| graph_item_->GetName().c_str(), | |||||
| i); | |||||
| } | |||||
| GELOGD("Done enabling zero copy for outputs successfully."); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -43,7 +43,19 @@ class SubgraphExecutor { | |||||
| * @param input_desc input tensor descriptions | * @param input_desc input tensor descriptions | ||||
| * @return SUCCESS on success, error code otherwise | * @return SUCCESS on success, error code otherwise | ||||
| */ | */ | ||||
| Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc); | |||||
| Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc); | |||||
| /** | |||||
| * Execute subgraph async, output tensor address(not data) and output tensor descriptions are | |||||
| * valid after this method returned | |||||
| * @param inputs input tensors | |||||
| * @param input_desc input tensor descriptions | |||||
| * @return SUCCESS on success, error code otherwise | |||||
| */ | |||||
| Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
| const std::vector<ConstGeTensorDescPtr> &input_desc, | |||||
| const std::vector<TensorValue> &outputs); | |||||
| /** | /** | ||||
| * Execute subgraph async, output tensor address(not data) and output tensor descriptions are | * Execute subgraph async, output tensor address(not data) and output tensor descriptions are | ||||
| @@ -75,6 +87,7 @@ class SubgraphExecutor { | |||||
| Status GetOutputs(std::vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc); | Status GetOutputs(std::vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc); | ||||
| private: | private: | ||||
| Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs); | |||||
| Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); | Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); | ||||
| static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); | static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); | ||||
| Status Init(const std::vector<TensorValue> &inputs, | Status Init(const std::vector<TensorValue> &inputs, | ||||
| @@ -40,9 +40,13 @@ HybridModel::~HybridModel() { | |||||
| GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); | GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); | ||||
| } | } | ||||
| Status HybridModel::Init() { | |||||
| Status HybridModel::Init(bool is_single_op) { | |||||
| GELOGD("Start to init hybrid model."); | GELOGD("Start to init hybrid model."); | ||||
| GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||||
| if (is_single_op) { | |||||
| GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model."); | |||||
| } else { | |||||
| GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||||
| } | |||||
| GELOGD("HybridModel initialized successfully."); | GELOGD("HybridModel initialized successfully."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -37,7 +37,7 @@ class HybridModel { | |||||
| ~HybridModel(); | ~HybridModel(); | ||||
| Status Init(); | |||||
| Status Init(bool is_single_op = false); | |||||
| const NodeItem *GetNodeItem(const NodePtr &node) const; | const NodeItem *GetNodeItem(const NodePtr &node) const; | ||||
| @@ -136,6 +136,7 @@ class HybridModel { | |||||
| uint32_t device_id_ = 0; | uint32_t device_id_ = 0; | ||||
| uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
| uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
| std::unique_ptr<TensorBuffer> weight_buffer_; | |||||
| RuntimeParam root_runtime_param_; | RuntimeParam root_runtime_param_; | ||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| @@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::BuildForSingleOp() { | |||||
| GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel"); | |||||
| hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName(); | |||||
| GELOGI("[%s] Start to build hybrid model.", GetGraphName()); | |||||
| auto ret = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||||
| const GeModelPtr ge_model = ret[ge_root_model_->GetRootGraph()->GetName()]; | |||||
| GE_CHK_STATUS_RET(IndexTaskDefs(ge_root_model_->GetRootGraph(), ge_model), | |||||
| "[%s] Failed to index task defs", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); | |||||
| GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName()); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::ValidateParams() { | Status HybridModelBuilder::ValidateParams() { | ||||
| GE_CHECK_NOTNULL(ge_root_model_); | GE_CHECK_NOTNULL(ge_root_model_); | ||||
| GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); | ||||
| @@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
| } | } | ||||
| Status HybridModelBuilder::InitWeights() { | Status HybridModelBuilder::InitWeights() { | ||||
| // For constant in root graph | |||||
| const auto &root_graph = ge_root_model_->GetRootGraph(); | |||||
| const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||||
| auto iter = subgraph_models.find(root_graph->GetName()); | |||||
| if (iter == subgraph_models.end()) { | |||||
| GELOGD("Root graph model not found"); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto &root_model = iter->second; | |||||
| const auto &weight_buffer = root_model->GetWeight(); | |||||
| if (weight_buffer.GetSize() == 0) { | |||||
| GELOGD("weight is empty"); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto allocator = NpuMemoryAllocator::GetAllocator(); | auto allocator = NpuMemoryAllocator::GetAllocator(); | ||||
| GE_CHECK_NOTNULL(allocator); | GE_CHECK_NOTNULL(allocator); | ||||
| for (auto &it : hybrid_model_.node_items_) { | |||||
| auto &node_item = it.second; | |||||
| if (node_item->node_type != CONSTANT) { | |||||
| hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.GetSize()); | |||||
| GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_); | |||||
| auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData()); | |||||
| GE_CHK_RT_RET(rtMemcpy(weight_base, | |||||
| hybrid_model_.weight_buffer_->GetSize(), | |||||
| weight_buffer.GetData(), | |||||
| weight_buffer.GetSize(), | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | |||||
| weight_base, | |||||
| hybrid_model_.weight_buffer_->GetSize()); | |||||
| for (auto &node : root_graph->GetDirectNode()) { | |||||
| if (node->GetType() != CONSTANT) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| const auto &constant_node = node_item->node; | |||||
| auto op_desc = constant_node->GetOpDesc(); | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| auto v_weights = ModelUtils::GetWeights(op_desc); | auto v_weights = ModelUtils::GetWeights(op_desc); | ||||
| if (v_weights.empty()) { | if (v_weights.empty()) { | ||||
| GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | ||||
| auto output_desc = op_desc->MutableOutputDesc(0); | |||||
| GE_CHECK_NOTNULL(output_desc); | |||||
| auto tensor_size = ge_tensor->GetData().GetSize(); | |||||
| GELOGD("[%s] Start to init Constant node [%s], size = %ld", | |||||
| GE_CHECK_NOTNULL(ge_tensor); | |||||
| const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc(); | |||||
| int64_t tensor_size = 0; | |||||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size), | |||||
| "[%s] Failed to get tensor size", | |||||
| node->GetName().c_str()); | |||||
| int64_t data_offset = 0; | |||||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset), | |||||
| "[%s] Failed to get data offset", | |||||
| node->GetName().c_str()); | |||||
| GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld", | |||||
| GetGraphName(), | GetGraphName(), | ||||
| node->GetName().c_str(), | constant_node->GetName().c_str(), | ||||
| tensor_size); | |||||
| tensor_size, | |||||
| data_offset); | |||||
| auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); | |||||
| auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size); | |||||
| GE_CHECK_NOTNULL(tensor_buffer); | GE_CHECK_NOTNULL(tensor_buffer); | ||||
| std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | ||||
| GE_CHECK_NOTNULL(constant_tensor); | GE_CHECK_NOTNULL(constant_tensor); | ||||
| constant_tensor->SetName("Constant_" + op_desc->GetName()); | constant_tensor->SetName("Constant_" + op_desc->GetName()); | ||||
| if (tensor_size > 0) { | |||||
| GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(), | |||||
| constant_tensor->GetSize(), | |||||
| ge_tensor->GetData().data(), | |||||
| ge_tensor->GetData().size(), | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| } | |||||
| hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor)); | |||||
| GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size); | |||||
| hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor)); | |||||
| GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) { | |||||
| // index task defs | |||||
| GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str()); | |||||
| std::unordered_map<int64_t, NodePtr> node_map; | |||||
| for (const auto &node : sub_graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| auto node_id = node->GetOpDesc()->GetId(); | |||||
| GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str()); | |||||
| node_map.emplace(node_id, node); | |||||
| } | |||||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | |||||
| for (int i = 0; i < tasks.size(); ++i) { | |||||
| const domi::TaskDef &task_def = tasks[i]; | |||||
| GELOGI("Task id = %d, task type = %d", i, task_def.type()); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| uint32_t op_index = -1; | |||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| op_index = task_def.kernel().context().op_index(); | |||||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
| op_index = task_def.kernel_ex().op_index(); | |||||
| } else if (task_type == RT_MODEL_TASK_HCCL) { | |||||
| op_index = task_def.kernel_hccl().op_index(); | |||||
| } else { | |||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||||
| continue; | |||||
| } | |||||
| auto iter = node_map.find(op_index); | |||||
| if (iter == node_map.end()) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto &node = iter->second; | |||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | |||||
| } | |||||
| GELOGD("Task loaded for node: %s, task type = %d, op_index = %u", node->GetName().c_str(), task_type, op_index); | |||||
| hybrid_model_.task_defs_[node].emplace_back(task_def); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::IndexTaskDefs() { | Status HybridModelBuilder::IndexTaskDefs() { | ||||
| const auto &root_graph = ge_root_model_->GetRootGraph(); | const auto &root_graph = ge_root_model_->GetRootGraph(); | ||||
| if (SetOutputNameAttr(*root_graph) != SUCCESS) { | if (SetOutputNameAttr(*root_graph) != SUCCESS) { | ||||
| @@ -35,6 +35,7 @@ class HybridModelBuilder { | |||||
| explicit HybridModelBuilder(HybridModel &hybrid_model); | explicit HybridModelBuilder(HybridModel &hybrid_model); | ||||
| ~HybridModelBuilder() = default; | ~HybridModelBuilder() = default; | ||||
| Status Build(); | Status Build(); | ||||
| Status BuildForSingleOp(); | |||||
| private: | private: | ||||
| static Status UpdateAnchorStatus(const NodePtr &node); | static Status UpdateAnchorStatus(const NodePtr &node); | ||||
| @@ -64,6 +65,7 @@ class HybridModelBuilder { | |||||
| Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | ||||
| Status ParseDependentForFusedSubgraph(NodeItem &node_item); | Status ParseDependentForFusedSubgraph(NodeItem &node_item); | ||||
| Status IndexTaskDefs(); | Status IndexTaskDefs(); | ||||
| Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model); | |||||
| Status IndexSpecialNodes(); | Status IndexSpecialNodes(); | ||||
| Status InitRuntimeParams(); | Status InitRuntimeParams(); | ||||
| Status InitModelMem(); | Status InitModelMem(); | ||||
| @@ -251,6 +251,10 @@ Status TaskContext::AllocateOutput(int index, | |||||
| } | } | ||||
| } | } | ||||
| if (outputs_start_[index].GetSize() > 0) { | |||||
| GE_CHK_RT_RET(rtMemset(outputs_start_[index].MutableData(), outputs_start_[index].GetSize(), 0, outputs_start_[index].GetSize())); | |||||
| } | |||||
| if (execution_context_->trace_enabled) { | if (execution_context_->trace_enabled) { | ||||
| outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index)); | outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index)); | ||||
| } | } | ||||
| @@ -397,7 +401,7 @@ Status TaskContext::PropagateOutputs() { | |||||
| subgraph_context_->all_inputs_[input_offset] = *tensor; | subgraph_context_->all_inputs_[input_offset] = *tensor; | ||||
| if (execution_context_->trace_enabled) { | if (execution_context_->trace_enabled) { | ||||
| subgraph_context_->all_inputs_[input_offset].SetName( | subgraph_context_->all_inputs_[input_offset].SetName( | ||||
| node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); | |||||
| dst_node_item->NodeName() + "_in_" + std::to_string(dst_input_idx)); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -256,10 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
| const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
| vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
| vector<DataBuffer> &output_buffers) { | vector<DataBuffer> &output_buffers) { | ||||
| GE_CHECK_NOTNULL(op_task_); | |||||
| GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | ||||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||||
| if (hybrid_model_executor_ != nullptr) { | |||||
| GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | |||||
| hybrid::HybridModelExecutor::ExecuteArgs args; | |||||
| for (auto &input : input_buffers) { | |||||
| args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||||
| } | |||||
| for (auto &output : output_buffers) { | |||||
| args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||||
| } | |||||
| for (auto &tensor_desc : input_desc) { | |||||
| auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||||
| GE_CHECK_NOTNULL(desc); | |||||
| args.input_desc.emplace_back(desc); | |||||
| } | |||||
| return hybrid_model_executor_->Execute(args); | |||||
| } | |||||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||||
| GE_CHECK_NOTNULL(op_task_); | |||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | ||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -28,6 +28,7 @@ | |||||
| #include "runtime/stream.h" | #include "runtime/stream.h" | ||||
| #include "task/op_task.h" | #include "task/op_task.h" | ||||
| #include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
| #include "hybrid/executor/hybrid_model_executor.h" | |||||
| namespace ge { | namespace ge { | ||||
| class StreamResource; | class StreamResource; | ||||
| @@ -46,7 +47,7 @@ class SingleOp { | |||||
| Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| StreamResource *stream_resource_; | |||||
| StreamResource *stream_resource_ = nullptr; | |||||
| std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| std::vector<void *> input_addr_list_; | std::vector<void *> input_addr_list_; | ||||
| @@ -77,6 +78,8 @@ class DynamicSingleOp { | |||||
| std::vector<DataBuffer> &outputs) const; | std::vector<DataBuffer> &outputs) const; | ||||
| std::unique_ptr<OpTask> op_task_; | std::unique_ptr<OpTask> op_task_; | ||||
| std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||||
| std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||||
| uintptr_t resource_id_ = 0; | uintptr_t resource_id_ = 0; | ||||
| std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| @@ -31,6 +31,8 @@ | |||||
| #include "task/aicpu_task_builder.h" | #include "task/aicpu_task_builder.h" | ||||
| #include "task/aicpu_kernel_task_builder.h" | #include "task/aicpu_kernel_task_builder.h" | ||||
| #include "task/tbe_task_builder.h" | #include "task/tbe_task_builder.h" | ||||
| #include "hybrid/executor/hybrid_model_executor.h" | |||||
| #include "hybrid/node_executor/node_executor.h" | |||||
| static std::atomic<std::uint64_t> aicpu_kernel_id(0); | static std::atomic<std::uint64_t> aicpu_kernel_id(0); | ||||
| @@ -42,6 +44,20 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| const size_t kDataOutputNum = 1; | const size_t kDataOutputNum = 1; | ||||
| } // namespace | } // namespace | ||||
| static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | |||||
| auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | |||||
| for (const auto &node : comp_graph->GetAllNodes()) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| const auto &depends = op_desc->GetOpInferDepends(); | |||||
| if (!depends.empty()) { | |||||
| flag = true; | |||||
| return SUCCESS; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | ||||
| : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} | : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} | ||||
| @@ -478,6 +494,26 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
| single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | ||||
| model_params_.memory_size = UINT_MAX; | model_params_.memory_size = UINT_MAX; | ||||
| auto ge_model = model_helper_.GetGeModel(); | |||||
| GE_CHECK_NOTNULL(ge_model); | |||||
| bool infer_depend_flag = false; | |||||
| GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag)); | |||||
| if (ge_model->GetModelTaskDefPtr()->task_size() > 1 || infer_depend_flag) { | |||||
| GELOGD("Build single op HybridModel."); | |||||
| GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); | |||||
| single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(model_helper_.GetGeRootModel())); | |||||
| GE_CHECK_NOTNULL(single_op.hybrid_model_); | |||||
| GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model"); | |||||
| int32_t device_id = 0; | |||||
| GE_CHK_RT_RET(rtGetDevice(&device_id)); | |||||
| single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), | |||||
| device_id, | |||||
| resource.GetStream())); | |||||
| GE_CHECK_NOTNULL(single_op.hybrid_model_executor_); | |||||
| GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model"); | |||||
| return SUCCESS; | |||||
| } | |||||
| return BuildTaskListForDynamicOp(single_op); | return BuildTaskListForDynamicOp(single_op); | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { | |||||
| return it->second.get(); | return it->second.get(); | ||||
| } | } | ||||
| rtStream_t StreamResource::GetStream() const { | |||||
| return stream_; | |||||
| } | |||||
| void StreamResource::SetStream(rtStream_t stream) { | void StreamResource::SetStream(rtStream_t stream) { | ||||
| stream_ = stream; | stream_ = stream; | ||||
| } | } | ||||
| @@ -37,6 +37,7 @@ class StreamResource { | |||||
| StreamResource(StreamResource &&) = delete; | StreamResource(StreamResource &&) = delete; | ||||
| StreamResource &operator=(const StreamResource &) = delete; | StreamResource &operator=(const StreamResource &) = delete; | ||||
| StreamResource &operator=(StreamResource &&) = delete; | StreamResource &operator=(StreamResource &&) = delete; | ||||
| rtStream_t GetStream() const; | |||||
| void SetStream(rtStream_t stream); | void SetStream(rtStream_t stream); | ||||
| SingleOp *GetOperator(const void *key); | SingleOp *GetOperator(const void *key); | ||||
| @@ -562,6 +562,46 @@ set(SINGLE_OP_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/single_op/single_op_manager.cc" | "${GE_CODE_DIR}/ge/single_op/single_op_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/single_op/task/aicpu_task_builder.cc" | "${GE_CODE_DIR}/ge/single_op/task/aicpu_task_builder.cc" | ||||
| "${GE_CODE_DIR}/ge/single_op/task/aicpu_kernel_task_builder.cc" | "${GE_CODE_DIR}/ge/single_op/task/aicpu_kernel_task_builder.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/common/tensor_value.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/common/npu_memory_allocator.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/rt_callback_manager.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/node_state.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/node_done_manager.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_profiler.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_model_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_model_async_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/hybrid_execution_context.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/subgraph_context.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/subgraph_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/worker/task_compile_engine.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/worker/shape_inference_engine.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/executor/worker/execution_engine.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/model/hybrid_model.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/model/hybrid_model_builder.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/model/node_item.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/model/graph_item.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_op_task.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_builder.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel_factory.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/host_cpu/kernel/data_kernel.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/controlop/control_op_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/hccl/hccl_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/rts/rts_node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/node_executor.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/task_context.cc" | |||||
| "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model.cc" | |||||
| ) | ) | ||||
| # test files | # test files | ||||
| @@ -17,7 +17,6 @@ | |||||
| #include <gtest/gtest.h> | #include <gtest/gtest.h> | ||||
| #include <vector> | #include <vector> | ||||
| #include "cce/taskdown_common.hpp" | |||||
| #include "runtime/rt.h" | #include "runtime/rt.h" | ||||
| #define protected public | #define protected public | ||||