From: @zhengyuanhua Reviewed-by: @youui, @xchu42 Signed-off-by: @youui Tag: v1.2.0
| @@ -4021,14 +4021,18 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||
| } else { | |||
| compute_graph_info.model_name = name_; | |||
| } | |||
| std::vector<Format> format = { FORMAT_NULL }; | |||
| std::vector<std::vector<int64_t>> shape = { {0} }; | |||
| std::vector<DataType> data_type = { DT_UNDEFINED }; | |||
| compute_graph_info.op_name = op_desc.op_name; | |||
| compute_graph_info.op_type = op_desc.op_type; | |||
| compute_graph_info.input_format = op_desc.input_format; | |||
| compute_graph_info.input_shape = op_desc.input_shape; | |||
| compute_graph_info.input_data_type = op_desc.input_data_type; | |||
| compute_graph_info.output_format = op_desc.output_format; | |||
| compute_graph_info.output_shape = op_desc.output_shape; | |||
| compute_graph_info.output_data_type = op_desc.output_data_type; | |||
| compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||
| compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||
| compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||
| compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||
| compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||
| compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||
| @@ -171,43 +171,9 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel | |||
| GE_CHECK_NOTNULL(model); | |||
| GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||
| compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||
| context_->ClearProfilingGraphDescInfo(); | |||
| std::string dynamic_model_name = model->GetModelName(); | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| GELOGE(PARAM_INVALID, "op_desc is nullptr."); | |||
| return PARAM_INVALID; | |||
| } | |||
| auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||
| op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||
| ComputeGraphDescInfo tmp_compute_graph_info; | |||
| tmp_compute_graph_info.model_name = dynamic_model_name; | |||
| tmp_compute_graph_info.op_name = op_desc->GetName(); | |||
| tmp_compute_graph_info.op_type = op_desc->GetType(); | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||
| if (input_desc == nullptr) { | |||
| continue; | |||
| } | |||
| tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||
| tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||
| tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||
| } | |||
| for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||
| GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||
| tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||
| tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
| tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
| } | |||
| tmp_compute_graph_info.task_id = context_->GetTaskId(); | |||
| tmp_compute_graph_info.stream_id = context_->GetStreamId(); | |||
| compute_graph_info.emplace_back(tmp_compute_graph_info); | |||
| GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -183,7 +183,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | |||
| GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | |||
| // save profiling data | |||
| (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return FAILED; | |||
| } | |||
| GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
| } | |||
| @@ -191,8 +191,16 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
| HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | |||
| // save profiling data | |||
| (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicpu, 0); | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return FAILED; | |||
| } | |||
| GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
| (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||
| (void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
| auto callback = [=, &context]() { | |||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | |||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | |||
| @@ -508,21 +508,12 @@ Status TaskContext::Synchronize() { | |||
| return execution_context_->Synchronize(GetStream()); | |||
| } | |||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { | |||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
| uint32_t task_type, uint32_t block_dim) { | |||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
| const NodeItem &node_item = GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| uint32_t task_id = 0; | |||
| uint32_t stream_id = 0; | |||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
| return rt_ret; | |||
| } | |||
| GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); | |||
| const GraphExecutionContext * graph_context = GetExecutionContext(); | |||
| GE_CHECK_NOTNULL(graph_context); | |||
| const HybridModel *model = graph_context->model; | |||
| @@ -544,5 +535,59 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block | |||
| return SUCCESS; | |||
| } | |||
| Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) { | |||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
| const NodeItem &node_item = GetNodeItem(); | |||
| auto op_desc = node_item.GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| const GraphExecutionContext * graph_context = GetExecutionContext(); | |||
| GE_CHECK_NOTNULL(graph_context); | |||
| const HybridModel *model = graph_context->model; | |||
| GE_CHECK_NOTNULL(model); | |||
| std::string dynamic_model_name = model->GetModelName(); | |||
| auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||
| op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||
| ComputeGraphDescInfo tmp_compute_graph_info; | |||
| tmp_compute_graph_info.model_name = dynamic_model_name; | |||
| tmp_compute_graph_info.op_name = op_desc->GetName(); | |||
| tmp_compute_graph_info.op_type = op_desc->GetType(); | |||
| // default | |||
| if (op_desc->GetAllInputsSize() == 0) { | |||
| tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||
| tmp_compute_graph_info.input_shape = { {0} }; | |||
| tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||
| } | |||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
| GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||
| if (input_desc == nullptr) { | |||
| continue; | |||
| } | |||
| tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||
| tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||
| tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||
| } | |||
| if (op_desc->GetOutputsSize() == 0) { | |||
| tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||
| tmp_compute_graph_info.output_shape = { {0} }; | |||
| tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||
| } | |||
| for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||
| GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||
| tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||
| tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
| tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
| } | |||
| tmp_compute_graph_info.task_id = task_id; | |||
| tmp_compute_graph_info.stream_id = stream_id; | |||
| compute_graph_info.emplace_back(tmp_compute_graph_info); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace hybrid | |||
| } // namespace ge | |||
| @@ -110,9 +110,13 @@ class TaskContext { | |||
| void *handle_ = nullptr; | |||
| const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); | |||
| Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||
| void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | |||
| const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||
| Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||
| void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||
| private: | |||
| TaskContext(GraphExecutionContext *execution_context, | |||
| const NodeItem *node_item, | |||
| @@ -133,6 +137,7 @@ class TaskContext { | |||
| uint32_t task_id_ = 0; | |||
| uint32_t stream_id_ = 0; | |||
| std::vector<TaskDescInfo> task_desc_info; | |||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
| }; | |||
| } // namespace hybrid | |||
| } // namespace ge | |||