@@ -4021,14 +4021,18 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||||
} else { | } else { | ||||
compute_graph_info.model_name = name_; | compute_graph_info.model_name = name_; | ||||
} | } | ||||
std::vector<Format> format = { FORMAT_NULL }; | |||||
std::vector<std::vector<int64_t>> shape = { {0} }; | |||||
std::vector<DataType> data_type = { DT_UNDEFINED }; | |||||
compute_graph_info.op_name = op_desc.op_name; | compute_graph_info.op_name = op_desc.op_name; | ||||
compute_graph_info.op_type = op_desc.op_type; | compute_graph_info.op_type = op_desc.op_type; | ||||
compute_graph_info.input_format = op_desc.input_format; | |||||
compute_graph_info.input_shape = op_desc.input_shape; | |||||
compute_graph_info.input_data_type = op_desc.input_data_type; | |||||
compute_graph_info.output_format = op_desc.output_format; | |||||
compute_graph_info.output_shape = op_desc.output_shape; | |||||
compute_graph_info.output_data_type = op_desc.output_data_type; | |||||
compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||||
compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||||
compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||||
compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||||
compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||||
compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||||
uint32_t task_id = 0; | uint32_t task_id = 0; | ||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
auto iter = profiler_report_op_info_.find(op_desc.op_name); | auto iter = profiler_report_op_info_.find(op_desc.op_name); | ||||
@@ -171,43 +171,9 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel | |||||
GE_CHECK_NOTNULL(model); | GE_CHECK_NOTNULL(model); | ||||
GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | ||||
compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||||
context_->ClearProfilingGraphDescInfo(); | |||||
std::string dynamic_model_name = model->GetModelName(); | |||||
auto op_desc = node->GetOpDesc(); | |||||
if (op_desc == nullptr) { | |||||
GELOGE(PARAM_INVALID, "op_desc is nullptr."); | |||||
return PARAM_INVALID; | |||||
} | |||||
auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||||
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||||
ComputeGraphDescInfo tmp_compute_graph_info; | |||||
tmp_compute_graph_info.model_name = dynamic_model_name; | |||||
tmp_compute_graph_info.op_name = op_desc->GetName(); | |||||
tmp_compute_graph_info.op_type = op_desc->GetType(); | |||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||||
if (input_desc == nullptr) { | |||||
continue; | |||||
} | |||||
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||||
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||||
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||||
} | |||||
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||||
GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||||
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||||
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||||
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||||
} | |||||
tmp_compute_graph_info.task_id = context_->GetTaskId(); | |||||
tmp_compute_graph_info.stream_id = context_->GetStreamId(); | |||||
compute_graph_info.emplace_back(tmp_compute_graph_info); | |||||
GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -183,7 +183,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | ||||
// save profiling data | // save profiling data | ||||
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); | |||||
uint32_t task_id = 0; | |||||
uint32_t stream_id = 0; | |||||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return FAILED; | |||||
} | |||||
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
} | } | ||||
@@ -191,8 +191,16 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | ||||
// save profiling data | // save profiling data | ||||
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicpu, 0); | |||||
uint32_t task_id = 0; | |||||
uint32_t stream_id = 0; | |||||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return FAILED; | |||||
} | |||||
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||||
auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
@@ -500,21 +500,12 @@ Status TaskContext::Synchronize() { | |||||
return execution_context_->Synchronize(GetStream()); | return execution_context_->Synchronize(GetStream()); | ||||
} | } | ||||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { | |||||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
uint32_t task_type, uint32_t block_dim) { | |||||
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | ||||
const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
uint32_t task_id = 0; | |||||
uint32_t stream_id = 0; | |||||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return rt_ret; | |||||
} | |||||
GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); | |||||
const GraphExecutionContext * graph_context = GetExecutionContext(); | const GraphExecutionContext * graph_context = GetExecutionContext(); | ||||
GE_CHECK_NOTNULL(graph_context); | GE_CHECK_NOTNULL(graph_context); | ||||
const HybridModel *model = graph_context->model; | const HybridModel *model = graph_context->model; | ||||
@@ -536,5 +527,59 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Records one ComputeGraphDescInfo entry for the node owned by this task context.
//
// Nothing is recorded unless ProfilingManager reports that model-execute
// profiling is on AND the op's ATTR_NAME_IMPLY_TYPE attribute equals
// domi::ImplyType::TVM. The entry carries the model name, op name/type, the
// format/shape/data type of every non-null input desc and of every output
// desc, plus the supplied task_id and stream_id. It is appended to
// compute_graph_info.
//
// Params:
//   task_id   - task id stored in the recorded entry.
//   stream_id - stream id stored in the recorded entry.
// Returns:
//   SUCCESS on every path visible here. The GE_CHECK_NOTNULL macros presumably
//   return an error status early when op_desc, the execution context or the
//   model is null - TODO confirm against the macro definition.
Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    return SUCCESS;
  }

  const NodeItem &node_item = GetNodeItem();
  auto op_desc = node_item.GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  const GraphExecutionContext *graph_context = GetExecutionContext();
  GE_CHECK_NOTNULL(graph_context);
  const HybridModel *model = graph_context->model;
  GE_CHECK_NOTNULL(model);

  std::string dynamic_model_name = model->GetModelName();
  auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
  if (!AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) ||
      op_mode != static_cast<uint32_t>(domi::ImplyType::TVM)) {
    // Only ops whose imply type is TVM are recorded.
    return SUCCESS;
  }

  ComputeGraphDescInfo tmp_compute_graph_info;
  tmp_compute_graph_info.model_name = dynamic_model_name;
  tmp_compute_graph_info.op_name = op_desc->GetName();
  tmp_compute_graph_info.op_type = op_desc->GetType();

  for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
    GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
    if (input_desc == nullptr) {
      continue;
    }
    tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
    tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
    tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
  }
  // Fall back to placeholder values whenever nothing was collected. This
  // covers both "op declares no inputs" and "every input desc was null and
  // skipped"; checking the declared count alone would miss the second case.
  if (tmp_compute_graph_info.input_format.empty()) {
    tmp_compute_graph_info.input_format = { FORMAT_NULL };
    tmp_compute_graph_info.input_shape = { {0} };
    tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
  }

  for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
    GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
    tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
    tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
    tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
  }
  // Same placeholder rule for outputs, keyed on what was actually collected.
  if (tmp_compute_graph_info.output_format.empty()) {
    tmp_compute_graph_info.output_format = { FORMAT_NULL };
    tmp_compute_graph_info.output_shape = { {0} };
    tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
  }

  tmp_compute_graph_info.task_id = task_id;
  tmp_compute_graph_info.stream_id = stream_id;
  compute_graph_info.emplace_back(tmp_compute_graph_info);
  return SUCCESS;
}
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -110,9 +110,13 @@ class TaskContext { | |||||
void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | ||||
Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); | |||||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||||
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | ||||
// Returns a read-only reference to the graph-description entries held in compute_graph_info.
const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||||
// Appends a ComputeGraphDescInfo entry for this context's node, tagged with task_id and stream_id.
// The definition only records when model-execute profiling is on and the op's imply type is TVM.
Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||||
// Removes every entry from compute_graph_info.
void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||||
private: | private: | ||||
TaskContext(GraphExecutionContext *execution_context, | TaskContext(GraphExecutionContext *execution_context, | ||||
const NodeItem *node_item, | const NodeItem *node_item, | ||||
@@ -133,6 +137,7 @@ class TaskContext { | |||||
uint32_t task_id_ = 0; | uint32_t task_id_ = 0; | ||||
uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
}; | }; | ||||
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge | ||||