@@ -4021,14 +4021,18 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||
} else { | |||
compute_graph_info.model_name = name_; | |||
} | |||
std::vector<Format> format = { FORMAT_NULL }; | |||
std::vector<std::vector<int64_t>> shape = { {0} }; | |||
std::vector<DataType> data_type = { DT_UNDEFINED }; | |||
compute_graph_info.op_name = op_desc.op_name; | |||
compute_graph_info.op_type = op_desc.op_type; | |||
compute_graph_info.input_format = op_desc.input_format; | |||
compute_graph_info.input_shape = op_desc.input_shape; | |||
compute_graph_info.input_data_type = op_desc.input_data_type; | |||
compute_graph_info.output_format = op_desc.output_format; | |||
compute_graph_info.output_shape = op_desc.output_shape; | |||
compute_graph_info.output_data_type = op_desc.output_data_type; | |||
compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||
compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||
compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||
compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||
compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||
compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
auto iter = profiler_report_op_info_.find(op_desc.op_name); | |||
@@ -171,43 +171,9 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel | |||
GE_CHECK_NOTNULL(model); | |||
GELOGD("GetComputeGraphInfo of node [%s] start.", node->GetName().c_str()); | |||
compute_graph_info = context_->GetProfilingGraphDescInfo(); | |||
context_->ClearProfilingGraphDescInfo(); | |||
std::string dynamic_model_name = model->GetModelName(); | |||
auto op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
GELOGE(PARAM_INVALID, "op_desc is nullptr."); | |||
return PARAM_INVALID; | |||
} | |||
auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && | |||
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||
ComputeGraphDescInfo tmp_compute_graph_info; | |||
tmp_compute_graph_info.model_name = dynamic_model_name; | |||
tmp_compute_graph_info.op_name = op_desc->GetName(); | |||
tmp_compute_graph_info.op_type = op_desc->GetType(); | |||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||
if (input_desc == nullptr) { | |||
continue; | |||
} | |||
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||
} | |||
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||
GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
} | |||
tmp_compute_graph_info.task_id = context_->GetTaskId(); | |||
tmp_compute_graph_info.stream_id = context_->GetStreamId(); | |||
compute_graph_info.emplace_back(tmp_compute_graph_info); | |||
GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -183,7 +183,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | |||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | |||
// save profiling data | |||
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
return FAILED; | |||
} | |||
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | |||
} | |||
@@ -191,8 +191,16 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | |||
// save profiling data | |||
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicpu, 0); | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
return FAILED; | |||
} | |||
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||
(void)context.SaveProfilingGraphDescInfo(task_id, stream_id); | |||
auto callback = [=, &context]() { | |||
GELOGD("Node[%s] callback start.", node_name_.c_str()); | |||
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | |||
@@ -500,21 +500,12 @@ Status TaskContext::Synchronize() { | |||
return execution_context_->Synchronize(GetStream()); | |||
} | |||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { | |||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||
uint32_t task_type, uint32_t block_dim) { | |||
if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||
const NodeItem &node_item = GetNodeItem(); | |||
auto op_desc = node_item.GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||
return rt_ret; | |||
} | |||
GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); | |||
const GraphExecutionContext * graph_context = GetExecutionContext(); | |||
GE_CHECK_NOTNULL(graph_context); | |||
const HybridModel *model = graph_context->model; | |||
@@ -536,5 +527,59 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block | |||
return SUCCESS; | |||
} | |||
// Records compute-graph profiling info (input/output formats, shapes and data
// types) for this node, tagged with the runtime task_id/stream_id obtained
// after kernel launch. Only TVM-implemented ops are reported; no-op when
// model-execute profiling is disabled.
//
// @param task_id    runtime task id returned by rtGetTaskIdAndStreamID
// @param stream_id  runtime stream id returned by rtGetTaskIdAndStreamID
// @return SUCCESS, or a GE_CHECK_NOTNULL failure status on null op_desc /
//         execution context / model.
Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    const NodeItem &node_item = GetNodeItem();
    auto op_desc = node_item.GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const GraphExecutionContext *graph_context = GetExecutionContext();
    GE_CHECK_NOTNULL(graph_context);
    const HybridModel *model = graph_context->model;
    GE_CHECK_NOTNULL(model);

    std::string dynamic_model_name = model->GetModelName();
    auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
    // Only TVM (AI Core compiled) ops carry graph desc info for profiling.
    if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
        op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
      ComputeGraphDescInfo tmp_compute_graph_info;
      tmp_compute_graph_info.model_name = dynamic_model_name;
      tmp_compute_graph_info.op_name = op_desc->GetName();
      tmp_compute_graph_info.op_type = op_desc->GetType();

      for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
        GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
        if (input_desc == nullptr) {
          continue;  // optional input slot not set; skip it
        }
        tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
        tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
        tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
      }
      // Guarantee non-empty input fields for the profiler. Checking AFTER the
      // loop (instead of only when GetAllInputsSize() == 0) also covers the
      // case where input slots exist but every desc is null and was skipped.
      if (tmp_compute_graph_info.input_format.empty()) {
        tmp_compute_graph_info.input_format = { FORMAT_NULL };
        tmp_compute_graph_info.input_shape = { {0} };
        tmp_compute_graph_info.input_data_type = { DT_UNDEFINED };
      }

      for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
        GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
        tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
        tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
        tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
      }
      // Same non-empty guarantee for outputs.
      if (tmp_compute_graph_info.output_format.empty()) {
        tmp_compute_graph_info.output_format = { FORMAT_NULL };
        tmp_compute_graph_info.output_shape = { {0} };
        tmp_compute_graph_info.output_data_type = { DT_UNDEFINED };
      }

      tmp_compute_graph_info.task_id = task_id;
      tmp_compute_graph_info.stream_id = stream_id;
      // Move: ComputeGraphDescInfo holds several vectors/strings; avoid a copy.
      compute_graph_info.emplace_back(std::move(tmp_compute_graph_info));
    }
  }
  return SUCCESS;
}
} // namespace hybrid | |||
} // namespace ge |
@@ -110,9 +110,13 @@ class TaskContext { | |||
void *handle_ = nullptr; | |||
const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | |||
Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); | |||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, uint32_t task_type, uint32_t block_dim); | |||
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | |||
const std::vector<ComputeGraphDescInfo>& GetProfilingGraphDescInfo() const { return compute_graph_info; } | |||
Status SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id); | |||
void ClearProfilingGraphDescInfo() { compute_graph_info.clear(); } | |||
private: | |||
TaskContext(GraphExecutionContext *execution_context, | |||
const NodeItem *node_item, | |||
@@ -133,6 +137,7 @@ class TaskContext { | |||
uint32_t task_id_ = 0; | |||
uint32_t stream_id_ = 0; | |||
std::vector<TaskDescInfo> task_desc_info; | |||
std::vector<ComputeGraphDescInfo> compute_graph_info; | |||
}; | |||
} // namespace hybrid | |||
} // namespace ge | |||