From: @zhengyuanhua Reviewed-by: @ji_chen,@youui Signed-off-by: @ji_chen tags/v1.2.0
| @@ -212,12 +212,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
| uint32_t block_dim = task.block_dim; | uint32_t block_dim = task.block_dim; | ||||
| uint32_t task_id = task.task_id; | uint32_t task_id = task.task_id; | ||||
| uint32_t stream_id = task.stream_id; | uint32_t stream_id = task.stream_id; | ||||
| std::string shape_type = task.shape_type; | |||||
| int64_t cur_iter_num = task.cur_iter_num; | |||||
| data = model_name.append(" ") | data = model_name.append(" ") | ||||
| .append(op_name).append(" ") | .append(op_name).append(" ") | ||||
| .append(std::to_string(block_dim).append(" ") | |||||
| .append(std::to_string(block_dim)).append(" ") | |||||
| .append(std::to_string(task_id)).append(" ") | .append(std::to_string(task_id)).append(" ") | ||||
| .append(std::to_string(stream_id)).append(" ") | .append(std::to_string(stream_id)).append(" ") | ||||
| .append(std::to_string(model_id)).append("\n")); | |||||
| .append(std::to_string(model_id)).append(" ") | |||||
| .append(shape_type).append(" ") | |||||
| .append(std::to_string(cur_iter_num)).append("\n"); | |||||
| ReporterData reporter_data{}; | ReporterData reporter_data{}; | ||||
| reporter_data.deviceId = device_id; | reporter_data.deviceId = device_id; | ||||
| @@ -2993,9 +2993,7 @@ Status DavinciModel::DistributeTask() { | |||||
| } | } | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) | |||||
| && (task_type != RT_MODEL_TASK_KERNEL_EX) | |||||
| && (task_type != RT_MODEL_TASK_HCCL); | |||||
| bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) && (task_type != RT_MODEL_TASK_KERNEL_EX); | |||||
| GE_IF_BOOL_EXEC(no_need_profiling, continue); | GE_IF_BOOL_EXEC(no_need_profiling, continue); | ||||
| SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | ||||
| @@ -3010,6 +3008,8 @@ Status DavinciModel::DistributeTask() { | |||||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | task_desc_info.block_dim = task_def.kernel().block_dim(); | ||||
| task_desc_info.task_id = task->GetTaskID(); | task_desc_info.task_id = task->GetTaskID(); | ||||
| task_desc_info.stream_id = task->GetStreamId(); | task_desc_info.stream_id = task->GetStreamId(); | ||||
| task_desc_info.shape_type = "static"; | |||||
| task_desc_info.cur_iter_num = 0; | |||||
| task_desc_info_.emplace_back(task_desc_info); | task_desc_info_.emplace_back(task_desc_info); | ||||
| if (flag) { | if (flag) { | ||||
| if (task->GetSktTaskID() != 0xFFFFFFFF) { | if (task->GetSktTaskID() != 0xFFFFFFFF) { | ||||
| @@ -151,18 +151,19 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| GE_CHECK_NOTNULL(model); | GE_CHECK_NOTNULL(model); | ||||
| // only report aicpu and aicore node | |||||
| bool is_profiling_report = context_->GetNodeItem().is_profiling_report; | |||||
| if (!is_profiling_report) { | |||||
| GELOGD("Node[%s] is not aicore or aicpu, and no need to report data.", node->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| std::string op_name = op_desc->GetName(); | std::string op_name = op_desc->GetName(); | ||||
| std::string dynamic_model_name = model->GetModelName(); | std::string dynamic_model_name = model->GetModelName(); | ||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { | |||||
| GELOGE(PARAM_INVALID, "Get task_id and stream_id failed."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| uint32_t task_id = context_->GetTaskId(); | |||||
| uint32_t stream_id = context_->GetStreamId(); | |||||
| TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
| tmp_task_desc_info.model_name = dynamic_model_name; | tmp_task_desc_info.model_name = dynamic_model_name; | ||||
| tmp_task_desc_info.op_name = op_name; | tmp_task_desc_info.op_name = op_name; | ||||
| @@ -174,6 +175,8 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||||
| } | } | ||||
| tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
| tmp_task_desc_info.stream_id = stream_id; | tmp_task_desc_info.stream_id = stream_id; | ||||
| tmp_task_desc_info.shape_type = "dynamic"; | |||||
| tmp_task_desc_info.cur_iter_num = graph_context_->iteration; | |||||
| GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", | GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", | ||||
| node->GetName().c_str(), task_id, stream_id); | node->GetName().c_str(), task_id, stream_id); | ||||
| task_desc_info.emplace_back(tmp_task_desc_info); | task_desc_info.emplace_back(tmp_task_desc_info); | ||||
| @@ -226,7 +226,10 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n | |||||
| new_node->node_id = node_index; | new_node->node_id = node_index; | ||||
| new_node->op_desc->SetId(node_index); | new_node->op_desc->SetId(node_index); | ||||
| node_index += 1; | node_index += 1; | ||||
| NodeExecutorManager::ExecutorType executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node); | |||||
| new_node->is_profiling_report = (executor_type == NodeExecutorManager::ExecutorType::AICORE) || | |||||
| (executor_type == NodeExecutorManager::ExecutorType::AICPU_TF) || | |||||
| (executor_type == NodeExecutorManager::ExecutorType::AICPU_CUSTOM); | |||||
| *node_item = new_node.get(); | *node_item = new_node.get(); | ||||
| node_items[node] = std::move(new_node); | node_items[node] = std::move(new_node); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -99,6 +99,7 @@ struct NodeItem { | |||||
| std::map<int, int> reuse_inputs; | std::map<int, int> reuse_inputs; | ||||
| std::map<int, int> reuse_outputs; | std::map<int, int> reuse_outputs; | ||||
| int num_static_input_shapes = 0; | int num_static_input_shapes = 0; | ||||
| bool is_profiling_report = false; | |||||
| private: | private: | ||||
| explicit NodeItem(NodePtr node); | explicit NodeItem(NodePtr node); | ||||
| @@ -165,6 +165,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
| } | } | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
| GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | ||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return rt_ret; | |||||
| } | |||||
| context.SetTaskId(task_id); | |||||
| context.SetStreamId(stream_id); | |||||
| GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| } | } | ||||
| @@ -189,6 +189,17 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
| GE_CHK_STATUS_RET(LaunchTask(context)); | GE_CHK_STATUS_RET(LaunchTask(context)); | ||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return rt_ret; | |||||
| } | |||||
| context.SetTaskId(task_id); | |||||
| context.SetStreamId(stream_id); | |||||
| GELOGD("AiCpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
| auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
| @@ -319,6 +319,22 @@ void TaskContext::SetStatus(Status status) { | |||||
| } | } | ||||
| } | } | ||||
| uint32_t TaskContext::GetTaskId() const { | |||||
| return task_id_; | |||||
| } | |||||
| void TaskContext::SetTaskId(uint32_t task_id) { | |||||
| task_id_ = task_id; | |||||
| } | |||||
| uint32_t TaskContext::GetStreamId() const { | |||||
| return stream_id_; | |||||
| } | |||||
| void TaskContext::SetStreamId(uint32_t stream_id) { | |||||
| stream_id_ = stream_id; | |||||
| } | |||||
| Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) { | Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) { | ||||
| GE_CHECK_NOTNULL(buffer); | GE_CHECK_NOTNULL(buffer); | ||||
| if (ori_addr == nullptr) { | if (ori_addr == nullptr) { | ||||
| @@ -96,6 +96,12 @@ class TaskContext { | |||||
| void SetStatus(Status status); | void SetStatus(Status status); | ||||
| uint32_t GetTaskId() const; | |||||
| void SetTaskId(uint32_t task_id); | |||||
| uint32_t GetStreamId() const; | |||||
| void SetStreamId(uint32_t stream_id); | |||||
| bool IsForceInferShape() const; | bool IsForceInferShape() const; | ||||
| void SetForceInferShape(bool force_infer_shape); | void SetForceInferShape(bool force_infer_shape); | ||||
| void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
| @@ -117,6 +123,8 @@ class TaskContext { | |||||
| Status status_ = SUCCESS; | Status status_ = SUCCESS; | ||||
| std::vector<void *> workspaces_; | std::vector<void *> workspaces_; | ||||
| uint64_t iteration_ = 0; | uint64_t iteration_ = 0; | ||||
| uint32_t task_id_= 0; | |||||
| uint32_t stream_id_ = 0; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -32,13 +32,15 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| const size_t kDataMemAlignSize = 32; | const size_t kDataMemAlignSize = 32; | ||||
| const size_t kDataMemAlignUnit = 2; | const size_t kDataMemAlignUnit = 2; | ||||
| const string kShapeTypeDynamic = "dynamic"; | |||||
| const string kShapeTypeStatic = "static"; | |||||
| size_t GetAlignedSize(size_t size) { | size_t GetAlignedSize(size_t size) { | ||||
| size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | ||||
| return aligned_size; | return aligned_size; | ||||
| } | } | ||||
| Status ProfilingTaskInfo(OpTask *op_task) { | |||||
| Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
| if (!ProfilingManager::Instance().ProfilingModelLoadOn()) { | if (!ProfilingManager::Instance().ProfilingModelLoadOn()) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -66,6 +68,8 @@ Status ProfilingTaskInfo(OpTask *op_task) { | |||||
| tmp_task_desc_info.block_dim = block_dim; | tmp_task_desc_info.block_dim = block_dim; | ||||
| tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
| tmp_task_desc_info.stream_id = stream_id; | tmp_task_desc_info.stream_id = stream_id; | ||||
| tmp_task_desc_info.shape_type = shape_type; | |||||
| tmp_task_desc_info.cur_iter_num = 0; | |||||
| GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | ||||
| task_desc_info.emplace_back(tmp_task_desc_info); | task_desc_info.emplace_back(tmp_task_desc_info); | ||||
| @@ -193,7 +197,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task)); | |||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task, kShapeTypeStatic)); | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| @@ -255,7 +259,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | std::lock_guard<std::mutex> lk(*stream_mutex_); | ||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | ||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); | |||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -248,6 +248,8 @@ struct TaskDescInfo { | |||||
| uint32_t block_dim; | uint32_t block_dim; | ||||
| uint32_t task_id; | uint32_t task_id; | ||||
| uint32_t stream_id; | uint32_t stream_id; | ||||
| std::string shape_type; | |||||
| int64_t cur_iter_num; | |||||
| }; | }; | ||||
| // Profiling info of graph | // Profiling info of graph | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit 191b7ad10e99d0b8d800ce85dae3ef7a2a146870 | |||||
| Subproject commit 2f774bcd66b0d4b8d65b629f50148e9dd2248403 | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit 8317ba6e886da54f4f161bf4a7fc40de27d6ce3c | |||||
| Subproject commit 89e2455f653807f7bb3177b9b5eb096100a600db | |||||