Review notes for this revision of the patch (free text ahead of the first "diff --git" line):
  * Reflowed to one diff line per line. Indentation and blank context lines were reconstructed
    from the hunk line counts, because the extracted text had its line breaks collapsed.
  * davinci_model.cc: task_desc_info_.clear() stays in DistributeTask(), executed once before the
    task loop. Inside the per-task helper it discarded every descriptor except the last task's.
  * davinci_model.cc: the super-kernel descriptor is value-initialised and its task_type is set to
    kTaskTypeInvalid; it used to be reported with indeterminate stream_id/block_dim/cur_iter_num.
  * task_context.{h,cc}: the new private member is named task_desc_info_ like its neighbours.
  * "<...>" template arguments dropped by the extraction are restored on added/moved lines only;
    pre-existing context lines are left as extracted. "&param" is restored where it was mis-decoded.
  * The body of the aicpu_node_executor.cc hunk and the task_context.cc file header are missing from
    the extracted text (between "std::function" and "Synchronize(GetStream());"). That region is
    left exactly as extracted and must be restored from the original commit before applying.

diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 1fc4dba6..9ca3aced 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
       uint32_t stream_id = task.stream_id;
       std::string shape_type = task.shape_type;
       int64_t cur_iter_num = task.cur_iter_num;
+      uint32_t task_type = task.task_type;
       data = model_name.append(" ")
                        .append(op_name).append(" ")
                        .append(std::to_string(block_dim)).append(" ")
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
                        .append(std::to_string(stream_id)).append(" ")
                        .append(std::to_string(model_id)).append(" ")
                        .append(shape_type).append(" ")
-                       .append(std::to_string(cur_iter_num)).append("\n");
+                       .append(std::to_string(cur_iter_num)).append(" ")
+                       .append(std::to_string(task_type)).append("\n");
 
       ReporterData reporter_data{};
       reporter_data.deviceId = device_id;
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index cf2d9c5f..063c5b4c 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -3065,6 +3065,68 @@ Status DavinciModel::MallocKnownArgs() {
   return SUCCESS;
 }
 
+// Saves the profiling descriptor of one distributed task into task_desc_info_ and
+// profiler_report_op_info_. This only appends: DistributeTask() clears task_desc_info_
+// once before its task loop, so the descriptors of all tasks are kept.
+void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
+                                             const domi::TaskDef &task_def, size_t task_index) {
+  bool flag = GetL1FusionEnableOption();
+  char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
+  INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
+  int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
+  if (env_flag != 0) {
+    flag = true;
+  }
+
+  TaskDescInfo task_desc_info;
+  if (!om_name_.empty()) {
+    task_desc_info.model_name = om_name_;
+  } else {
+    task_desc_info.model_name = name_;
+  }
+  task_desc_info.op_name = op->GetName();
+  task_desc_info.block_dim = task_def.kernel().block_dim();
+  task_desc_info.task_id = task->GetTaskID();
+  task_desc_info.stream_id = task->GetStreamId();
+  task_desc_info.shape_type = "static";
+  task_desc_info.cur_iter_num = 0;
+  // task type: stays kTaskTypeInvalid unless the task is a recognised AI Core / AI CPU kernel
+  task_desc_info.task_type = kTaskTypeInvalid;
+  auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
+  if (model_task_type == RT_MODEL_TASK_KERNEL) {
+    const domi::KernelDef &kernel_def = task_def.kernel();
+    const auto &context = kernel_def.context();
+    auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+    if (kernel_type == ccKernelType::TE) {
+      task_desc_info.task_type = kTaskTypeAicore;
+    } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
+      task_desc_info.task_type = kTaskTypeAicpu;
+    } else {
+      GELOGD("Other kernel type: %u", context.kernel_type());
+    }
+  } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) {
+    task_desc_info.task_type = kTaskTypeAicpu;
+  } else {
+    GELOGD("Skip task type: %d", static_cast<int>(model_task_type));
+  }
+  profiler_report_op_info_[task_desc_info.op_name] =
+    std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
+  task_desc_info_.emplace_back(task_desc_info);
+  if (flag) {
+    if (task->GetSktTaskID() != 0xFFFFFFFF) {
+      // Value-initialise: the fields not assigned below must be zero, not indeterminate.
+      TaskDescInfo skt_desc_info{};
+      skt_desc_info.op_name = "super_kernel_" + to_string(task_index);
+      skt_desc_info.task_id = task->GetSktTaskID();
+      skt_desc_info.task_type = kTaskTypeInvalid;
+      profiler_report_op_info_[skt_desc_info.op_name] =
+        std::pair<uint32_t, uint32_t>(skt_desc_info.task_id, skt_desc_info.stream_id);
+      task_desc_info_.emplace_back(skt_desc_info);
+    }
+  }
+  return;
+}
+
 Status DavinciModel::DistributeTask() {
   GELOGI("do Distribute.");
   for (auto &task : cpu_task_list_) {
@@ -3075,19 +3137,13 @@ Status DavinciModel::DistributeTask() {
     GE_CHK_STATUS_RET(task->Distribute());
   }
 
   task_desc_info_.clear();
-  bool flag = GetL1FusionEnableOption();
-  char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
-  INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
-  int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
-  if (env_flag != 0) {
-    flag = true;
-  }
 
   const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
   for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
     auto &task_def = model_task_def->task(task_index);
     auto &task = task_list_.at(task_index);
+    GE_CHECK_NOTNULL(task);
     GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index);
     // for data dump
     auto op_index = std::max(task_def.kernel().context().op_index(),
@@ -3107,33 +3163,9 @@ Status DavinciModel::DistributeTask() {
     GE_IF_BOOL_EXEC(no_need_profiling, continue);
 
     SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
-    // Load task info for profiling
-    TaskDescInfo task_desc_info;
-    if (!om_name_.empty()) {
-      task_desc_info.model_name = om_name_;
-    } else {
-      task_desc_info.model_name = name_;
-    }
-    task_desc_info.op_name = op->GetName();
-    task_desc_info.block_dim = task_def.kernel().block_dim();
-    task_desc_info.task_id = task->GetTaskID();
-    task_desc_info.stream_id = task->GetStreamId();
-    task_desc_info.shape_type = "static";
-    task_desc_info.cur_iter_num = 0;
-    profiler_report_op_info_[task_desc_info.op_name] =
-      std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
-    task_desc_info_.emplace_back(task_desc_info);
-    if (flag) {
-      if (task->GetSktTaskID() != 0xFFFFFFFF) {
-        TaskDescInfo task_desc_info;
-        string op_name = "super_kernel_" + to_string(task_index);
-        task_desc_info.op_name = op_name;
-        task_desc_info.task_id = task->GetSktTaskID();
-        profiler_report_op_info_[task_desc_info.op_name] =
-          std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
-        task_desc_info_.emplace_back(task_desc_info);
-      }
-    }
+
+    // save task info for profiling
+    SaveProfilingTaskDescInfo(op, task, task_def, task_index);
   }
   // launch dump kernel to aicpu
   GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed.");
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index e9804dc5..f02015a8 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -623,6 +623,9 @@ class DavinciModel {
 
   Status DistributeTask();
 
+  void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
+                                 const domi::TaskDef &task_def, size_t task_index);
+
   uint8_t *MallocFeatureMapMem(size_t data_size);
 
   uint8_t *MallocWeightsMem(size_t weights_size);
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index b5de2a70..5e9d3607 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
   }
 
   GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
-  auto op_desc = node->GetOpDesc();
-  std::string op_name = op_desc->GetName();
-  std::string dynamic_model_name = model->GetModelName();
-  uint32_t task_id = context_->GetTaskId();
-  uint32_t stream_id = context_->GetStreamId();
-  TaskDescInfo tmp_task_desc_info;
-  tmp_task_desc_info.model_name = dynamic_model_name;
-  tmp_task_desc_info.op_name = op_name;
-  tmp_task_desc_info.block_dim = 0;
-  auto task_defs = model->GetTaskDefs(node);
-  if (task_defs != nullptr && (*task_defs).size() > 0) {
-    const auto &task_def = (*task_defs)[0];
-    tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
-  }
-  tmp_task_desc_info.task_id = task_id;
-  tmp_task_desc_info.stream_id = stream_id;
-  tmp_task_desc_info.shape_type = "dynamic";
-  tmp_task_desc_info.cur_iter_num = graph_context_->iteration;
-  GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
-         node->GetName().c_str(), task_id, stream_id);
-  task_desc_info.emplace_back(tmp_task_desc_info);
+  task_desc_info = context_->GetProfilingTaskDescInfo();
+  context_->ClearProfilingTaskDescInfo();
+
   return SUCCESS;
 }
 
@@ -247,7 +229,6 @@ Status NodeDoneCallback::ProfilingReport() {
 
   GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str());
   std::vector task_desc_info;
-  TaskDescInfo tmp_task_desc_info;
   auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info);
   if (profiling_ret != RT_ERROR_NONE) {
     GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str());
diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index 2abc5b03..a8736154 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -182,16 +182,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function
       }
       RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
       GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
-      uint32_t task_id = 0;
-      uint32_t stream_id = 0;
-      rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
-      if (rt_ret != RT_ERROR_NONE) {
-        GELOGE(rt_ret, "Get task_id and stream_id failed.");
-        return rt_ret;
-      }
-      context.SetTaskId(task_id);
-      context.SetStreamId(stream_id);
-      GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
+      // save profiling data
+      (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim());
       RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
       RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
     }
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 5818f384..dd15c608 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -48,6 +48,8 @@ class AiCoreOpTask {
 
   bool GetClearAtomic() const {return clear_atomic_;}
 
+  uint32_t GetBlockDim() const {return block_dim_;}
+
  protected:
   Status UpdateTilingInfo(TaskContext &context);
   virtual std::string GetKeyForOpParamSize() const;
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 63ce65e9..2a7cbc67 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -190,16 +190,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionSynchronize(GetStream());
 }
+
+// Appends one profiling descriptor, built from the given task_type and block_dim, to this context.
+// Does nothing and returns SUCCESS unless ProfilingManager::ProfilingModelExecuteOn() is true.
+// Returns the rtGetTaskIdAndStreamID() error code unchanged if that call fails.
+Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) {
+  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
+    const NodeItem &node_item = GetNodeItem();
+    auto op_desc = node_item.GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+
+    uint32_t task_id = 0;
+    uint32_t stream_id = 0;
+    rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);  // must be called after Launch kernel
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(rt_ret, "Get task_id and stream_id failed.");
+      return rt_ret;
+    }
+    GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id);
+
+    const GraphExecutionContext *graph_context = GetExecutionContext();
+    GE_CHECK_NOTNULL(graph_context);
+    const HybridModel *model = graph_context->model;
+    GE_CHECK_NOTNULL(model);
+
+    std::string op_name = op_desc->GetName();
+    std::string dynamic_model_name = model->GetModelName();
+    TaskDescInfo tmp_task_desc_info;
+    tmp_task_desc_info.model_name = dynamic_model_name;
+    tmp_task_desc_info.op_name = op_name;
+    tmp_task_desc_info.block_dim = block_dim;
+    tmp_task_desc_info.task_type = task_type;
+    tmp_task_desc_info.task_id = task_id;
+    tmp_task_desc_info.stream_id = stream_id;
+    tmp_task_desc_info.shape_type = "dynamic";
+    tmp_task_desc_info.cur_iter_num = iteration_;
+    task_desc_info_.emplace_back(tmp_task_desc_info);
+  }
+
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
index 6a4bcb8c..9a668f8c 100644
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -22,6 +22,7 @@
 #include
 #include "common/properties_manager.h"
 #include "external/ge/ge_api_error_codes.h"
+#include "framework/common/ge_types.h"
 #include "hybrid/common/tensor_value.h"
 #include "hybrid/common/npu_memory_allocator.h"
 #include "hybrid/executor/rt_callback_manager.h"
@@ -108,6 +109,11 @@ class TaskContext {
   void SetForceInferShape(bool force_infer_shape);
   void *handle_ = nullptr;
 
+  // Profiling descriptors appended by SaveProfilingTaskDescInfo() since the last clear.
+  const std::vector<TaskDescInfo> &GetProfilingTaskDescInfo() const { return task_desc_info_; }
+  Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim);
+  void ClearProfilingTaskDescInfo() { task_desc_info_.clear(); }
+
  private:
   TaskContext(GraphExecutionContext *execution_context,
               const NodeItem *node_item,
@@ -127,6 +133,7 @@ class TaskContext {
   uint64_t iteration_ = 0;
   uint32_t task_id_ = 0;
   uint32_t stream_id_ = 0;
+  std::vector<TaskDescInfo> task_desc_info_;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index 1f3fc5c5..081ce13b 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -70,6 +70,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
   tmp_task_desc_info.stream_id = stream_id;
   tmp_task_desc_info.shape_type = shape_type;
   tmp_task_desc_info.cur_iter_num = 0;
+  tmp_task_desc_info.task_type = op_task->GetTaskType();
   GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
   task_desc_info.emplace_back(tmp_task_desc_info);
 
diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc
index cc63e811..1772ca88 100755
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector &input_desc,
   return UNSUPPORTED;
 }
 
+uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }
+
 TbeOpTask::~TbeOpTask() {
   if (sm_desc_ != nullptr) {
     (void)rtMemFreeManaged(sm_desc_);
@@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; }
 
 const std::string &TbeOpTask::GetStubName() const { return stub_name_; }
 
+uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
+
 Status TbeOpTask::LaunchKernel(rtStream_t stream) {
   GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
   auto *sm_desc = reinterpret_cast(sm_desc_);
@@ -802,6 +806,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) {
   return DoUpdateArgTable(param, false);
 }
 
+uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }
+
 void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
   arg_base = reinterpret_cast(io_addr_host_.data());
   arg_count = io_addr_host_.size();
diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h
index 2d0740a6..78e1f6f0 100644
--- a/ge/single_op/task/op_task.h
+++ b/ge/single_op/task/op_task.h
@@ -52,6 +52,7 @@ class OpTask {
                               std::vector &output_desc,
                               std::vector &output_buffers,
                               rtStream_t stream);
+  virtual uint32_t GetTaskType() const;
 
  protected:
   Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace);
@@ -85,6 +86,7 @@ class TbeOpTask : public OpTask {
   size_t GetArgSize() const;
   const std::string &GetStubName() const;
   void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
+  uint32_t GetTaskType() const override;
 
  private:
   friend class SingleOpModel;
@@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask {
   ~AiCpuBaseTask() override;
   UnknowShapeOpType GetUnknownType() const { return unknown_type_; }
   Status UpdateArgTable(const SingleOpModelParam &param) override;
+  uint32_t GetTaskType() const override;
+
  protected:
   Status UpdateIoAddr(const std::vector &inputs, const std::vector &outputs);
   Status SetInputConst();
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index f7e6d679..9ca77f1c 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -41,12 +41,7 @@ enum FrameworkType {
 };
 
 const std::map kFwkTypeToStr = {
-  {"0", "Caffe"},
-  {"1", "MindSpore"},
-  {"3", "TensorFlow"},
-  {"4", "Android_NN"},
-  {"5", "Onnx"}
-};
+  {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}};
 
 enum OpEngineType {
   ENGINE_SYS = 0,  // default engine
@@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN
 const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
 const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";
 
+// profiling data: values stored in TaskDescInfo::task_type
+const uint32_t kTaskTypeAicore = 0;
+const uint32_t kTaskTypeAicpu = 1;
+const uint32_t kTaskTypeInvalid = 0xFFFF;
+
 // Data cache, including data address and length
 struct DataBuffer {
  public:
@@ -256,6 +256,7 @@ struct TaskDescInfo {
   uint32_t stream_id;
   std::string shape_type;
   int64_t cur_iter_num;
+  uint32_t task_type;
 };
 
 // Profiling info of graph
diff --git a/metadef b/metadef
index dc6cceb6..b00c50c2 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467
+Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8
diff --git a/parser b/parser
index 4e72aae4..f0109a2c 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75
+Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5