| @@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
| uint32_t stream_id = task.stream_id; | uint32_t stream_id = task.stream_id; | ||||
| std::string shape_type = task.shape_type; | std::string shape_type = task.shape_type; | ||||
| int64_t cur_iter_num = task.cur_iter_num; | int64_t cur_iter_num = task.cur_iter_num; | ||||
| uint32_t task_type = task.task_type; | |||||
| data = model_name.append(" ") | data = model_name.append(" ") | ||||
| .append(op_name).append(" ") | .append(op_name).append(" ") | ||||
| .append(std::to_string(block_dim)).append(" ") | .append(std::to_string(block_dim)).append(" ") | ||||
| @@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
| .append(std::to_string(stream_id)).append(" ") | .append(std::to_string(stream_id)).append(" ") | ||||
| .append(std::to_string(model_id)).append(" ") | .append(std::to_string(model_id)).append(" ") | ||||
| .append(shape_type).append(" ") | .append(shape_type).append(" ") | ||||
| .append(std::to_string(cur_iter_num)).append("\n"); | |||||
| .append(std::to_string(cur_iter_num)).append(" ") | |||||
| .append(std::to_string(task_type)).append("\n"); | |||||
| ReporterData reporter_data{}; | ReporterData reporter_data{}; | ||||
| reporter_data.deviceId = device_id; | reporter_data.deviceId = device_id; | ||||
| @@ -3064,6 +3064,65 @@ Status DavinciModel::MallocKnownArgs() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||||
| const domi::TaskDef &task_def, size_t task_index) { | |||||
| task_desc_info_.clear(); | |||||
| bool flag = GetL1FusionEnableOption(); | |||||
| char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
| int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
| if (env_flag != 0) { | |||||
| flag = true; | |||||
| } | |||||
| TaskDescInfo task_desc_info; | |||||
| if (!om_name_.empty()) { | |||||
| task_desc_info.model_name = om_name_; | |||||
| } else { | |||||
| task_desc_info.model_name = name_; | |||||
| } | |||||
| task_desc_info.op_name = op->GetName(); | |||||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
| task_desc_info.task_id = task->GetTaskID(); | |||||
| task_desc_info.stream_id = task->GetStreamId(); | |||||
| task_desc_info.shape_type = "static"; | |||||
| task_desc_info.cur_iter_num = 0; | |||||
| // task type | |||||
| task_desc_info.task_type = kTaskTypeInvalid; | |||||
| auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| const auto &context = kernel_def.context(); | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||||
| if (kernel_type == ccKernelType::TE) { | |||||
| task_desc_info.task_type = kTaskTypeAicore; | |||||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||||
| task_desc_info.task_type = kTaskTypeAicpu; | |||||
| } else { | |||||
| GELOGD("Other kernel type: %u", context.kernel_type()); | |||||
| } | |||||
| } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
| task_desc_info.task_type = kTaskTypeAicpu; | |||||
| } else { | |||||
| GELOGD("Skip task type: %d", static_cast<int>(model_task_type)); | |||||
| } | |||||
| profiler_report_op_info_[task_desc_info.op_name] = | |||||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
| task_desc_info_.emplace_back(task_desc_info); | |||||
| if (flag) { | |||||
| if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||||
| TaskDescInfo task_desc_info; | |||||
| string op_name = "super_kernel_" + to_string(task_index); | |||||
| task_desc_info.op_name = op_name; | |||||
| task_desc_info.task_id = task->GetSktTaskID(); | |||||
| profiler_report_op_info_[task_desc_info.op_name] = | |||||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
| task_desc_info_.emplace_back(task_desc_info); | |||||
| } | |||||
| } | |||||
| return; | |||||
| } | |||||
| Status DavinciModel::DistributeTask() { | Status DavinciModel::DistributeTask() { | ||||
| GELOGI("do Distribute."); | GELOGI("do Distribute."); | ||||
| for (auto &task : cpu_task_list_) { | for (auto &task : cpu_task_list_) { | ||||
| @@ -3074,19 +3133,11 @@ Status DavinciModel::DistributeTask() { | |||||
| GE_CHK_STATUS_RET(task->Distribute()); | GE_CHK_STATUS_RET(task->Distribute()); | ||||
| } | } | ||||
| task_desc_info_.clear(); | |||||
| bool flag = GetL1FusionEnableOption(); | |||||
| char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
| int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
| if (env_flag != 0) { | |||||
| flag = true; | |||||
| } | |||||
| const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | ||||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | ||||
| auto &task_def = model_task_def->task(task_index); | auto &task_def = model_task_def->task(task_index); | ||||
| auto &task = task_list_.at(task_index); | auto &task = task_list_.at(task_index); | ||||
| GE_CHECK_NOTNULL(task); | |||||
| GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | ||||
| // for data dump | // for data dump | ||||
| auto op_index = std::max(task_def.kernel().context().op_index(), | auto op_index = std::max(task_def.kernel().context().op_index(), | ||||
| @@ -3106,33 +3157,9 @@ Status DavinciModel::DistributeTask() { | |||||
| GE_IF_BOOL_EXEC(no_need_profiling, continue); | GE_IF_BOOL_EXEC(no_need_profiling, continue); | ||||
| SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | ||||
| // Load task info for profiling | |||||
| TaskDescInfo task_desc_info; | |||||
| if (!om_name_.empty()) { | |||||
| task_desc_info.model_name = om_name_; | |||||
| } else { | |||||
| task_desc_info.model_name = name_; | |||||
| } | |||||
| task_desc_info.op_name = op->GetName(); | |||||
| task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
| task_desc_info.task_id = task->GetTaskID(); | |||||
| task_desc_info.stream_id = task->GetStreamId(); | |||||
| task_desc_info.shape_type = "static"; | |||||
| task_desc_info.cur_iter_num = 0; | |||||
| profiler_report_op_info_[task_desc_info.op_name] = | |||||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
| task_desc_info_.emplace_back(task_desc_info); | |||||
| if (flag) { | |||||
| if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||||
| TaskDescInfo task_desc_info; | |||||
| string op_name = "super_kernel_" + to_string(task_index); | |||||
| task_desc_info.op_name = op_name; | |||||
| task_desc_info.task_id = task->GetSktTaskID(); | |||||
| profiler_report_op_info_[task_desc_info.op_name] = | |||||
| std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
| task_desc_info_.emplace_back(task_desc_info); | |||||
| } | |||||
| } | |||||
| // save task info for profiling | |||||
| SaveProfilingTaskDescInfo(op, task, task_def, task_index); | |||||
| } | } | ||||
| // launch dump kernel to aicpu | // launch dump kernel to aicpu | ||||
| GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | ||||
| @@ -623,6 +623,9 @@ class DavinciModel { | |||||
| Status DistributeTask(); | Status DistributeTask(); | ||||
| void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||||
| const domi::TaskDef &task_def, size_t task_index); | |||||
| uint8_t *MallocFeatureMapMem(size_t data_size); | uint8_t *MallocFeatureMapMem(size_t data_size); | ||||
| uint8_t *MallocWeightsMem(size_t weights_size); | uint8_t *MallocWeightsMem(size_t weights_size); | ||||
| @@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||||
| } | } | ||||
| GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | ||||
| auto op_desc = node->GetOpDesc(); | |||||
| std::string op_name = op_desc->GetName(); | |||||
| std::string dynamic_model_name = model->GetModelName(); | |||||
| uint32_t task_id = context_->GetTaskId(); | |||||
| uint32_t stream_id = context_->GetStreamId(); | |||||
| TaskDescInfo tmp_task_desc_info; | |||||
| tmp_task_desc_info.model_name = dynamic_model_name; | |||||
| tmp_task_desc_info.op_name = op_name; | |||||
| tmp_task_desc_info.block_dim = 0; | |||||
| auto task_defs = model->GetTaskDefs(node); | |||||
| if (task_defs != nullptr && (*task_defs).size() > 0) { | |||||
| const auto &task_def = (*task_defs)[0]; | |||||
| tmp_task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
| } | |||||
| tmp_task_desc_info.task_id = task_id; | |||||
| tmp_task_desc_info.stream_id = stream_id; | |||||
| tmp_task_desc_info.shape_type = "dynamic"; | |||||
| tmp_task_desc_info.cur_iter_num = graph_context_->iteration; | |||||
| GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", | |||||
| node->GetName().c_str(), task_id, stream_id); | |||||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||||
| task_desc_info = context_->GetProfilingTaskDescInfo(); | |||||
| context_->ClearProfilingTaskDescInfo(); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -247,7 +229,6 @@ Status NodeDoneCallback::ProfilingReport() { | |||||
| GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); | GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); | ||||
| std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
| TaskDescInfo tmp_task_desc_info; | |||||
| auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); | auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); | ||||
| if (profiling_ret != RT_ERROR_NONE) { | if (profiling_ret != RT_ERROR_NONE) { | ||||
| GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); | GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); | ||||
| @@ -182,16 +182,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
| } | } | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
| GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | ||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return rt_ret; | |||||
| } | |||||
| context.SetTaskId(task_id); | |||||
| context.SetStreamId(stream_id); | |||||
| GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
| // save profiling data | |||||
| (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
| } | } | ||||
| @@ -48,6 +48,8 @@ class AiCoreOpTask { | |||||
| bool GetClearAtomic() const {return clear_atomic_;} | bool GetClearAtomic() const {return clear_atomic_;} | ||||
| uint32_t GetBlockDim() const {return block_dim_;} | |||||
| protected: | protected: | ||||
| Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
| virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
| @@ -190,16 +190,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
| HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | ||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return rt_ret; | |||||
| } | |||||
| context.SetTaskId(task_id); | |||||
| context.SetStreamId(stream_id); | |||||
| GELOGD("AiCpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
| // save profiling data | |||||
| (void)context.SaveProfilingTaskDescInfo(kTaskTypeAicpu, 0); | |||||
| auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
| GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
| @@ -21,6 +21,7 @@ | |||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
| #include "hybrid/executor/subgraph_executor.h" | #include "hybrid/executor/subgraph_executor.h" | ||||
| #include "common/profiling/profiling_manager.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -498,5 +499,42 @@ bool TaskContext::NeedCallback() { | |||||
| Status TaskContext::Synchronize() { | Status TaskContext::Synchronize() { | ||||
| return execution_context_->Synchronize(GetStream()); | return execution_context_->Synchronize(GetStream()); | ||||
| } | } | ||||
| Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) { | |||||
| if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||||
| const NodeItem &node_item = GetNodeItem(); | |||||
| auto op_desc = node_item.GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
| return rt_ret; | |||||
| } | |||||
| GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id); | |||||
| const GraphExecutionContext * graph_context = GetExecutionContext(); | |||||
| GE_CHECK_NOTNULL(graph_context); | |||||
| const HybridModel *model = graph_context->model; | |||||
| GE_CHECK_NOTNULL(model); | |||||
| std::string op_name = op_desc->GetName(); | |||||
| std::string dynamic_model_name = model->GetModelName(); | |||||
| TaskDescInfo tmp_task_desc_info; | |||||
| tmp_task_desc_info.model_name = dynamic_model_name; | |||||
| tmp_task_desc_info.op_name = op_name; | |||||
| tmp_task_desc_info.block_dim = block_dim; | |||||
| tmp_task_desc_info.task_type = task_type; | |||||
| tmp_task_desc_info.task_id = task_id; | |||||
| tmp_task_desc_info.stream_id = stream_id; | |||||
| tmp_task_desc_info.shape_type = "dynamic"; | |||||
| tmp_task_desc_info.cur_iter_num = iteration_; | |||||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -22,6 +22,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
| #include "external/ge/ge_api_error_codes.h" | #include "external/ge/ge_api_error_codes.h" | ||||
| #include "framework/common/ge_types.h" | |||||
| #include "hybrid/common/tensor_value.h" | #include "hybrid/common/tensor_value.h" | ||||
| #include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
| #include "hybrid/executor/rt_callback_manager.h" | #include "hybrid/executor/rt_callback_manager.h" | ||||
| @@ -108,6 +109,10 @@ class TaskContext { | |||||
| void SetForceInferShape(bool force_infer_shape); | void SetForceInferShape(bool force_infer_shape); | ||||
| void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
  // Profiling records accumulated by SaveProfilingTaskDescInfo for this context.
  const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
  // Records one profiling entry (task/stream id, block dim, iteration) for the
  // kernel just launched on this context; call after the kernel launch.
  Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim);
  // Clears the accumulated records (callers invoke this after reporting them).
  void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }
| private: | private: | ||||
| TaskContext(GraphExecutionContext *execution_context, | TaskContext(GraphExecutionContext *execution_context, | ||||
| const NodeItem *node_item, | const NodeItem *node_item, | ||||
| @@ -127,6 +132,7 @@ class TaskContext { | |||||
| uint64_t iteration_ = 0; | uint64_t iteration_ = 0; | ||||
| uint32_t task_id_ = 0; | uint32_t task_id_ = 0; | ||||
| uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
| std::vector<TaskDescInfo> task_desc_info; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -70,6 +70,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
| tmp_task_desc_info.stream_id = stream_id; | tmp_task_desc_info.stream_id = stream_id; | ||||
| tmp_task_desc_info.shape_type = shape_type; | tmp_task_desc_info.shape_type = shape_type; | ||||
| tmp_task_desc_info.cur_iter_num = 0; | tmp_task_desc_info.cur_iter_num = 0; | ||||
| tmp_task_desc_info.task_type = op_task->GetTaskType(); | |||||
| GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | ||||
| task_desc_info.emplace_back(tmp_task_desc_info); | task_desc_info.emplace_back(tmp_task_desc_info); | ||||
| @@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
// Base implementation: a generic OpTask has no known engine, so it reports the
// invalid profiling task type; subclasses override with their concrete type.
uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }
| TbeOpTask::~TbeOpTask() { | TbeOpTask::~TbeOpTask() { | ||||
| if (sm_desc_ != nullptr) { | if (sm_desc_ != nullptr) { | ||||
| (void)rtMemFreeManaged(sm_desc_); | (void)rtMemFreeManaged(sm_desc_); | ||||
| @@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||||
| const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | ||||
// TBE kernels execute on AI Core: report the AI Core profiling task type.
uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
| Status TbeOpTask::LaunchKernel(rtStream_t stream) { | Status TbeOpTask::LaunchKernel(rtStream_t stream) { | ||||
| GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | ||||
| auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | ||||
| @@ -802,6 +806,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
| return DoUpdateArgTable(param, false); | return DoUpdateArgTable(param, false); | ||||
| } | } | ||||
// AI CPU based tasks report the AI CPU profiling task type.
uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }
| void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | ||||
| arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | ||||
| arg_count = io_addr_host_.size(); | arg_count = io_addr_host_.size(); | ||||
| @@ -52,6 +52,7 @@ class OpTask { | |||||
| std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
| std::vector<DataBuffer> &output_buffers, | std::vector<DataBuffer> &output_buffers, | ||||
| rtStream_t stream); | rtStream_t stream); | ||||
| virtual uint32_t GetTaskType() const; | |||||
| protected: | protected: | ||||
| Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | ||||
| @@ -85,6 +86,7 @@ class TbeOpTask : public OpTask { | |||||
| size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
| const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
| void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | ||||
| uint32_t GetTaskType() const override; | |||||
| private: | private: | ||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| @@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask { | |||||
| ~AiCpuBaseTask() override; | ~AiCpuBaseTask() override; | ||||
| UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | ||||
| Status UpdateArgTable(const SingleOpModelParam ¶m) override; | Status UpdateArgTable(const SingleOpModelParam ¶m) override; | ||||
| uint32_t GetTaskType() const override; | |||||
| protected: | protected: | ||||
| Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
| Status SetInputConst(); | Status SetInputConst(); | ||||
| @@ -41,12 +41,7 @@ enum FrameworkType { | |||||
| }; | }; | ||||
| const std::map<std::string, std::string> kFwkTypeToStr = { | const std::map<std::string, std::string> kFwkTypeToStr = { | ||||
| {"0", "Caffe"}, | |||||
| {"1", "MindSpore"}, | |||||
| {"3", "TensorFlow"}, | |||||
| {"4", "Android_NN"}, | |||||
| {"5", "Onnx"} | |||||
| }; | |||||
| {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; | |||||
| enum OpEngineType { | enum OpEngineType { | ||||
| ENGINE_SYS = 0, // default engine | ENGINE_SYS = 0, // default engine | ||||
| @@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN | |||||
| const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | ||||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | ||||
| // profiling data | |||||
| const uint32_t kTaskTypeAicore = 0; | |||||
| const uint32_t kTaskTypeAicpu = 1; | |||||
| const uint32_t kTaskTypeInvalid = 0xFFFF; | |||||
| // Data cache, including data address and length | // Data cache, including data address and length | ||||
| struct DataBuffer { | struct DataBuffer { | ||||
| public: | public: | ||||
| @@ -256,6 +256,7 @@ struct TaskDescInfo { | |||||
| uint32_t stream_id; | uint32_t stream_id; | ||||
| std::string shape_type; | std::string shape_type; | ||||
| int64_t cur_iter_num; | int64_t cur_iter_num; | ||||
| uint32_t task_type; | |||||
| }; | }; | ||||
| // Profiling info of graph | // Profiling info of graph | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 | |||||
| Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8 | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 | |||||
| Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5 | |||||