| @@ -283,7 +283,7 @@ Status GeExecutor::Initialize() { | |||||
| // Start profiling | // Start profiling | ||||
| Options profiling_options; | Options profiling_options; | ||||
| profiling_options.device_id = 0; | profiling_options.device_id = 0; | ||||
| profiling_options.job_id = ""; | |||||
| profiling_options.job_id = "1"; | |||||
| ProfilingManager::Instance().Init(profiling_options); | ProfilingManager::Instance().Init(profiling_options); | ||||
| isInit_ = true; | isInit_ = true; | ||||
| @@ -17,6 +17,7 @@ | |||||
| #include "single_op/single_op.h" | #include "single_op/single_op.h" | ||||
| #include "common/fmk_types.h" | #include "common/fmk_types.h" | ||||
| #include "common/ge_types.h" | |||||
| #include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| @@ -34,6 +35,45 @@ size_t GetAlignedSize(size_t size) { | |||||
| size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | ||||
| return aligned_size; | return aligned_size; | ||||
| } | } | ||||
| Status ProfilingTaskInfo(OpTask *op_task) { | |||||
| if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| string model_name; | |||||
| string op_name; | |||||
| uint32_t model_id; | |||||
| uint32_t block_dim; | |||||
| if (GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed"); | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | |||||
| } | |||||
| GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str()); | |||||
| std::vector<TaskDescInfo> task_desc_info; | |||||
| uint32_t task_id = 0; | |||||
| uint32_t stream_id = 0; | |||||
| if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | |||||
| } | |||||
| TaskDescInfo tmp_task_desc_info; | |||||
| tmp_task_desc_info.model_name = model_name_; | |||||
| tmp_task_desc_info.op_name = op_name; | |||||
| tmp_task_desc_info.block_dim = block_dim; | |||||
| tmp_task_desc_info.task_id = task_id; | |||||
| tmp_task_desc_info.stream_id = stream_id; | |||||
| GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||||
| task_desc_info.emplace_back(tmp_task_desc_info); | |||||
| std::vector<ComputeGraphDescInfo> compute_graph_info; | |||||
| auto &profiling_manager = ProfilingManager::Instance(); | |||||
| profiling_manager.ReportProfilingData(model_id_, task_desc_info, compute_graph_info, | |||||
| !profiling_manager.IsAclApiMode()); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { | SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { | ||||
| @@ -169,6 +209,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task)); | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| @@ -281,9 +322,17 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
| } | } | ||||
| if (op_task_->GetOpTaskType() == OP_TASK_TBE) { | if (op_task_->GetOpTaskType() == OP_TASK_TBE) { | ||||
| return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); | |||||
| auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs); | |||||
| if (ret == SUCCESS) { | |||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_)); | |||||
| } | |||||
| return ret; | |||||
| } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { | } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { | ||||
| return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); | |||||
| auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); | |||||
| if (aicpu_ret == SUCCESS) { | |||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_)); | |||||
| } | |||||
| return aicpu_ret; | |||||
| } else { | } else { | ||||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, | GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, | ||||
| "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", | "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", | ||||
| @@ -157,6 +157,7 @@ Status SingleOpModel::LoadAllNodes() { | |||||
| auto ge_model = model_helper_.GetGeModel(); | auto ge_model = model_helper_.GetGeModel(); | ||||
| GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
| Graph graph = ge_model->GetGraph(); | Graph graph = ge_model->GetGraph(); | ||||
| model_id_ = ge_model->GetModelId(); | |||||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | auto compute_graph = GraphUtils::GetComputeGraph(graph); | ||||
| if (compute_graph == nullptr) { | if (compute_graph == nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str()); | GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str()); | ||||
| @@ -248,6 +249,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
| single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | ||||
| ParseArgTable(tbe_task, single_op); | ParseArgTable(tbe_task, single_op); | ||||
| tbe_task->SetModelArgs(model_name_, model_id_); | |||||
| single_op.tasks_.emplace_back(tbe_task); | single_op.tasks_.emplace_back(tbe_task); | ||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | ||||
| GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
| @@ -258,6 +260,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| task->SetModelArgs(model_name_, model_id_); | |||||
| single_op.tasks_.emplace_back(task); | single_op.tasks_.emplace_back(task); | ||||
| } else { | } else { | ||||
| GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | ||||
| @@ -273,6 +276,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| aicpu_task->SetModelArgs(model_name_, model_id_); | |||||
| single_op.tasks_.emplace_back(aicpu_task); | single_op.tasks_.emplace_back(aicpu_task); | ||||
| } else { | } else { | ||||
| // skip | // skip | ||||
| @@ -393,6 +397,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
| GELOGD("Building TBE task"); | GELOGD("Building TBE task"); | ||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | ||||
| tbe_task->SetModelArgs(model_name_, model_id_); | |||||
| single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | ||||
| GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
| @@ -400,6 +405,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
| uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | ||||
| GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | ||||
| task->SetModelArgs(model_name_, model_id_); | |||||
| single_op.op_task_.reset(task); | single_op.op_task_.reset(task); | ||||
| } else { | } else { | ||||
| GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | ||||
| @@ -446,6 +452,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| const TaskDef &copy_task_def = tasks[i]; | const TaskDef &copy_task_def = tasks[i]; | ||||
| GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | ||||
| } | } | ||||
| aicpu_task->SetModelArgs(model_name_, model_id_); | |||||
| single_op.op_task_.reset(aicpu_task); | single_op.op_task_.reset(aicpu_task); | ||||
| } else { | } else { | ||||
| // skip | // skip | ||||
| @@ -77,6 +77,7 @@ class SingleOpModel { | |||||
| void ParseArgTable(TbeOpTask *task, SingleOp &op); | void ParseArgTable(TbeOpTask *task, SingleOp &op); | ||||
| std::string model_name_; | std::string model_name_; | ||||
| uint32_t model_id_ = 0; | |||||
| const void *ori_model_data_; | const void *ori_model_data_; | ||||
| uint32_t ori_model_size_; | uint32_t ori_model_size_; | ||||
| @@ -93,6 +93,21 @@ const vector<int64_t> &OpTask::GetWorkspaceSizes() const { return workspace_size | |||||
// Replaces the stored workspace sizes with a copy of `workspace_sizes`.
| void OpTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) { workspace_sizes_ = workspace_sizes; } | void OpTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) { workspace_sizes_ = workspace_sizes; } | ||||
| void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | |||||
| model_name_ = model_name; | |||||
| model_id_ = model_id; | |||||
| } | |||||
| Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, | |||||
| uint32_t &block_dim) { | |||||
| model_name = model_name_; | |||||
| model_id = model_id_; | |||||
| block_dim = block_dim_; | |||||
| GE_CHECK_NOTNULL(op_desc_); | |||||
| op_name = op_desc_->GetName(); | |||||
| return SUCCESS; | |||||
| } | |||||
| TbeOpTask::~TbeOpTask() { | TbeOpTask::~TbeOpTask() { | ||||
| if (sm_desc_ != nullptr) { | if (sm_desc_ != nullptr) { | ||||
| (void)rtMemFreeManaged(sm_desc_); | (void)rtMemFreeManaged(sm_desc_); | ||||
| @@ -58,6 +58,8 @@ class OpTask { | |||||
| virtual const void *GetIOAddr() const = 0; | virtual const void *GetIOAddr() const = 0; | ||||
| const vector<int64_t> &GetWorkspaceSizes() const; | const vector<int64_t> &GetWorkspaceSizes() const; | ||||
| void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | ||||
| void SetModelArgs(std::string model_name, uint32_t model_id); | |||||
| Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim); | |||||
| const OpDescPtr &GetOpdesc() const {return op_desc_;} | const OpDescPtr &GetOpdesc() const {return op_desc_;} | ||||
| Status OpenDump(rtStream_t stream); | Status OpenDump(rtStream_t stream); | ||||
| void SetIoAddrsForDump(const vector<uint64_t> &io_addrs_for_dump) { | void SetIoAddrsForDump(const vector<uint64_t> &io_addrs_for_dump) { | ||||
| @@ -77,6 +79,9 @@ class OpTask { | |||||
| DumpProperties dump_properties_; | DumpProperties dump_properties_; | ||||
| DumpOp dump_op_; | DumpOp dump_op_; | ||||
| OpDescPtr op_desc_; | OpDescPtr op_desc_; | ||||
| std::string model_name_; | |||||
| uint32_t model_id_ = 0; | |||||
| uint32_t block_dim_ = 1; | |||||
| std::vector<uint64_t> io_addrs_for_dump_; | std::vector<uint64_t> io_addrs_for_dump_; | ||||
| }; | }; | ||||
| @@ -115,7 +120,6 @@ class TbeOpTask : public OpTask { | |||||
| const void *stub_func_ = nullptr; | const void *stub_func_ = nullptr; | ||||
| std::unique_ptr<uint8_t[]> args_; | std::unique_ptr<uint8_t[]> args_; | ||||
| size_t arg_size_ = 0; | size_t arg_size_ = 0; | ||||
| uint32_t block_dim_ = 1; | |||||
| void *sm_desc_ = nullptr; | void *sm_desc_ = nullptr; | ||||
| std::string stub_name_; | std::string stub_name_; | ||||
| @@ -239,7 +243,6 @@ private: | |||||
| std::string kernel_name_; | std::string kernel_name_; | ||||
| std::unique_ptr<uint8_t[]> args_; | std::unique_ptr<uint8_t[]> args_; | ||||
| size_t arg_size_ = 0; | size_t arg_size_ = 0; | ||||
| uint32_t block_dim_ = 1; | |||||
| void *sm_desc_ = nullptr; | void *sm_desc_ = nullptr; | ||||
| void *io_addr_ = nullptr; | void *io_addr_ = nullptr; | ||||
| bool is_custom_ = false; | bool is_custom_ = false; | ||||