Browse Source

Feature: Support single op profiling

tags/v1.2.0
l00444296 3 years ago
parent
commit
8b010963be
6 changed files with 80 additions and 5 deletions
  1. +1
    -1
      ge/executor/ge_executor.cc
  2. +51
    -2
      ge/single_op/single_op.cc
  3. +7
    -0
      ge/single_op/single_op_model.cc
  4. +1
    -0
      ge/single_op/single_op_model.h
  5. +15
    -0
      ge/single_op/task/op_task.cc
  6. +5
    -2
      ge/single_op/task/op_task.h

+ 1
- 1
ge/executor/ge_executor.cc View File

@@ -283,7 +283,7 @@ Status GeExecutor::Initialize() {
// Start profiling
Options profiling_options;
profiling_options.device_id = 0;
profiling_options.job_id = "";
profiling_options.job_id = "1";
ProfilingManager::Instance().Init(profiling_options);

isInit_ = true;


+ 51
- 2
ge/single_op/single_op.cc View File

@@ -17,6 +17,7 @@
#include "single_op/single_op.h"

#include "common/fmk_types.h"
#include "common/ge_types.h"
#include "common/math/math_util.h"
#include "common/profiling/profiling_manager.h"
#include "framework/common/debug/ge_log.h"
@@ -34,6 +35,45 @@ size_t GetAlignedSize(size_t size) {
size_t aligned_size = (size + 2 * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
return aligned_size;
}

// Reports profiling data for a single-op task to the ProfilingManager.
//
// Does nothing and returns SUCCESS when model-execute profiling is switched off.
// Otherwise it collects the model name, op name, model id and block dim from
// `op_task`, queries the runtime for a task id / stream id pair, and forwards one
// TaskDescInfo entry (plus an empty compute-graph list) to ReportProfilingData().
//
// @param op_task  Task whose profiling arguments are reported; must not be null
//                 when profiling is enabled.
// @return SUCCESS on success or when profiling is disabled;
//         ACL_ERROR_GE_PARAM_INVALID if `op_task` is null, if its profiling
//         arguments cannot be read, or if the runtime id query fails.
Status ProfilingTaskInfo(OpTask *op_task) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    return SUCCESS;
  }

  if (op_task == nullptr) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed");
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  std::string model_name;
  std::string op_name;
  uint32_t model_id = 0;
  uint32_t block_dim = 0;
  // GetProfilingArgs is a member of OpTask, so it has to be invoked on the task
  // itself; this free function has no access to OpTask's private fields.
  if (op_task->GetProfilingArgs(model_name, op_name, model_id, block_dim) != SUCCESS) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed");
    return ACL_ERROR_GE_PARAM_INVALID;
  }
  GELOGD("ProfilingReport of op[%s] model[%s] start.", op_name.c_str(), model_name.c_str());

  // NOTE(review): presumably yields the ids of the task most recently launched
  // from this thread - confirm against the runtime API documentation.
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed.");
    return ACL_ERROR_GE_PARAM_INVALID;
  }

  // Use the values returned by GetProfilingArgs (not member-style names, which
  // do not exist in this scope).
  TaskDescInfo tmp_task_desc_info;
  tmp_task_desc_info.model_name = model_name;
  tmp_task_desc_info.op_name = op_name;
  tmp_task_desc_info.block_dim = block_dim;
  tmp_task_desc_info.task_id = task_id;
  tmp_task_desc_info.stream_id = stream_id;
  GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);

  std::vector<TaskDescInfo> task_desc_info;
  task_desc_info.emplace_back(tmp_task_desc_info);

  // A single op has no compute-graph description to report; pass an empty list.
  std::vector<ComputeGraphDescInfo> compute_graph_info;

  auto &profiling_manager = ProfilingManager::Instance();
  profiling_manager.ReportProfilingData(model_id, task_desc_info, compute_graph_info,
                                        !profiling_manager.IsAclApiMode());
  return SUCCESS;
}
} // namespace

SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) {
@@ -169,6 +209,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
if (ret != SUCCESS) {
return ret;
}
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task));
}

return ret;
@@ -281,9 +322,17 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
}

if (op_task_->GetOpTaskType() == OP_TASK_TBE) {
return ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
auto ret = ExecuteTbeTask(input_desc, inputs, output_desc, outputs);
if (ret == SUCCESS) {
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_));
}
return ret;
} else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) {
return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
auto aicpu_ret = op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_);
if (aicpu_ret == SUCCESS) {
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_));
}
return aicpu_ret;
} else {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID,
"Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u",


+ 7
- 0
ge/single_op/single_op_model.cc View File

@@ -157,6 +157,7 @@ Status SingleOpModel::LoadAllNodes() {
auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model);
Graph graph = ge_model->GetGraph();
model_id_ = ge_model->GetModelId();
auto compute_graph = GraphUtils::GetComputeGraph(graph);
if (compute_graph == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str());
@@ -248,6 +249,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {

single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
ParseArgTable(tbe_task, single_op);
tbe_task->SetModelArgs(model_name_, model_id_);
single_op.tasks_.emplace_back(tbe_task);
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
@@ -258,6 +260,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
if (ret != SUCCESS) {
return ret;
}
task->SetModelArgs(model_name_, model_id_);
single_op.tasks_.emplace_back(task);
} else {
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
@@ -273,6 +276,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
if (ret != SUCCESS) {
return ret;
}
aicpu_task->SetModelArgs(model_name_, model_id_);
single_op.tasks_.emplace_back(aicpu_task);
} else {
// skip
@@ -393,6 +397,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_);
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
@@ -400,6 +405,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id);
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id));
task->SetModelArgs(model_name_, model_id_);
single_op.op_task_.reset(task);
} else {
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
@@ -446,6 +452,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
const TaskDef &copy_task_def = tasks[i];
GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex()));
}
aicpu_task->SetModelArgs(model_name_, model_id_);
single_op.op_task_.reset(aicpu_task);
} else {
// skip


+ 1
- 0
ge/single_op/single_op_model.h View File

@@ -77,6 +77,7 @@ class SingleOpModel {
void ParseArgTable(TbeOpTask *task, SingleOp &op);

std::string model_name_;
uint32_t model_id_ = 0;
const void *ori_model_data_;
uint32_t ori_model_size_;



+ 15
- 0
ge/single_op/task/op_task.cc View File

@@ -93,6 +93,21 @@ const vector<int64_t> &OpTask::GetWorkspaceSizes() const { return workspace_size

void OpTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) { workspace_sizes_ = workspace_sizes; }

void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
model_name_ = model_name;
model_id_ = model_id;
}

// Copies the profiling-related fields of this task into the caller's variables.
//
// @param model_name  [out] receives model_name_.
// @param op_name     [out] receives the name of op_desc_.
// @param model_id    [out] receives model_id_.
// @param block_dim   [out] receives block_dim_.
// @return SUCCESS once every output has been written.
//         NOTE(review): GE_CHECK_NOTNULL presumably returns an error status when
//         op_desc_ is null - confirm against the macro definition.
Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id,
                                uint32_t &block_dim) {
  // The cached fields are copied out before op_desc_ is inspected.
  block_dim = block_dim_;
  model_id = model_id_;
  model_name = model_name_;

  // op_name is the only output that depends on op_desc_.
  GE_CHECK_NOTNULL(op_desc_);
  op_name = op_desc_->GetName();

  return SUCCESS;
}

TbeOpTask::~TbeOpTask() {
if (sm_desc_ != nullptr) {
(void)rtMemFreeManaged(sm_desc_);


+ 5
- 2
ge/single_op/task/op_task.h View File

@@ -58,6 +58,8 @@ class OpTask {
virtual const void *GetIOAddr() const = 0;
const vector<int64_t> &GetWorkspaceSizes() const;
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
void SetModelArgs(std::string model_name, uint32_t model_id);
Status GetProfilingArgs(std::string &model_name, std::string &op_name, uint32_t &model_id, uint32_t &block_dim);
const OpDescPtr &GetOpdesc() const {return op_desc_;}
Status OpenDump(rtStream_t stream);
void SetIoAddrsForDump(const vector<uint64_t> &io_addrs_for_dump) {
@@ -77,6 +79,9 @@ class OpTask {
DumpProperties dump_properties_;
DumpOp dump_op_;
OpDescPtr op_desc_;
std::string model_name_;
uint32_t model_id_ = 0;
uint32_t block_dim_ = 1;
std::vector<uint64_t> io_addrs_for_dump_;
};

@@ -115,7 +120,6 @@ class TbeOpTask : public OpTask {
const void *stub_func_ = nullptr;
std::unique_ptr<uint8_t[]> args_;
size_t arg_size_ = 0;
uint32_t block_dim_ = 1;
void *sm_desc_ = nullptr;
std::string stub_name_;

@@ -239,7 +243,6 @@ private:
std::string kernel_name_;
std::unique_ptr<uint8_t[]> args_;
size_t arg_size_ = 0;
uint32_t block_dim_ = 1;
void *sm_desc_ = nullptr;
void *io_addr_ = nullptr;
bool is_custom_ = false;


Loading…
Cancel
Save