Browse Source

Add iteration number (and shape type) to profiling data

tags/v1.2.0
zhengyuanhua 3 years ago
parent
commit
60fd661c3e
13 changed files with 81 additions and 19 deletions
  1. +6
    -2
      ge/common/profiling/profiling_manager.cc
  2. +3
    -3
      ge/graph/load/new_model_manager/davinci_model.cc
  3. +11
    -8
      ge/hybrid/executor/worker/execution_engine.cc
  4. +4
    -1
      ge/hybrid/model/hybrid_model_builder.cc
  5. +1
    -0
      ge/hybrid/model/node_item.h
  6. +10
    -0
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  7. +11
    -0
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  8. +16
    -0
      ge/hybrid/node_executor/task_context.cc
  9. +8
    -0
      ge/hybrid/node_executor/task_context.h
  10. +7
    -3
      ge/single_op/single_op.cc
  11. +2
    -0
      inc/framework/common/ge_types.h
  12. +1
    -1
      metadef
  13. +1
    -1
      parser

+ 6
- 2
ge/common/profiling/profiling_manager.cc View File

@@ -212,12 +212,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
uint32_t block_dim = task.block_dim;
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim).append(" ")
.append(std::to_string(block_dim)).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append("\n"));
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append("\n");

ReporterData reporter_data{};
reporter_data.deviceId = device_id;


+ 3
- 3
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -2993,9 +2993,7 @@ Status DavinciModel::DistributeTask() {
}

auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL)
&& (task_type != RT_MODEL_TASK_KERNEL_EX)
&& (task_type != RT_MODEL_TASK_HCCL);
bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) && (task_type != RT_MODEL_TASK_KERNEL_EX);
GE_IF_BOOL_EXEC(no_need_profiling, continue);

SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
@@ -3010,6 +3008,8 @@ Status DavinciModel::DistributeTask() {
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {


+ 11
- 8
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -151,18 +151,19 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(model);

// only report aicpu and aicore node
bool is_profiling_report = context_->GetNodeItem().is_profiling_report;
if (!is_profiling_report) {
GELOGD("Node[%s] is not aicore or aicpu, and no need to report data.", node->GetName().c_str());
return SUCCESS;
}

GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
auto op_desc = node->GetOpDesc();
std::string op_name = op_desc->GetName();
std::string dynamic_model_name = model->GetModelName();

uint32_t task_id = 0;
uint32_t stream_id = 0;
if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
GELOGE(PARAM_INVALID, "Get task_id and stream_id failed.");
return PARAM_INVALID;
}

uint32_t task_id = context_->GetTaskId();
uint32_t stream_id = context_->GetStreamId();
TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_name;
@@ -174,6 +175,8 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
}
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = "dynamic";
tmp_task_desc_info.cur_iter_num = graph_context_->iteration;
GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
node->GetName().c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);


+ 4
- 1
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -226,7 +226,10 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
new_node->node_id = node_index;
new_node->op_desc->SetId(node_index);
node_index += 1;

NodeExecutorManager::ExecutorType executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node);
new_node->is_profiling_report = (executor_type == NodeExecutorManager::ExecutorType::AICORE) ||
(executor_type == NodeExecutorManager::ExecutorType::AICPU_TF) ||
(executor_type == NodeExecutorManager::ExecutorType::AICPU_CUSTOM);
*node_item = new_node.get();
node_items[node] = std::move(new_node);
return SUCCESS;


+ 1
- 0
ge/hybrid/model/node_item.h View File

@@ -99,6 +99,7 @@ struct NodeItem {
std::map<int, int> reuse_inputs;
std::map<int, int> reuse_outputs;
int num_static_input_shapes = 0;
bool is_profiling_report = false;

private:
explicit NodeItem(NodePtr node);


+ 10
- 0
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -165,6 +165,16 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
}
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 11
- 0
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -189,6 +189,17 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(

GE_CHK_STATUS_RET(LaunchTask(context));

uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);

auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");


+ 16
- 0
ge/hybrid/node_executor/task_context.cc View File

@@ -319,6 +319,22 @@ void TaskContext::SetStatus(Status status) {
}
}

uint32_t TaskContext::GetTaskId() const {
return task_id_;
}

void TaskContext::SetTaskId(uint32_t task_id) {
task_id_ = task_id;
}

uint32_t TaskContext::GetStreamId() const {
return stream_id_;
}

void TaskContext::SetStreamId(uint32_t stream_id) {
stream_id_ = stream_id;
}

Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
GE_CHECK_NOTNULL(buffer);
if (ori_addr == nullptr) {


+ 8
- 0
ge/hybrid/node_executor/task_context.h View File

@@ -96,6 +96,12 @@ class TaskContext {

void SetStatus(Status status);

uint32_t GetTaskId() const;
void SetTaskId(uint32_t task_id);

uint32_t GetStreamId() const;
void SetStreamId(uint32_t stream_id);

bool IsForceInferShape() const;
void SetForceInferShape(bool force_infer_shape);
void *handle_ = nullptr;
@@ -117,6 +123,8 @@ class TaskContext {
Status status_ = SUCCESS;
std::vector<void *> workspaces_;
uint64_t iteration_ = 0;
uint32_t task_id_= 0;
uint32_t stream_id_ = 0;
};
} // namespace hybrid
} // namespace ge


+ 7
- 3
ge/single_op/single_op.cc View File

@@ -32,13 +32,15 @@ namespace ge {
namespace {
const size_t kDataMemAlignSize = 32;
const size_t kDataMemAlignUnit = 2;
const string kShapeTypeDynamic = "dynamic";
const string kShapeTypeStatic = "static";

size_t GetAlignedSize(size_t size) {
size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
return aligned_size;
}

Status ProfilingTaskInfo(OpTask *op_task) {
Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
if (!ProfilingManager::Instance().ProfilingModelLoadOn()) {
return SUCCESS;
}
@@ -66,6 +68,8 @@ Status ProfilingTaskInfo(OpTask *op_task) {
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = shape_type;
tmp_task_desc_info.cur_iter_num = 0;
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);

@@ -193,7 +197,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
if (ret != SUCCESS) {
return ret;
}
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task));
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task, kShapeTypeStatic));
}

return ret;
@@ -255,7 +259,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
std::lock_guard<std::mutex> lk(*stream_mutex_);

GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get()));
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
return SUCCESS;
}
} // namespace ge

+ 2
- 0
inc/framework/common/ge_types.h View File

@@ -248,6 +248,8 @@ struct TaskDescInfo {
uint32_t block_dim;
uint32_t task_id;
uint32_t stream_id;
std::string shape_type;
int64_t cur_iter_num;
};

// Profiling info of graph


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit 191b7ad10e99d0b8d800ce85dae3ef7a2a146870
Subproject commit 2f774bcd66b0d4b8d65b629f50148e9dd2248403

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit 8317ba6e886da54f4f161bf4a7fc40de27d6ce3c
Subproject commit 89e2455f653807f7bb3177b9b5eb096100a600db

Loading…
Cancel
Save