Browse Source

!968 profiling task desc info

From: @zhengyuanhua
Reviewed-by: @xchu42,@ji_chen
Signed-off-by: @ji_chen
tags/v1.2.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
7bb0eeb614
15 changed files with 142 additions and 87 deletions
  1. +3
    -1
      ge/common/profiling/profiling_manager.cc
  2. +63
    -36
      ge/graph/load/new_model_manager/davinci_model.cc
  3. +3
    -0
      ge/graph/load/new_model_manager/davinci_model.h
  4. +3
    -22
      ge/hybrid/executor/worker/execution_engine.cc
  5. +2
    -10
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  6. +2
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.h
  7. +2
    -10
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  8. +38
    -0
      ge/hybrid/node_executor/task_context.cc
  9. +6
    -0
      ge/hybrid/node_executor/task_context.h
  10. +1
    -0
      ge/single_op/single_op.cc
  11. +6
    -0
      ge/single_op/task/op_task.cc
  12. +4
    -0
      ge/single_op/task/op_task.h
  13. +7
    -6
      inc/framework/common/ge_types.h
  14. +1
    -1
      metadef
  15. +1
    -1
      parser

+ 3
- 1
ge/common/profiling/profiling_manager.cc View File

@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
uint32_t task_type = task.task_type;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append("\n");
.append(std::to_string(cur_iter_num)).append(" ")
.append(std::to_string(task_type)).append("\n");

ReporterData reporter_data{};
reporter_data.deviceId = device_id;


+ 63
- 36
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -3065,6 +3065,65 @@ Status DavinciModel::MallocKnownArgs() {
return SUCCESS;
}

// Record profiling description info for one distributed task.
// Builds a TaskDescInfo entry (model/op name, block dim, task/stream id, task
// type derived from task_def) and appends it to task_desc_info_. When
// super-kernel fusion is active (L1 fusion option or SKT_ENABLE env var), an
// extra entry is appended for the super kernel itself.
// Called once per task from DistributeTask(); task_index is the task's
// position in the model task list.
void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
                                             const domi::TaskDef &task_def, size_t task_index) {
  // Reset accumulated entries only on the first task. This function runs once
  // per task inside DistributeTask()'s loop; clearing unconditionally here
  // would discard every previously saved entry, leaving only the last task.
  if (task_index == 0) {
    task_desc_info_.clear();
  }
  bool flag = GetL1FusionEnableOption();
  char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
  INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
  int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
  if (env_flag != 0) {
    flag = true;
  }

  TaskDescInfo task_desc_info;
  // Prefer the om file name when present; fall back to the model name.
  if (!om_name_.empty()) {
    task_desc_info.model_name = om_name_;
  } else {
    task_desc_info.model_name = name_;
  }
  task_desc_info.op_name = op->GetName();
  task_desc_info.block_dim = task_def.kernel().block_dim();
  task_desc_info.task_id = task->GetTaskID();
  task_desc_info.stream_id = task->GetStreamId();
  task_desc_info.shape_type = "static";
  task_desc_info.cur_iter_num = 0;
  // Map the runtime task definition onto a profiling task type; anything not
  // recognized stays kTaskTypeInvalid.
  task_desc_info.task_type = kTaskTypeInvalid;
  auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
  if (model_task_type == RT_MODEL_TASK_KERNEL) {
    const domi::KernelDef &kernel_def = task_def.kernel();
    const auto &context = kernel_def.context();
    auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
    if (kernel_type == ccKernelType::TE) {
      task_desc_info.task_type = kTaskTypeAicore;
    } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
      task_desc_info.task_type = kTaskTypeAicpu;
    } else {
      GELOGD("Other kernel type: %u", context.kernel_type());
    }
  } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) {
    task_desc_info.task_type = kTaskTypeAicpu;
  } else {
    GELOGD("Skip task type: %d", static_cast<int>(model_task_type));
  }
  profiler_report_op_info_[task_desc_info.op_name] =
      std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
  task_desc_info_.emplace_back(task_desc_info);
  // Additionally report the fused super kernel when fusion is enabled and the
  // task carries a valid super-kernel task id (0xFFFFFFFF means "none").
  if (flag && task->GetSktTaskID() != 0xFFFFFFFF) {
    TaskDescInfo skt_task_desc_info;
    // Fill every field explicitly: TaskDescInfo has no member initializers,
    // so leaving the integral members untouched would report garbage.
    skt_task_desc_info.model_name = task_desc_info.model_name;
    skt_task_desc_info.op_name = "super_kernel_" + to_string(task_index);
    skt_task_desc_info.block_dim = 0;
    skt_task_desc_info.task_id = task->GetSktTaskID();
    // The super kernel is launched on the same stream as the fused task.
    skt_task_desc_info.stream_id = task->GetStreamId();
    skt_task_desc_info.shape_type = "static";
    skt_task_desc_info.cur_iter_num = 0;
    // NOTE(review): kept neutral — super-kernel type is not derivable from
    // task_def here; confirm whether it should be kTaskTypeAicore.
    skt_task_desc_info.task_type = kTaskTypeInvalid;
    profiler_report_op_info_[skt_task_desc_info.op_name] =
        std::pair<uint32_t, uint32_t>(skt_task_desc_info.task_id, skt_task_desc_info.stream_id);
    task_desc_info_.emplace_back(skt_task_desc_info);
  }
}

Status DavinciModel::DistributeTask() {
GELOGI("do Distribute.");
for (auto &task : cpu_task_list_) {
@@ -3075,19 +3134,11 @@ Status DavinciModel::DistributeTask() {
GE_CHK_STATUS_RET(task->Distribute());
}

task_desc_info_.clear();
bool flag = GetL1FusionEnableOption();
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
if (env_flag != 0) {
flag = true;
}

const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
auto &task_def = model_task_def->task(task_index);
auto &task = task_list_.at(task_index);
GE_CHECK_NOTNULL(task);
GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index);
// for data dump
auto op_index = std::max(task_def.kernel().context().op_index(),
@@ -3107,33 +3158,9 @@ Status DavinciModel::DistributeTask() {
GE_IF_BOOL_EXEC(no_need_profiling, continue);

SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
// Load task info for profiling
TaskDescInfo task_desc_info;
if (!om_name_.empty()) {
task_desc_info.model_name = om_name_;
} else {
task_desc_info.model_name = name_;
}
task_desc_info.op_name = op->GetName();
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
profiler_report_op_info_[task_desc_info.op_name] =
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {
TaskDescInfo task_desc_info;
string op_name = "super_kernel_" + to_string(task_index);
task_desc_info.op_name = op_name;
task_desc_info.task_id = task->GetSktTaskID();
profiler_report_op_info_[task_desc_info.op_name] =
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
task_desc_info_.emplace_back(task_desc_info);
}
}

// save task info for profiling
SaveProfilingTaskDescInfo(op, task, task_def, task_index);
}
// launch dump kernel to aicpu
GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed.");


+ 3
- 0
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -623,6 +623,9 @@ class DavinciModel {

Status DistributeTask();

void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
const domi::TaskDef &task_def, size_t task_index);

uint8_t *MallocFeatureMapMem(size_t data_size);

uint8_t *MallocWeightsMem(size_t weights_size);


+ 3
- 22
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
}

GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
auto op_desc = node->GetOpDesc();
std::string op_name = op_desc->GetName();
std::string dynamic_model_name = model->GetModelName();
uint32_t task_id = context_->GetTaskId();
uint32_t stream_id = context_->GetStreamId();
TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_name;
tmp_task_desc_info.block_dim = 0;
auto task_defs = model->GetTaskDefs(node);
if (task_defs != nullptr && (*task_defs).size() > 0) {
const auto &task_def = (*task_defs)[0];
tmp_task_desc_info.block_dim = task_def.kernel().block_dim();
}
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = "dynamic";
tmp_task_desc_info.cur_iter_num = graph_context_->iteration;
GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
node->GetName().c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);
task_desc_info = context_->GetProfilingTaskDescInfo();
context_->ClearProfilingTaskDescInfo();

return SUCCESS;
}

@@ -247,7 +229,6 @@ Status NodeDoneCallback::ProfilingReport() {

GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str());
std::vector<TaskDescInfo> task_desc_info;
TaskDescInfo tmp_task_desc_info;
auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info);
if (profiling_ret != RT_ERROR_NONE) {
GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str());


+ 2
- 10
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -182,16 +182,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
}
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
// save profiling data
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim());
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 2
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -48,6 +48,8 @@ class AiCoreOpTask {

bool GetClearAtomic() const {return clear_atomic_;}

uint32_t GetBlockDim() const {return block_dim_;}

protected:
Status UpdateTilingInfo(TaskContext &context);
virtual std::string GetKeyForOpParamSize() const;


+ 2
- 10
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -190,16 +190,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(

HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str());

uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
// save profiling data
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicpu, 0);

auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());


+ 38
- 0
ge/hybrid/node_executor/task_context.cc View File

@@ -21,6 +21,7 @@
#include "graph/debug/ge_attr_define.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/executor/subgraph_executor.h"
#include "common/profiling/profiling_manager.h"

namespace ge {
namespace hybrid {
@@ -498,5 +499,42 @@ bool TaskContext::NeedCallback() {
Status TaskContext::Synchronize() {
return execution_context_->Synchronize(GetStream());
}

// Collect profiling task description info for the node wrapped by this
// context. Does nothing unless model-execute profiling is switched on.
// Must be invoked after the kernel launch so the runtime can report the
// task id and stream id of the just-launched task.
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    return SUCCESS;
  }

  const NodeItem &node_item = GetNodeItem();
  auto op_desc = node_item.GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);

  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // Query ids of the most recently launched task; only valid post-launch.
  rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "Get task_id and stream_id failed.");
    return rt_ret;
  }
  GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id);

  const GraphExecutionContext *exec_ctx = GetExecutionContext();
  GE_CHECK_NOTNULL(exec_ctx);
  const HybridModel *hybrid_model = exec_ctx->model;
  GE_CHECK_NOTNULL(hybrid_model);

  TaskDescInfo info;
  info.model_name = hybrid_model->GetModelName();
  info.op_name = op_desc->GetName();
  info.block_dim = block_dim;
  info.task_type = task_type;
  info.task_id = task_id;
  info.stream_id = stream_id;
  info.shape_type = "dynamic";
  info.cur_iter_num = iteration_;
  task_desc_info.emplace_back(info);

  return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 6
- 0
ge/hybrid/node_executor/task_context.h View File

@@ -22,6 +22,7 @@
#include <vector>
#include "common/properties_manager.h"
#include "external/ge/ge_api_error_codes.h"
#include "framework/common/ge_types.h"
#include "hybrid/common/tensor_value.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/executor/rt_callback_manager.h"
@@ -108,6 +109,10 @@ class TaskContext {
void SetForceInferShape(bool force_infer_shape);
void *handle_ = nullptr;

const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim);
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }

private:
TaskContext(GraphExecutionContext *execution_context,
const NodeItem *node_item,
@@ -127,6 +132,7 @@ class TaskContext {
uint64_t iteration_ = 0;
uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
std::vector<TaskDescInfo> task_desc_info;
};
} // namespace hybrid
} // namespace ge


+ 1
- 0
ge/single_op/single_op.cc View File

@@ -70,6 +70,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = shape_type;
tmp_task_desc_info.cur_iter_num = 0;
tmp_task_desc_info.task_type = op_task->GetTaskType();
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);



+ 6
- 0
ge/single_op/task/op_task.cc View File

@@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
return UNSUPPORTED;
}

uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }

TbeOpTask::~TbeOpTask() {
if (sm_desc_ != nullptr) {
(void)rtMemFreeManaged(sm_desc_);
@@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; }

const std::string &TbeOpTask::GetStubName() const { return stub_name_; }

uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }

Status TbeOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
@@ -802,6 +806,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam &param) {
return DoUpdateArgTable(param, false);
}

uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }

void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data());
arg_count = io_addr_host_.size();


+ 4
- 0
ge/single_op/task/op_task.h View File

@@ -52,6 +52,7 @@ class OpTask {
std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &output_buffers,
rtStream_t stream);
virtual uint32_t GetTaskType() const;

protected:
Status DoUpdateArgTable(const SingleOpModelParam &param, bool keep_workspace);
@@ -85,6 +86,7 @@ class TbeOpTask : public OpTask {
size_t GetArgSize() const;
const std::string &GetStubName() const;
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
uint32_t GetTaskType() const override;

private:
friend class SingleOpModel;
@@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask {
~AiCpuBaseTask() override;
UnknowShapeOpType GetUnknownType() const { return unknown_type_; }
Status UpdateArgTable(const SingleOpModelParam &param) override;
uint32_t GetTaskType() const override;

protected:
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status SetInputConst();


+ 7
- 6
inc/framework/common/ge_types.h View File

@@ -41,12 +41,7 @@ enum FrameworkType {
};

const std::map<std::string, std::string> kFwkTypeToStr = {
{"0", "Caffe"},
{"1", "MindSpore"},
{"3", "TensorFlow"},
{"4", "Android_NN"},
{"5", "Onnx"}
};
{"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}};

enum OpEngineType {
ENGINE_SYS = 0, // default engine
@@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN
const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";

// profiling data
const uint32_t kTaskTypeAicore = 0;
const uint32_t kTaskTypeAicpu = 1;
const uint32_t kTaskTypeInvalid = 0xFFFF;

// Data cache, including data address and length
struct DataBuffer {
public:
@@ -256,6 +256,7 @@ struct TaskDescInfo {
uint32_t stream_id;
std::string shape_type;
int64_t cur_iter_num;
uint32_t task_type;
};

// Profiling info of graph


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467
Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75
Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5

Loading…
Cancel
Save