Browse Source

!1899 Fix bug of atomic profiling.

Merge pull request !1899 from 赵之轩/my_dev4
tags/v1.5.1
i-robot Gitee 3 years ago
parent
commit
aa2a61c39e
8 changed files with 32 additions and 9 deletions
  1. +1
    -1
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  2. +10
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.cc
  3. +4
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.h
  4. +1
    -1
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  5. +3
    -3
      ge/hybrid/node_executor/task_context.cc
  6. +2
    -2
      ge/hybrid/node_executor/task_context.h
  7. +1
    -1
      tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc
  8. +10
    -1
      tests/ut/ge/hybrid/ge_hybrid_unittest.cc

+ 1
- 1
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -214,7 +214,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim(), (*it)->GetOpType());
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 10
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -33,6 +33,7 @@ namespace {
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
constexpr char const *kAttrOpParamSize = "op_para_size";
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
const string kAtomicOpType = "DynamicAtomicAddrClean";
std::atomic<std::uint64_t> log_id(0);
} // namespace

@@ -51,6 +52,7 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
}

Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
op_type_ = op_desc.GetType();
log_name_ = op_desc.GetName() + "_tvmbin";
log_id_ = log_id++;
auto op_desc_ptr = MakeShared<OpDesc>(op_desc);
@@ -538,6 +540,10 @@ const std::string &AiCoreOpTask::GetName() const {
return stub_name_;
}

const std::string &AiCoreOpTask::GetOpType() const {
return op_type_;
}

// Returns the attribute key held in kAttrOpParamSize as a std::string.
std::string AiCoreOpTask::GetKeyForOpParamSize() const {
  return std::string(kAttrOpParamSize);
}
@@ -631,6 +637,10 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co
return op_desc.GetName() + "_atomic_kernelname";
}

const std::string &AtomicAddrCleanOpTask::GetOpType() const {
return kAtomicOpType;
}

Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str());
GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info),


+ 4
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -80,6 +80,8 @@ class AiCoreOpTask {

// Stores the given flag in is_single_op_.
void SetSingleOp(bool is_single_op) {
  is_single_op_ = is_single_op;
}

virtual const std::string& GetOpType() const;

protected:
Status UpdateTilingInfo(TaskContext &context);
virtual std::string GetKeyForOpParamSize() const;
@@ -119,12 +121,14 @@ class AiCoreOpTask {
uint64_t log_id_ = 0;
std::string log_name_;
uint32_t offset_ = 0;
std::string op_type_;
};

class AtomicAddrCleanOpTask : public AiCoreOpTask {
public:
Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override;
Status UpdateArgs(TaskContext &task_context) override;
const std::string& GetOpType() const override;

protected:
std::string GetKeyForOpParamSize() const override;


+ 1
- 1
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -207,7 +207,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0);
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0, node_type_);
auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");


+ 3
- 3
ge/hybrid/node_executor/task_context.cc View File

@@ -571,8 +571,8 @@ Status TaskContext::Synchronize() {
return execution_context_->Synchronize(GetStream());
}

Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
const std::string &task_type, uint32_t block_dim) {
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type,
uint32_t block_dim, const std::string &op_type) {
if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
const NodeItem &node_item = GetNodeItem();
auto op_desc = node_item.GetOpDesc();
@@ -586,7 +586,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream
TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_desc->GetName();
tmp_task_desc_info.op_type = op_desc->GetType();
tmp_task_desc_info.op_type = op_type;
tmp_task_desc_info.block_dim = block_dim;
tmp_task_desc_info.task_type = task_type;
tmp_task_desc_info.task_id = task_id;


+ 2
- 2
ge/hybrid/node_executor/task_context.h View File

@@ -118,8 +118,8 @@ class TaskContext {
void *handle_ = nullptr;

// Read-only access to the task_desc_info vector.
const std::vector<TaskDescInfo> &GetProfilingTaskDescInfo() const {
  return task_desc_info;
}
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
const std::string &task_type, uint32_t block_dim);
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type,
uint32_t block_dim, const std::string &op_type);
// Removes every entry from the task_desc_info vector.
void ClearProfilingTaskDescInfo() {
  task_desc_info.clear();
}

private:


+ 1
- 1
tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc View File

@@ -119,7 +119,7 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) {
uint32_t stream_id = 1;
std::string task_type = "rts";
uint32_t block_dim = 0;
node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim);
node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim, op_desc->GetType());

ASSERT_TRUE(node_state->GetTaskContext() != nullptr);



+ 10
- 1
tests/ut/ge/hybrid/ge_hybrid_unittest.cc View File

@@ -102,7 +102,7 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) {
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
std::string kernel_name("kernel/Add");
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS);
ASSERT_EQ(aicore_task->Init(*op_desc.get(), task_def), SUCCESS);
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
@@ -678,6 +678,15 @@ TEST_F(UtestGeHybrid, test_key_for_kernel_bin) {
EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname");
}

// Checks GetOpType() on both task classes: the base task returns whatever is
// stored in op_type_, while the atomic-clean task returns the fixed string
// "DynamicAtomicAddrClean".
TEST_F(UtestGeHybrid, test_op_type) {
  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
  // new(std::nothrow) yields nullptr on allocation failure; abort the test
  // instead of dereferencing a null pointer.
  ASSERT_TRUE(aicore_task != nullptr);
  aicore_task->op_type_ = "Add";
  EXPECT_EQ(aicore_task->GetOpType(), "Add");

  auto atomic_task = std::unique_ptr<hybrid::AtomicAddrCleanOpTask>(new(std::nothrow)hybrid::AtomicAddrCleanOpTask());
  ASSERT_TRUE(atomic_task != nullptr);
  EXPECT_EQ(atomic_task->GetOpType(), "DynamicAtomicAddrClean");
}

TEST_F(UtestGeHybrid, TestParseDependentInputNodesForHccl) {
NodeExecutorManager::GetInstance().engine_mapping_.emplace("ops_kernel_info_hccl",
NodeExecutorManager::ExecutorType::HCCL);


Loading…
Cancel
Save