diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 231655d1..356919f6 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -195,7 +195,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return RT_ERROR_TO_GE_STATUS(rt_ret);) SetIoAddrs(op_desc); - InitDumpTask(input_output_addr, op_desc); + InitDumpFlag(op_desc); + InitDumpArgs(input_output_addr, op_desc); GELOGI("KernelExTaskInfo knonw node Init Success."); return SUCCESS; } @@ -237,7 +238,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, ret:0x%X, size:%lu", rt_ret, addrs_size); return RT_ERROR_TO_GE_STATUS(rt_ret);) - InitDumpTask(input_output_addr_, op_desc); + InitDumpFlag(op_desc); + InitDumpArgs(input_output_addr_, op_desc); } uint64_t input_output_addr = static_cast(reinterpret_cast(input_output_addr_)); @@ -269,10 +271,16 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return SUCCESS; } -void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { +void KernelExTaskInfo::InitDumpFlag(const OpDescPtr &op_desc) { if (davinci_model_->OpNeedDump(op_desc->GetName())) { - GELOGD("Op %s need dump in kernel ex task info", op_desc->GetName().c_str()); + GELOGD("Op %s need init dump flag in kernel ex task info", op_desc->GetName().c_str()); dump_flag_ = RT_KERNEL_DUMPFLAG; + } +} + +void KernelExTaskInfo::InitDumpArgs(void *addr, const OpDescPtr &op_desc) { + if (davinci_model_->OpNeedDump(op_desc->GetName())) { + GELOGD("Op %s need dump in kernel ex task info", op_desc->GetName().c_str()); dump_args_ = addr; } if (davinci_model_->GetOpDugReg()) { diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index bcc17168..1b77b715 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -61,7 +61,8 @@ class KernelExTaskInfo : public TaskInfo { Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); void SetIoAddrs(const OpDescPtr &op_desc); - void InitDumpTask(void *addr, const OpDescPtr &op_desc); + void InitDumpFlag(const OpDescPtr &op_desc); + void InitDumpArgs(void *addr, const OpDescPtr &op_desc); Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc); uint32_t task_id_; diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index c72bfeef..d69d0a8b 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -129,6 +129,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci ctx_.opIndex2[i] = context.origin_op_index(i); } ctx_.opCount = context.origin_op_index_size(); + InitDumpFlag(); if (kernel_type_ == ccKernelType::TE) { ctx_.opIndex = context.op_index(); uint16_t *args_offset_tmp = reinterpret_cast(const_cast(context.args_offset().data())); @@ -660,7 +661,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne if (davinci_model_->IsKnownNode()) { args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) : davinci_model_->GetCurrentArgsAddr(args_offset_); - InitDumpTask(offset); + InitDumpArgs(offset); return SUCCESS; } @@ -726,7 +727,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne return FAILED; } skt_dump_args_ = static_cast(args_) + offset; - InitDumpTask(offset); + InitDumpArgs(offset); vector virtual_io_addrs; // use virtual address for zero copy key. virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); @@ -1022,7 +1023,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k if (davinci_model_->IsKnownNode()) { args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); - InitDumpTask(sizeof(aicpu::AicpuParamHead)); + InitDumpArgs(sizeof(aicpu::AicpuParamHead)); return SUCCESS; } const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); @@ -1063,7 +1064,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - InitDumpTask(sizeof(aicpu::AicpuParamHead)); + InitDumpArgs(sizeof(aicpu::AicpuParamHead)); if (kernel_type_ == ccKernelType::CUST_AI_CPU) { dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; @@ -1074,14 +1075,20 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return SUCCESS; } -void KernelTaskInfo::InitDumpTask(uint32_t offset) { +void KernelTaskInfo::InitDumpFlag() { if (davinci_model_->OpNeedDump(op_desc_->GetName())) { - GELOGD("Op %s need dump in task info", op_desc_->GetName().c_str()); + GELOGD("Op %s init dump flag", op_desc_->GetName().c_str()); if (IsL1FusionOp(op_desc_)) { dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; } else { dump_flag_ = RT_KERNEL_DUMPFLAG; } + } +} + +void KernelTaskInfo::InitDumpArgs(uint32_t offset) { + if (davinci_model_->OpNeedDump(op_desc_->GetName())) { + GELOGD("Op %s need dump in task info", op_desc_->GetName().c_str()); dump_args_ = static_cast(args_) + offset; } if (davinci_model_->GetOpDugReg()) { diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h index 79347255..d9dd30bb 100644 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h @@ -128,7 +128,8 @@ class KernelTaskInfo : public TaskInfo { Status SuperKernelDistribute(); bool IsL1FusionOp(const OpDescPtr &op_desc); void SetIoAddrs(const OpDescPtr &op_desc); - void InitDumpTask(uint32_t offset); + void InitDumpFlag(); + void InitDumpArgs(uint32_t offset); void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model); void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model); Status CopyNoncontinuousArgs(uint16_t offset); diff --git a/ge/graph/load/model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc index fad18d37..9f82efad 100755 --- a/ge/graph/load/model_manager/task_info/task_info.cc +++ b/ge/graph/load/model_manager/task_info/task_info.cc @@ -26,7 +26,7 @@ Status TaskInfo::SetStream(uint32_t stream_id, const std::vector &st stream_ = stream_list[stream_id]; } else { REPORT_INNER_ERROR("E19999", "stream_id:%u >= stream_list.size(): %zu, check invalid", - stream_id, stream_list.size()); + stream_id, stream_list.size()); GELOGE(FAILED, "[Check][Param] index:%u >= stream_list.size():%zu.", stream_id, stream_list.size()); return FAILED; } diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index b82d1034..6cb57e6b 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -335,9 +335,9 @@ Status DeleteIdentityInsertByAdapter(ComputeGraphPtr &graph) { GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, continue); auto dst_node = peer_in_anchor->GetOwnerNode(); GE_IF_BOOL_EXEC(dst_node == nullptr, continue); - if (dst_node->GetType() == IDENTITY) { + if (dst_node->GetType() == IDENTITY && dst_node->GetAllOutDataAnchors().empty()) { GELOGI("Need to remove %s.", dst_node->GetName().c_str()); - if (ge::GraphUtils::RemoveNodeWithoutRelink(graph, dst_node) != GRAPH_SUCCESS) { + if (GraphUtils::RemoveNodeWithoutRelink(graph, dst_node) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Remove node:%s(%s) from graph:%s failed", dst_node->GetName().c_str(), dst_node->GetType().c_str(), graph->GetName().c_str()); GELOGE(FAILED, "Remove Identity node %s failed.", dst_node->GetName().c_str()); diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 19b8aeab..bbe29007 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -157,7 +157,7 @@ TEST_F(UtestGeExecutor, InitFeatureMapAndP2PMem_failed) { EXPECT_EQ(model.InitFeatureMapAndP2PMem(nullptr, 0), PARAM_INVALID); } -TEST_F(UtestGeExecutor, kernel_InitDumpTask) { +TEST_F(UtestGeExecutor, kernel_InitDumpArgs) { DavinciModel model(0, g_label_call_back); model.om_name_ = "testom"; model.name_ = "test"; @@ -173,10 +173,10 @@ TEST_F(UtestGeExecutor, kernel_InitDumpTask) { KernelTaskInfo kernel_task_info; kernel_task_info.davinci_model_ = &model; kernel_task_info.op_desc_ = op_desc; - kernel_task_info.InitDumpTask(0); + kernel_task_info.InitDumpArgs(0); } -TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { +TEST_F(UtestGeExecutor, kernel_ex_InitDumpArgs) { DavinciModel model(0, g_label_call_back); model.om_name_ = "testom"; model.name_ = "test"; @@ -191,7 +191,25 @@ TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { KernelExTaskInfo kernel_ex_task_info; kernel_ex_task_info.davinci_model_ = &model; - kernel_ex_task_info.InitDumpTask(nullptr, op_desc); + kernel_ex_task_info.InitDumpArgs(nullptr, op_desc); +} + +TEST_F(UtestGeExecutor, kernel_ex_InitDumpFlag) { + DavinciModel model(0, g_label_call_back); + model.om_name_ = "testom"; + model.name_ = "test"; + OpDescPtr op_desc = CreateOpDesc("test", "test"); + + std::map> model_dump_properties_map; + std::set s; + model_dump_properties_map[DUMP_ALL_MODEL] = s; + DumpProperties dp; + dp.model_dump_properties_map_ = model_dump_properties_map; + model.SetDumpProperties(dp); + + KernelExTaskInfo kernel_ex_task_info; + kernel_ex_task_info.davinci_model_ = &model; + kernel_ex_task_info.InitDumpFlag(op_desc); } TEST_F(UtestGeExecutor, execute_graph_with_stream) {