diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index bef3a558..d7e67bc1 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -124,6 +124,7 @@ Status SingleOp::UpdateArgs(const std::vector &inputs, const std::ve size_t io_addr_num = args_.size(); if (task->GetOpTaskType() == OP_TASK_AICPU) { GELOGD("Update aicpu_TF task args"); + task->SetIoAddrsForDump(args_); auto *dst_io_addr = const_cast(reinterpret_cast(task->GetIOAddr())); GE_CHECK_NOTNULL(dst_io_addr); auto rt_ret = rtMemcpyAsync(dst_io_addr, @@ -170,11 +171,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c if (ret != SUCCESS) { return ret; } - ret = task->OpenDump(args_, stream_); - if (ret != SUCCESS) { - GELOGE(ret, "Open dump failed"); - return ret; - } } return ret; diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 8ddcba58..e76da6cf 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -39,26 +39,27 @@ void FreeHbm(void *var) { (void)rtFree(var); } } -} +} // namespace -Status OpTask::OpenDump(const std::vector &io_addr, rtStream_t stream) { +Status OpTask::OpenDump(rtStream_t stream) { if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { GELOGI("Dump is open in single op,start to set dump info"); std::vector input_addrs; std::vector output_adds; auto input_size = op_desc_->GetInputsSize(); auto output_size = op_desc_->GetOutputsSize(); - auto all_size = io_addr.size(); + auto all_size = io_addrs_for_dump_.size(); if (input_size + output_size != all_size) { - GELOGE(FAILED, "io_addr size is not equal input and output size"); + GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", all_size, + input_size + output_size); return FAILED; } for (size_t i = 0; i < input_size; i++) { - uint64_t input_addr = static_cast(io_addr[i]); + uint64_t input_addr = io_addrs_for_dump_[i]; input_addrs.emplace_back(input_addr); } for (size_t j = 0; j < output_size; j++) { - uint64_t output_addr = static_cast(io_addr[input_size + j]); + uint64_t output_addr = io_addrs_for_dump_[input_size + j]; output_adds.emplace_back(output_addr); } dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc_, input_addrs, output_adds, stream); @@ -126,6 +127,17 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); + size_t input_size = op_desc_->GetInputsSize(); + size_t output_size = op_desc_->GetOutputsSize(); + uint64_t *io_addr = reinterpret_cast(args_.get()); + std::vector io_addrs(io_addr, io_addr + input_size + output_size); + SetIoAddrsForDump(io_addrs); + auto status = OpenDump(stream); + if (status != SUCCESS) { + GELOGE(status, "Open dump failed in the tbe single op %s", this->stub_name_.c_str()); + return status; + } + return SUCCESS; } @@ -387,6 +399,12 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { } GELOGI("[TASK_INFO] is %s", this->task_info_.c_str()); + auto status = OpenDump(stream); + if (status != SUCCESS) { + GELOGE(status, "Open dump failed in aicpu single op %s", this->op_type_.c_str()); + return status; + } + GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str()); return SUCCESS; } @@ -680,6 +698,17 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { } GELOGD("Invoke rtCpuKernelLaunch succeeded"); + size_t input_size = op_desc_->GetInputsSize(); + size_t output_size = op_desc_->GetOutputsSize(); + uint64_t *io_addr = reinterpret_cast(io_addr_); + std::vector io_addrs (io_addr, io_addr + input_size + output_size); + SetIoAddrsForDump(io_addrs); + auto status = OpenDump(stream); + if (status != SUCCESS) { + GELOGE(status, "Open dump failed in the aicpucc single op %s", this->kernel_name_.c_str()); + return status; + } + return SUCCESS; } diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index b9165641..1b4d9c02 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -59,7 +59,10 @@ class OpTask { const vector &GetWorkspaceSizes() const; void SetWorkspaceSizes(const vector &workspace_sizes); const OpDescPtr &GetOpdesc() const {return op_desc_;} - Status OpenDump(const std::vector &io_addr, rtStream_t stream); + Status OpenDump(rtStream_t stream); + void SetIoAddrsForDump(const vector &io_addrs_for_dump) { + io_addrs_for_dump_ = io_addrs_for_dump; + } virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, std::vector &output_desc, @@ -74,6 +77,7 @@ class OpTask { DumpProperties dump_properties_; DumpOp dump_op_; OpDescPtr op_desc_; + std::vector io_addrs_for_dump_; }; class TbeOpTask : public OpTask { @@ -180,9 +184,11 @@ class AiCpuTask : public AiCpuBaseTask { friend class AiCpuTaskBuilder; void *workspace_addr_ = nullptr; std::string task_info_; + // device addr void *args_ = nullptr; size_t arg_size_ = 0; std::string op_type_; + // device addr void *io_addr_ = nullptr; bool dynamic_flag_ = false;