Browse Source

modify single op dump

tags/v1.1.0
zhou_chao1993 3 years ago
parent
commit
4982336db7
3 changed files with 43 additions and 12 deletions
  1. +1
    -5
      ge/single_op/single_op.cc
  2. +35
    -6
      ge/single_op/task/op_task.cc
  3. +7
    -1
      ge/single_op/task/op_task.h

+ 1
- 5
ge/single_op/single_op.cc View File

@@ -124,6 +124,7 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
size_t io_addr_num = args_.size();
if (task->GetOpTaskType() == OP_TASK_AICPU) {
GELOGD("Update aicpu_TF task args");
task->SetIoAddrsForDump(args_);
auto *dst_io_addr = const_cast<uintptr_t *>(reinterpret_cast<const uintptr_t *>(task->GetIOAddr()));
GE_CHECK_NOTNULL(dst_io_addr);
auto rt_ret = rtMemcpyAsync(dst_io_addr,
@@ -170,11 +171,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
if (ret != SUCCESS) {
return ret;
}
ret = task->OpenDump(args_, stream_);
if (ret != SUCCESS) {
GELOGE(ret, "Open dump failed");
return ret;
}
}

return ret;


+ 35
- 6
ge/single_op/task/op_task.cc View File

@@ -39,26 +39,27 @@ void FreeHbm(void *var) {
(void)rtFree(var);
}
}
}
} // namespace

Status OpTask::OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream) {
Status OpTask::OpenDump(rtStream_t stream) {
if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) {
GELOGI("Dump is open in single op,start to set dump info");
std::vector<uint64_t> input_addrs;
std::vector<uint64_t> output_adds;
auto input_size = op_desc_->GetInputsSize();
auto output_size = op_desc_->GetOutputsSize();
auto all_size = io_addr.size();
auto all_size = io_addrs_for_dump_.size();
if (input_size + output_size != all_size) {
GELOGE(FAILED, "io_addr size is not equal input and output size");
GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", all_size,
input_size + output_size);
return FAILED;
}
for (size_t i = 0; i < input_size; i++) {
uint64_t input_addr = static_cast<uint64_t>(io_addr[i]);
uint64_t input_addr = io_addrs_for_dump_[i];
input_addrs.emplace_back(input_addr);
}
for (size_t j = 0; j < output_size; j++) {
uint64_t output_addr = static_cast<uint64_t>(io_addr[input_size + j]);
uint64_t output_addr = io_addrs_for_dump_[input_size + j];
output_adds.emplace_back(output_addr);
}
dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc_, input_addrs, output_adds, stream);
@@ -126,6 +127,17 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
}
GELOGI("[TASK_INFO] %s", this->stub_name_.c_str());

size_t input_size = op_desc_->GetInputsSize();
size_t output_size = op_desc_->GetOutputsSize();
uint64_t *io_addr = reinterpret_cast<uint64_t *>(args_.get());
std::vector<uint64_t> io_addrs(io_addr, io_addr + input_size + output_size);
SetIoAddrsForDump(io_addrs);
auto status = OpenDump(stream);
if (status != SUCCESS) {
GELOGE(status, "Open dump failed in the tbe single op %s", this->stub_name_.c_str());
return status;
}

return SUCCESS;
}

@@ -387,6 +399,12 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) {
}
GELOGI("[TASK_INFO] is %s", this->task_info_.c_str());

auto status = OpenDump(stream);
if (status != SUCCESS) {
GELOGE(status, "Open dump failed in aicpu single op %s", this->op_type_.c_str());
return status;
}

GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str());
return SUCCESS;
}
@@ -680,6 +698,17 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) {
}
GELOGD("Invoke rtCpuKernelLaunch succeeded");

size_t input_size = op_desc_->GetInputsSize();
size_t output_size = op_desc_->GetOutputsSize();
uint64_t *io_addr = reinterpret_cast<uint64_t *>(io_addr_);
std::vector<uint64_t> io_addrs (io_addr, io_addr + input_size + output_size);
SetIoAddrsForDump(io_addrs);
auto status = OpenDump(stream);
if (status != SUCCESS) {
GELOGE(status, "Open dump failed in the aicpucc single op %s", this->kernel_name_.c_str());
return status;
}

return SUCCESS;
}



+ 7
- 1
ge/single_op/task/op_task.h View File

@@ -59,7 +59,10 @@ class OpTask {
const vector<int64_t> &GetWorkspaceSizes() const;
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
// Returns a const reference to the op_desc_ member held by this task.
const OpDescPtr &GetOpdesc() const {return op_desc_;}
Status OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream);
Status OpenDump(rtStream_t stream);
// Stores a copy of the given I/O addresses in io_addrs_for_dump_.
// OpenDump(rtStream_t) reads this member: it treats the first
// op_desc_->GetInputsSize() entries as input addresses and the following
// op_desc_->GetOutputsSize() entries as output addresses, and returns FAILED
// when the total count does not match. Callers therefore need to set the
// addresses (inputs first, then outputs) before calling OpenDump.
void SetIoAddrsForDump(const vector<uint64_t> &io_addrs_for_dump) {
io_addrs_for_dump_ = io_addrs_for_dump;
}
virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &input_buffers,
std::vector<GeTensorDesc> &output_desc,
@@ -74,6 +77,7 @@ class OpTask {
DumpProperties dump_properties_;
DumpOp dump_op_;
OpDescPtr op_desc_;
std::vector<uint64_t> io_addrs_for_dump_;
};

class TbeOpTask : public OpTask {
@@ -180,9 +184,11 @@ class AiCpuTask : public AiCpuBaseTask {
friend class AiCpuTaskBuilder;
void *workspace_addr_ = nullptr;
std::string task_info_;
// device addr
void *args_ = nullptr;
size_t arg_size_ = 0;
std::string op_type_;
// device addr
void *io_addr_ = nullptr;

bool dynamic_flag_ = false;


Loading…
Cancel
Save