Browse Source

!210 fix bug of zero copy

Merge pull request !210 from 梁昊/development
tags/v1.1.0
mindspore-ci-bot Gitee 4 years ago
parent
commit
191215d278
2 changed files with 8 additions and 12 deletions
  1. ge/graph/build/model_builder.cc (+4, -8)
  2. ge/graph/load/new_model_manager/task_info/kernel_task_info.cc (+4, -4)

ge/graph/build/model_builder.cc (+4, -8) View File

@@ -706,14 +706,10 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
GELOGE(RT_FAILED, "rtGetRtCapability failed."); GELOGE(RT_FAILED, "rtGetRtCapability failed.");
return RT_FAILED; return RT_FAILED;
} else { } else {
if (value == RT_CAPABILITY_SUPPORT) {
GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode);
MemcpyAddrAsyncPass memcpy_addr;
GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed.");
GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run.");
} else {
GELOGW("rtGetRtCapability not support memcpy_addr_async.");
}
GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode);
MemcpyAddrAsyncPass memcpy_addr;
GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed.");
GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run.");
} }


GE_TIMESTAMP_START(AssignMemory); GE_TIMESTAMP_START(AssignMemory);


ge/graph/load/new_model_manager/task_info/kernel_task_info.cc (+4, -4) View File

@@ -559,10 +559,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
GE_CHECK_NOTNULL(davinci_model_); GE_CHECK_NOTNULL(davinci_model_);
// get tvm op desc // get tvm op desc
OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex);
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "InitTVMTaskInfo error, index:%u out of range!", ctx_.opIndex);
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(op_desc);
if (davinci_model_->IsKnownNode()) { if (davinci_model_->IsKnownNode()) {
return SUCCESS; return SUCCESS;
} }
@@ -650,6 +647,9 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
vector<void *> virtual_io_addrs; // use virtual address for zero copy key. vector<void *> virtual_io_addrs; // use virtual address for zero copy key.
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (op_desc->GetType() == ATOMICADDRCLEAN) {
virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset);


GELOGD("Do InitTVMTask end"); GELOGD("Do InitTVMTask end");


Loading…
Cancel
Save