| @@ -489,7 +489,7 @@ void DavinciModel::InitRuntimeParams() { | |||||
| runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info); | runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info); | ||||
| ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, value); | ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, value); | ||||
| runtime_param_.zero_copy_size = ret ? (uint64_t)value : 0; | |||||
| runtime_param_.zero_copy_size = ret ? value : 0; | |||||
| GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str()); | GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str()); | ||||
| } | } | ||||
| @@ -4507,4 +4507,22 @@ void DavinciModel::UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const s | |||||
| op_desc_info->output_addrs = output_addrs; | op_desc_info->output_addrs = output_addrs; | ||||
| GELOGD("[Update][OpIOAddrs] Op [%s] update input output addr success.", op_desc_info->op_name.c_str()); | GELOGD("[Update][OpIOAddrs] Op [%s] update input output addr success.", op_desc_info->op_name.c_str()); | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get total useful size, in known subgraph, no need to allocate zero copy memory during initialization. | |||||
| /// @param [out] total_useful_size: total mem size - zero copy size; only written when SUCCESS is returned. | |||||
| /// @return SUCCESS, or FAILED if zero copy size is negative or larger than total mem size. | |||||
| /// | |||||
| Status DavinciModel::GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size) { | |||||
| // zero_copy_size is a signed int64_t, so reject negative values explicitly rather than depending on | |||||
| // how the mixed-type comparison with mem_size would convert them. | |||||
| const int64_t zero_copy_size = runtime_param_.zero_copy_size; | |||||
| const bool zero_copy_negative = zero_copy_size < 0; | |||||
| if (zero_copy_negative || (runtime_param_.mem_size < static_cast<uint64_t>(zero_copy_size))) { | |||||
| REPORT_CALL_ERROR("E19999", "total mem size[%lu] is less than zero copy size[%ld] ", runtime_param_.mem_size, | |||||
| runtime_param_.zero_copy_size); | |||||
| GELOGE(FAILED, "[Check][TotalMemSizeExcludeZeroCopy] failed, total mem size[%lu] is less than zero copy size[%ld]", | |||||
| runtime_param_.mem_size, runtime_param_.zero_copy_size); | |||||
| return FAILED; | |||||
| } | |||||
| // Safe to subtract: zero_copy_size is non-negative and not larger than mem_size here. | |||||
| total_useful_size = static_cast<int64_t>(runtime_param_.mem_size - static_cast<uint64_t>(zero_copy_size)); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -248,12 +248,13 @@ class DavinciModel { | |||||
| // total mem size stored in the runtime params | // total mem size stored in the runtime params | ||||
| size_t TotalMemSize() const { | size_t TotalMemSize() const { | ||||
| return runtime_param_.mem_size; | return runtime_param_.mem_size; | ||||
| } | } | ||||
| // Useful size = total mem size - zero copy size, or 0 when total mem size does not exceed zero copy size. | |||||
| // A known subgraph does not need zero copy memory allocated during initialization. | |||||
| size_t TotalMemSizeExcludeZeroCopy() const { | |||||
| if (runtime_param_.mem_size <= runtime_param_.zero_copy_size) { | |||||
| return 0; | |||||
| } | |||||
| return runtime_param_.mem_size - runtime_param_.zero_copy_size; | |||||
| } | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get total useful size, in known subgraph, no need to allocate zero copy memory during initialization. | |||||
| /// @param [out] total_useful_size: total mem size - zero copy size. | |||||
| /// @return Status | |||||
| /// | |||||
| Status GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size); | |||||
| // name of this model | // name of this model | ||||
| string Name() const { | string Name() const { | ||||
| return name_; | return name_; | ||||
| } | } | ||||
| @@ -65,7 +65,7 @@ struct RuntimeParam { | |||||
| uint64_t var_size = 0; | uint64_t var_size = 0; | ||||
| uint64_t logic_var_base = 0; | uint64_t logic_var_base = 0; | ||||
| uint8_t *var_base = nullptr; | uint8_t *var_base = nullptr; | ||||
| uint64_t zero_copy_size = 0; | |||||
| int64_t zero_copy_size = 0; | |||||
| std::map<uint64_t, MemInfo> memory_infos; | std::map<uint64_t, MemInfo> memory_infos; | ||||
| uint32_t batch_num = 0; | uint32_t batch_num = 0; | ||||
| uint32_t stream_num = 0; | uint32_t stream_num = 0; | ||||
| @@ -101,18 +101,20 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
| GE_CHK_STATUS_RET(context.AllocateOutputs(), "[Allocate][Outputs] failed for %s.", context.GetNodeName()); | GE_CHK_STATUS_RET(context.AllocateOutputs(), "[Allocate][Outputs] failed for %s.", context.GetNodeName()); | ||||
| // allocate mem base | // allocate mem base | ||||
| void *buffer = nullptr; | void *buffer = nullptr; | ||||
| if (davinci_model_->TotalMemSizeExcludeZeroCopy != 0) { | |||||
| int64_t total_useful_size = 0; | |||||
| GE_CHK_STATUS_RET(davinci_model_.GetTotalMemSizeExcludeZeroCopy(total_useful_size), | |||||
| "[Get][TotalMemSizeExcludeZeroCopy] failed."); | |||||
| if (total_useful_size != 0) { | |||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), | ||||
| "[KnownNodeTask_AllocateWorkspace] Start"); | "[KnownNodeTask_AllocateWorkspace] Start"); | ||||
| GE_CHK_STATUS_RET(context.AllocateWorkspace(davinci_model_->TotalMemSizeExcludeZeroCopy(), &buffer, | |||||
| davinci_model_->GetRuntimeParam().mem_base), | |||||
| GE_CHK_STATUS_RET(context.AllocateWorkspace(total_useful_size, &buffer, davinci_model_->GetRuntimeParam().mem_base), | |||||
| "[Allocate][Workspace] failed for %s.", context.GetNodeName()); | "[Allocate][Workspace] failed for %s.", context.GetNodeName()); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), | ||||
| "[KnownNodeTask_AllocateWorkspace] End, size %zu", davinci_model_->TotalMemSize()); | |||||
| "[KnownNodeTask_AllocateWorkspace] End, size %ld", total_useful_size); | |||||
| // update mem base | // update mem base | ||||
| davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer)); | davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer)); | ||||
| GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", | |||||
| davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); | |||||
| GELOGI("KnownNodeTask::Init mem base is %p, size %ld.", | |||||
| davinci_model_->GetRuntimeParam().mem_base, total_useful_size); | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), | GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), | ||||
| davinci_model_->Id(), | davinci_model_->Id(), | ||||