Propagate TensorValue::GetMemType failures through BuildDeviceTensor.

What this patch does:
  * TensorValue::GetMemType returns Status, checks buffer_ for null and
    reports the memory type through the mem_type out-parameter.
  * HybridModelAsyncExecutor::BuildDeviceTensor returns Status, and
    CopyOutputs checks that Status with GE_CHK_STATUS_RET.
  * The device-memory deleter logs the address before freeing it.
  * The unit test asserts on the Status returned by BuildDeviceTensor.

Review notes:
  * GetMemType must assign mem_type and return SUCCESS. Returning
    buffer_->GetMemType() directly hands the MemStorageType value back as
    the Status and leaves the caller's mem_type at its HBM default.
  * NOTE(review): line breaks, indentation and blank context lines were
    rebuilt from the hunk line counts, and template arguments inside <...>
    were restored from usage. Confirm both against the tree before applying.
  * NOTE(review): the post-image blob ids on the index lines of
    tensor_value.h and the unit test predate the two fixes above.

diff --git a/ge/hybrid/common/tensor_value.h b/ge/hybrid/common/tensor_value.h
index c20074fd..c041263b 100644
--- a/ge/hybrid/common/tensor_value.h
+++ b/ge/hybrid/common/tensor_value.h
@@ -95,7 +95,9 @@ class TensorValue {
     name_ = name;
   }
 
-  MemStorageType GetMemType() const {
-    return buffer_->GetMemType();
+  Status GetMemType(MemStorageType &mem_type) const {
+    GE_CHECK_NOTNULL(buffer_);
+    mem_type = buffer_->GetMemType();
+    return SUCCESS;
   }
 
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc
index a6f31522..e0dd768d 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -458,7 +458,8 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
       auto tensor = TensorAdapter::AsTensor(ge_tensor);
       outputs.emplace_back(std::move(tensor));
     } else {
-      BuildDeviceTensor(output_tensor, ge_tensor_desc, output_size, outputs);
+      GE_CHK_STATUS_RET(BuildDeviceTensor(output_tensor, ge_tensor_desc, output_size, outputs),
+                        "Build device tensor failed");
       output_data->blobs.emplace_back(output_tensor.Release(), static_cast<uint32_t>(output_size), false,
                                       static_cast<uint32_t>(kPlacementDevice));
     }
@@ -478,13 +479,15 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
   return SUCCESS;
 }
 
-void HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc,
-                                                 int64_t output_size, std::vector<ge::Tensor> &outputs) {
+Status HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc,
+                                                   int64_t output_size, std::vector<ge::Tensor> &outputs) {
   GELOGD("Start to build device tensor");
-  auto mem_type = output_tensor.GetMemType();
+  MemStorageType mem_type = HBM;
+  GE_CHK_STATUS_RET(output_tensor.GetMemType(mem_type), "[Build][DeviceTensor] Get mem type failed");
   GELOGD("Mem type is %d", static_cast<int32_t>(mem_type));
   auto deleter = [=](uint8_t *device_data) {
     if (device_data != nullptr) {
+      GELOGD("Free device addr is %p", device_data);
       if (mem_type == RDMA_HBM) {
         MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(device_data, device_id_);
       } else if (mem_type == HOST_DDR) {
@@ -499,6 +502,7 @@ void HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeT
   auto tensor = TensorAdapter::AsTensor(ge_tensor);
   tensor.SetData(reinterpret_cast<uint8_t *>(output_tensor.Release()), static_cast<size_t>(output_size), deleter);
   outputs.emplace_back(std::move(tensor));
+  return SUCCESS;
 }
 
 Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index 5ae1a222..f94f6aa5 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -76,8 +76,8 @@ class HybridModelAsyncExecutor {
                       OutputData *output_data);
 
   Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector<ge::Tensor> &outputs);
-  void BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, int64_t output_size,
-                         std::vector<ge::Tensor> &outputs);
+  Status BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, int64_t output_size,
+                           std::vector<ge::Tensor> &outputs);
 
   Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::Tensor> &outputs);
 
diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
index 98bb78f2..f772af23 100644
--- a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
+++ b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
@@ -82,7 +82,8 @@ TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) {
   GeTensorDesc ge_tensor_desc;
   int64_t output_size = 100;
   std::vector<ge::Tensor> outputs;
-  executor.BuildDeviceTensor(tensor, ge_tensor_desc, output_size, outputs);
+  auto ret = executor.BuildDeviceTensor(tensor, ge_tensor_desc, output_size, outputs);
+  ASSERT_EQ(ret, SUCCESS);
   auto size = tensor.GetSize();
   ASSERT_EQ(size, 100);
 }