@@ -40,6 +40,12 @@ class TensorBuffer {
   TensorBuffer &operator = (const TensorBuffer &) = delete;
   ~TensorBuffer();
 
+  void *Release() {
+    auto ret = buffer_;
+    buffer_ = nullptr;
+    return ret;
+  }
+
   void *GetData() {
     return buffer_;
   }
@@ -48,6 +54,10 @@ class TensorBuffer {
     return size_;
   }
 
+  MemStorageType GetMemType() const {
+    return mem_type_;
+  }
+
  private:
   TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM);
@@ -69,6 +79,10 @@ class TensorValue {
   void Destroy();
 
+  void *Release() {
+    return buffer_->Release();
+  }
+
   bool IsEmpty() {
     return ref_buffer_ == nullptr && buffer_ == nullptr;
   }
@@ -80,6 +94,10 @@ class TensorValue {
   void SetName(const std::string &name) {
     name_ = name;
   }
 
+  MemStorageType GetMemType() const {
+    return buffer_->GetMemType();
+  }
+
   void *MutableData();
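// --- Illustrative usage sketch, not part of the patch ---
// Release() hands the raw device buffer to the caller and clears buffer_, so the
// TensorBuffer destructor no longer frees it; GetMemType() reports which allocator
// pool (HBM, RDMA_HBM, HOST_DDR) the buffer came from, which the caller needs in
// order to free it correctly. Create()/GetAllocator() are used the same way in the
// unit test at the end of this change.
auto allocator = NpuMemoryAllocator::GetAllocator();
auto buffer = TensorBuffer::Create(allocator, 64);  // returns unique_ptr<TensorBuffer>
TensorValue value(std::shared_ptr<TensorBuffer>(buffer.release()));
MemStorageType mem_type = value.GetMemType();  // query before Release()
void *raw = value.Release();  // ownership transferred; `value` no longer frees it
// `raw` must now be freed through the allocator matching `mem_type`.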
@@ -19,6 +19,13 @@
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/type_utils.h"
 #include "graph/ge_context.h"
+#include "graph/types.h"
+#include "graph/debug/ge_attr_define.h"
+#include "graph/manager/graph_caching_allocator.h"
+#include "graph/manager/graph_mem_allocator.h"
+#include "graph/manager/rdma_pool_allocator.h"
+#include "graph/manager/host_mem_allocator.h"
+#include "graph/manager/graph_mem_manager.h"
 
 namespace ge {
 namespace hybrid {
@@ -440,22 +447,31 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
     GeShape ge_shape(tensor_desc->GetShape().GetDims());
     GeTensorDesc ge_tensor_desc;
     ge_tensor_desc.SetShape(ge_shape);
-    GeTensor ge_tensor(ge_tensor_desc);
     if (output_size > 0) {
-      auto aligned_ptr = MakeShared<AlignedPtr>(output_size, kAlignment);
-      GE_CHECK_NOTNULL(aligned_ptr);
-      auto data_buf = aligned_ptr->MutableGet();
-      GE_CHECK_NOTNULL(data_buf);
-      GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST));
-      ge_tensor.SetData(aligned_ptr, output_size);
-      output_data->blobs.emplace_back(data_buf, static_cast<uint32_t>(output_size), false);
+      if (execute_mode != kLazyRecompile) {
+        auto aligned_ptr = MakeShared<AlignedPtr>(output_size, kAlignment);
+        GE_CHECK_NOTNULL(aligned_ptr);
+        auto data_buf = aligned_ptr->MutableGet();
+        GE_CHECK_NOTNULL(data_buf);
+        GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST));
+        GeTensor ge_tensor(ge_tensor_desc);
+        ge_tensor.SetData(aligned_ptr, output_size);
+        output_data->blobs.emplace_back(data_buf, static_cast<uint32_t>(output_size), false);
+        auto tensor = TensorAdapter::AsTensor(ge_tensor);
+        outputs.emplace_back(std::move(tensor));
+      } else {
+        BuildDeviceTensor(output_tensor, ge_tensor_desc, output_size, outputs);
+        output_data->blobs.emplace_back(output_tensor.Release(), static_cast<uint32_t>(output_size), false,
+                                        static_cast<uint32_t>(kPlacementDevice));
+      }
     } else {
-      GELOGW("Output[%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str());
+      GELOGW("Output [%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str());
+      GeTensor ge_tensor(ge_tensor_desc);
       ge_tensor.SetData(nullptr, 0U);
       output_data->blobs.emplace_back(nullptr, 0U, false);
+      auto tensor = TensorAdapter::AsTensor(ge_tensor);
+      outputs.emplace_back(std::move(tensor));
     }
-    auto tensor = TensorAdapter::AsTensor(ge_tensor);
-    outputs.emplace_back(std::move(tensor));
 
     GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i,
            TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(),
            tensor_desc->GetShape().ToString().c_str(), output_size);
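// --- Hedged consumer-side sketch, not part of the patch ---
// With lazy_recompile the blob now carries the device pointer plus a placement flag
// instead of a host copy, so a consumer of OutputData has to branch on placement.
// The field names below mirror the 4-argument DataBuffer emplace_back above and are
// assumptions about the DataBuffer layout, not verified here.
for (const auto &blob : output_data->blobs) {
  if (blob.placement == static_cast<uint32_t>(kPlacementDevice)) {
    // Device memory: copy it out with rtMemcpy(DEVICE_TO_HOST) or feed it to the
    // next execution; the matching ge::Tensor in `outputs` owns it through the
    // deleter installed by BuildDeviceTensor.
  } else {
    // Host memory: backed by the AlignedPtr held inside the returned ge::Tensor.
  }
}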
@@ -464,6 +480,29 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
   return SUCCESS;
 }
 
+void HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc,
+                                                 int64_t output_size, std::vector<ge::Tensor> &outputs) {
+  GELOGD("Start to build device tensor");
+  auto mem_type = output_tensor.GetMemType();
+  GELOGD("Mem type is %u", static_cast<uint32_t>(mem_type));
+  auto deleter = [=](uint8_t *device_data) {
+    if (device_data != nullptr) {
+      if (mem_type == RDMA_HBM) {
+        MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(device_data, device_id_);
+      } else if (mem_type == HOST_DDR) {
+        MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(device_data);
+      } else {
+        MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(device_data, device_id_);
+      }
+    }
+  };
+  ge_tensor_desc.SetPlacement(kPlacementDevice);
+  GeTensor ge_tensor(ge_tensor_desc);
+  auto tensor = TensorAdapter::AsTensor(ge_tensor);
+  tensor.SetData(reinterpret_cast<uint8_t *>(output_tensor.Release()), static_cast<size_t>(output_size), deleter);
+  outputs.emplace_back(std::move(tensor));
+}
+
 Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
                                          const std::vector<GeTensorDesc> &input_desc,
                                          std::vector<DataBuffer> &outputs,
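// --- Minimal standalone sketch of the deleter pattern used above (plain C++) ---
// The point of BuildDeviceTensor's lambda: the freeing policy (which pool, which
// device) is captured by value and travels with the pointer, so the buffer is
// returned to the allocator it came from even after the executor's scope is gone.
// MemKind and WrapDeviceBuffer are hypothetical names for illustration only.
#include <cstdint>
#include <functional>
#include <memory>

enum class MemKind { kCaching, kRdma, kHostDdr };

using OwnedBuffer = std::unique_ptr<uint8_t, std::function<void(uint8_t *)>>;

OwnedBuffer WrapDeviceBuffer(uint8_t *buf, MemKind kind, uint32_t device_id) {
  auto deleter = [kind, device_id](uint8_t *p) {
    if (p == nullptr) {
      return;
    }
    switch (kind) {  // dispatch to the pool that actually produced the buffer
      case MemKind::kRdma:    /* rdma_pool.Free(p, device_id) */ break;
      case MemKind::kHostDdr: /* host_mem.Free(p) */ break;
      default:                /* caching_allocator.Free(p, device_id) */ break;
    }
  };
  return OwnedBuffer(buf, std::move(deleter));
}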
@@ -75,9 +75,9 @@ class HybridModelAsyncExecutor {
                       HybridModelExecutor::ExecuteArgs &args,
                       OutputData *output_data);
 
-  Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args,
-                     OutputData *output_data,
-                     std::vector<ge::Tensor> &outputs);
+  Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector<ge::Tensor> &outputs);
+  void BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, int64_t output_size,
+                         std::vector<ge::Tensor> &outputs);
 
   Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::Tensor> &outputs);
@@ -61,6 +61,9 @@ const std::string kTaskTypeAicore = "AI_CORE";
 const std::string kTaskTypeAicpu = "AI_CPU";
 const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";
 
+// dynamic execute mode
+const char *const kLazyRecompile = "lazy_recompile";
+
 // Data cache, including data address and length
 struct DataBuffer {
  public:
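// --- Hedged sketch of how kLazyRecompile is meant to be consumed, not in the patch ---
// The executor compares a configured execute-mode string against this constant to
// choose between the host-copy and device-tensor output paths. The option key and
// GetOption lookup below are assumptions for illustration; only the comparison
// against kLazyRecompile appears in this change.
std::string execute_mode;
(void)ge::GetContext().GetOption("ge.exec.dynamicGraphExecuteMode", execute_mode);  // key assumed
if (execute_mode == kLazyRecompile) {
  // zero-copy path: BuildDeviceTensor + blobs tagged kPlacementDevice
} else {
  // default path: rtMemcpy into freshly allocated, aligned host memory
}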
@@ -1 +1 @@
-Subproject commit 8dd3448e2f0150c51266bc120bdd5d171a003e6b
+Subproject commit 1aa10c59b4e11564c2db76c2ba0039474d38df26
@@ -828,6 +828,7 @@ set(HYBRID_TEST_FILES
     "hybrid/executor/worker/execution_engine_unittest.cc"
     "hybrid/model/hybrid_model_builder_unittest.cc"
     "hybrid/node_executor/rts/rts_node_task_unittest.cc"
+    "hybrid/executor/hybrid_model_async_executor_unittest.cc"
 )
 
 set(OTHERS_TEST_FILES
@@ -0,0 +1,89 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <vector>
+
+#define private public
+#define protected public
+#include "hybrid/executor/hybrid_model_async_executor.h"
+#include "hybrid/executor/hybrid_model_executor.h"
+#include "graph/utils/tensor_utils.h"
+#include "graph/utils/graph_utils.h"
+#include "graph/debug/ge_attr_define.h"
+
+using namespace std;
+using namespace testing;
+
+namespace ge {
+using namespace hybrid;
+
+class UtestHybridModelAsyncExecutor : public testing::Test {
+ protected:
+  void SetUp() {}
+  void TearDown() {}
+};
+
+TEST_F(UtestHybridModelAsyncExecutor, CopyOutputs_success) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
+  ge_root_model->SetModelName("test_name");
+  GeModelPtr ge_sub_model = make_shared<GeModel>();
+  HybridModel hybrid_model(ge_root_model);
+  HybridModelAsyncExecutor executor(&hybrid_model);
+  TensorValue input_tensor;
+  HybridModelExecutor::ExecuteArgs args;
+  args.inputs.emplace_back(input_tensor);
+  auto desc = MakeShared<GeTensorDesc>();
+  GeShape geshape({2, 2, 2, 2});
+  desc->SetShape(geshape);
+
+  auto allocator = NpuMemoryAllocator::GetAllocator();
+  auto tensor_buffer = TensorBuffer::Create(allocator, 100);
+  auto output_tensor = TensorValue(shared_ptr<TensorBuffer>(tensor_buffer.release()));
+  args.outputs.emplace_back(output_tensor);
+  args.output_desc.emplace_back(desc);
+
+  OutputData output_data;
+  std::vector<ge::Tensor> outputs;
+  auto ret = executor.CopyOutputs(args, &output_data, outputs);
+  ASSERT_EQ(ret, SUCCESS);
+}
+
+TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
+  ge_root_model->SetModelName("test_name");
+  GeModelPtr ge_sub_model = make_shared<GeModel>();
+  HybridModel hybrid_model(ge_root_model);
+  HybridModelAsyncExecutor executor(&hybrid_model);
+
+  auto allocator = NpuMemoryAllocator::GetAllocator();
+  auto tensor_buffer = TensorBuffer::Create(allocator, 100);
+  auto tensor = TensorValue(shared_ptr<TensorBuffer>(tensor_buffer.release()));
+  GeTensorDesc ge_tensor_desc;
+  int64_t output_size = 100;
+  std::vector<ge::Tensor> outputs;
+  executor.BuildDeviceTensor(tensor, ge_tensor_desc, output_size, outputs);
+  auto size = tensor.GetSize();
+  ASSERT_EQ(size, 100);
+}
+}  // namespace ge