|
- /**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "graph/load/model_manager/cpu_queue_schedule.h"
- #include "framework/common/debug/ge_log.h"
- #include "framework/common/debug/log.h"
-
namespace {
// Third argument handed to rtCpuKernelLaunch by every Distribute() in this file.
constexpr uint32_t kCoreDim = 1U;
// Kernel name launched by CpuTaskModelPrepare::Distribute().
constexpr const char *kCpuTaskModelPrepare = "modelPrepare";
// Kernel name launched by CpuTaskWaitEndGraph::Distribute().
constexpr const char *kCpuTaskWaitEndGraph = "modelWaitEndGraph";
// Kernel name launched by CpuTaskModelPostpare::Distribute().
constexpr const char *kCpuTaskModelPostpare = "modelPostpare";
}  // namespace
-
- namespace ge {
- CpuTaskInfo::CpuTaskInfo(rtStream_t stream) : args_(nullptr), args_size_(0) { stream_ = stream; }
-
- CpuTaskInfo::~CpuTaskInfo() {
- if (args_ == nullptr) {
- return;
- }
-
- rtError_t status = rtFree(args_);
- if (status != RT_ERROR_NONE) {
- GELOGW("Call rt free failed, status: 0x%x", status);
- }
- args_ = nullptr;
- }
-
- Status CpuTaskModelPrepare::GenerateCpuAddr(const map<uint32_t, ZeroCopyOffset> &node_addrs, void *&data_list_addr,
- void *&index_list_addr, uint32_t &num) {
- vector<uint64_t> addrs_list;
- vector<uint32_t> index_list;
- for (const auto &addrs : node_addrs) {
- const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
- GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "[Check][Param] not set outside_addrs");
- std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
- for (const auto &virtual_args_addr : virtual_args_addrs) {
- num += virtual_args_addr.second.size();
- for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
- index_list.emplace_back(addrs.first);
- addrs_list.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
- }
- }
- }
-
- GE_CHK_RT_RET(rtMalloc(&data_list_addr, addrs_list.size() * sizeof(uint64_t), RT_MEMORY_HBM));
- rtError_t status = rtMemcpy(data_list_addr, addrs_list.size() * sizeof(uint64_t), addrs_list.data(),
- addrs_list.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", addrs_list.size() * sizeof(uint64_t),
- status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", addrs_list.size() * sizeof(uint64_t), status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- GE_CHK_RT_RET(rtMalloc(&index_list_addr, index_list.size() * sizeof(uint32_t), RT_MEMORY_HBM));
- status = rtMemcpy(index_list_addr, index_list.size() * sizeof(uint32_t), index_list.data(),
- index_list.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", index_list.size() * sizeof(uint32_t),
- status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", index_list.size() * sizeof(uint32_t), status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- return SUCCESS;
- }
-
- Status CpuTaskModelPrepare::GenerateOutSizeAddr(const map<uint32_t, ZeroCopyOffset> &outside_addrs,
- void *&output_size_list_addr) {
- vector<uint32_t> output_sizes;
- for (const auto &addrs : outside_addrs) {
- if (addrs.second.GetDataInfo().empty()) {
- REPORT_INNER_ERROR("E19999", "Index:%u out_data_info is empty, check invalid", addrs.first);
- GELOGE(INTERNAL_ERROR, "[Check][Param] Index:%u out_data_info is empty, check invalid", addrs.first);
- return INTERNAL_ERROR;
- }
- uint32_t data_size = static_cast<uint32_t>(addrs.second.GetDataInfo().at(0).first);
- output_sizes.push_back(data_size);
- }
-
- GE_CHK_RT_RET(rtMalloc(&output_size_list_addr, output_sizes.size() * sizeof(uint32_t), RT_MEMORY_HBM));
- rtError_t status = rtMemcpy(output_size_list_addr, output_sizes.size() * sizeof(uint32_t), output_sizes.data(),
- output_sizes.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", output_sizes.size() * sizeof(uint32_t),
- status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", output_sizes.size() * sizeof(uint32_t), status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- return SUCCESS;
- }
-
- Status CpuTaskModelPrepare::Init(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids,
- const map<uint32_t, ZeroCopyOffset> &inside_addrs,
- const map<uint32_t, ZeroCopyOffset> &outside_addrs, uintptr_t &out_mbuf) {
- if ((args_ != nullptr) || (args_size_ > 0)) {
- REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
- GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
- return FAILED;
- }
-
- GE_CHK_RT_RET(rtMalloc(&mbufptr_list_, output_queue_ids.size() * sizeof(uint64_t), RT_MEMORY_HBM));
- GE_CHK_RT_RET(rtMalloc(&queue_id_list_addr_, input_queue_ids.size() * sizeof(uint32_t), RT_MEMORY_HBM));
- rtError_t status = rtMemcpy(queue_id_list_addr_, input_queue_ids.size() * sizeof(uint32_t), input_queue_ids.data(),
- input_queue_ids.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", input_queue_ids.size() * sizeof(uint32_t),
- status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", input_queue_ids.size() * sizeof(uint32_t), status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- uint32_t input_addr_num = 0;
- uint32_t output_addr_num = 0;
- if (GenerateCpuAddr(inside_addrs, input_list_addr_, input_index_list_addr_, input_addr_num) != SUCCESS) {
- return FAILED;
- }
- if (GenerateCpuAddr(outside_addrs, output_list_addr_, output_index_list_addr_, output_addr_num) != SUCCESS) {
- return FAILED;
- }
- if (GenerateOutSizeAddr(outside_addrs, output_size_list_addr_) != SUCCESS) {
- return FAILED;
- }
-
- AicpuPareInfo aicpu_info;
- aicpu_info.aicpu_info_size = sizeof(AicpuPareInfo);
- aicpu_info.input_addr_num = input_addr_num;
- aicpu_info.input_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_list_addr_));
- aicpu_info.input_index_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_index_list_addr_));
- aicpu_info.output_addr_num = output_addr_num;
- aicpu_info.output_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_list_addr_));
- aicpu_info.output_index_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_index_list_addr_));
- aicpu_info.output_num = outside_addrs.size();
- aicpu_info.output_size_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_size_list_addr_));
- aicpu_info.in_queue_num = input_queue_ids.size();
- aicpu_info.in_queueid_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(queue_id_list_addr_));
- aicpu_info.out_queue_num = output_queue_ids.size();
- aicpu_info.mbufptr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(mbufptr_list_));
-
- args_size_ = sizeof(AicpuPareInfo);
- GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
- status = rtMemcpy(args_, args_size_, &aicpu_info, sizeof(AicpuPareInfo), RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
- out_mbuf = reinterpret_cast<uintptr_t>(mbufptr_list_);
-
- return SUCCESS;
- }
-
- Status CpuTaskModelPrepare::Distribute() {
- if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
- REPORT_INNER_ERROR("E19999",
- "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
- "check invalid",
- args_size_);
- GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
- return FAILED;
- }
-
- rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelPrepare, kCoreDim, args_, args_size_, nullptr, stream_);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
- GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- GELOGI("Cpu kernel launch model prepare task success.");
- return SUCCESS;
- }
-
- CpuTaskModelPrepare::~CpuTaskModelPrepare() {
- if (input_list_addr_ != nullptr) {
- GE_CHK_RT(rtFree(input_list_addr_));
- }
- if (input_index_list_addr_ != nullptr) {
- GE_CHK_RT(rtFree(input_index_list_addr_));
- }
- if (output_list_addr_ != nullptr) {
- GE_CHK_RT(rtFree(output_list_addr_));
- }
- if (output_index_list_addr_ != nullptr) {
- GE_CHK_RT(rtFree(output_index_list_addr_));
- }
- if (output_size_list_addr_ != nullptr) {
- GE_CHK_RT(rtFree(output_size_list_addr_));
- }
- if (queue_id_list_addr_ != nullptr) {
- GE_CHK_RT(rtFree(queue_id_list_addr_));
- }
- if (mbufptr_list_ != nullptr) {
- GE_CHK_RT(rtFree(mbufptr_list_));
- }
-
- input_list_addr_ = nullptr;
- input_index_list_addr_ = nullptr;
- output_list_addr_ = nullptr;
- output_index_list_addr_ = nullptr;
- output_size_list_addr_ = nullptr;
- queue_id_list_addr_ = nullptr;
- mbufptr_list_ = nullptr;
- }
-
- Status CpuTaskModelPostpare::Init(uint32_t model_id, const vector<uint32_t> &output_queue_ids, uintptr_t out_mbuf) {
- if ((args_ != nullptr) || (args_size_ > 0)) {
- REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
- GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
- return FAILED;
- }
-
- GE_CHK_RT_RET(rtMalloc(&queue_id_list_addr_, output_queue_ids.size() * sizeof(uint32_t), RT_MEMORY_HBM));
- rtError_t status = rtMemcpy(queue_id_list_addr_, output_queue_ids.size() * sizeof(uint32_t), output_queue_ids.data(),
- output_queue_ids.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", output_queue_ids.size() * sizeof(uint32_t),
- status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", output_queue_ids.size() * sizeof(uint32_t),
- status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- AicpuPareInfo aicpu_info;
- aicpu_info.model_id = model_id;
- aicpu_info.out_queue_num = output_queue_ids.size();
- aicpu_info.out_queueid_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(queue_id_list_addr_));
- aicpu_info.mbufptr_list = static_cast<uint64_t>(out_mbuf);
-
- args_size_ = sizeof(AicpuPareInfo);
- GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
- status = rtMemcpy(args_, args_size_, &aicpu_info, sizeof(AicpuPareInfo), RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- return SUCCESS;
- }
-
- Status CpuTaskModelPostpare::Distribute() {
- if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
- REPORT_INNER_ERROR("E19999",
- "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
- "check invalid",
- args_size_);
- GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
- return FAILED;
- }
-
- rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelPostpare, kCoreDim, args_, args_size_, nullptr, stream_);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
- GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- GELOGI("Cpu kernel launch model postpare task success.");
- return SUCCESS;
- }
-
- CpuTaskModelPostpare::~CpuTaskModelPostpare() {
- if (queue_id_list_addr_ != nullptr) {
- GE_CHK_RT(rtFree(queue_id_list_addr_));
- }
-
- queue_id_list_addr_ = nullptr;
- }
-
- ///
- /// @ingroup ge
- /// @brief definiteness queue schedule, active entry stream.
- /// @param [in] stream: stream to be active.
- /// @return: 0 for success / others for failCpuTaskModelPostpareed
- ///
- Status CpuTaskActiveEntry::Init(rtStream_t stream) {
- if (stream == nullptr) {
- REPORT_INNER_ERROR("E19999", "Param stream is nullptr, check invalid");
- GELOGE(FAILED, "[Check][Param] Task active stream not valid");
- return FAILED;
- }
-
- active_stream_ = stream;
- return SUCCESS;
- }
-
- Status CpuTaskActiveEntry::Distribute() {
- if ((active_stream_ == nullptr) || (stream_ == nullptr)) {
- REPORT_INNER_ERROR("E19999", "Param stream is nullptr or active_stream_ is nullptr, check invalid");
- GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
- return FAILED;
- }
-
- rtError_t ret = rtStreamActive(active_stream_, stream_);
- if (ret != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X", ret);
- GELOGE(RT_FAILED, "[Call][RtStreamActive] failed, ret:0x%X", ret);
- return RT_ERROR_TO_GE_STATUS(ret);
- }
-
- GELOGI("Cpu kernel launch active entry task success.");
- return SUCCESS;
- }
-
- ///
- /// @ingroup ge
- /// @brief definiteness queue schedule, wait for end graph.
- /// @param [in] model_id: model id for wait end graph.
- /// @return: 0 for success / others for failed
- ///
- Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {
- if ((args_ != nullptr) || (args_size_ > 0)) {
- REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
- GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
- return FAILED;
- }
-
- args_size_ = sizeof(model_id);
- rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status);
- GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
- GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
-
- status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
- GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- return SUCCESS;
- }
-
- Status CpuTaskWaitEndGraph::Distribute() {
- if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
- REPORT_INNER_ERROR("E19999",
- "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
- "check invalid",
- args_size_);
- GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
- return FAILED;
- }
-
- rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_);
- if (status != RT_ERROR_NONE) {
- REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
- GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
- return RT_ERROR_TO_GE_STATUS(status);
- }
-
- GELOGI("Cpu kernel launch wait end task success.");
- return SUCCESS;
- }
- } // namespace ge
|