/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "graph/load/model_manager/cpu_queue_schedule.h"

#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"

// NOTE(review): this file arrived with its line breaks collapsed and its template
// argument lists stripped. Element/cast types below were restored from usage
// (sizeof operands, format specifiers, parameter types). The mapped type
// `ZeroCopyOffset` and the `uint64_t` width of the AicpuPareInfo address fields
// are assumptions -- confirm against cpu_queue_schedule.h before merging.

namespace {
const uint32_t kCoreDim = 1;  // for rtCpuKernelLaunch
const char *const kCpuTaskModelPrepare = "modelPrepare";
const char *const kCpuTaskWaitEndGraph = "modelWaitEndGraph";
const char *const kCpuTaskModelPostpare = "modelPostpare";
}  // namespace

namespace ge {
///
/// @ingroup ge
/// @brief Bind the task to the stream it will be launched on; no kernel arguments exist yet.
/// @param [in] stream: stream stored in stream_.
///
CpuTaskInfo::CpuTaskInfo(rtStream_t stream) : args_(nullptr), args_size_(0) { stream_ = stream; }

///
/// @ingroup ge
/// @brief Release the kernel argument buffer, if one was allocated.
///        A failing rtFree is only logged as a warning.
///
CpuTaskInfo::~CpuTaskInfo() {
  if (args_ == nullptr) {
    return;
  }

  rtError_t status = rtFree(args_);
  if (status != RT_ERROR_NONE) {
    GELOGW("Call rt free failed, status: 0x%x", status);
  }
  args_ = nullptr;
}

///
/// @ingroup ge
/// @brief Flatten the addresses of every node into two parallel lists (address, node index)
///        and copy both lists into newly allocated device memory.
/// @param [in] node_addrs: node index -> address description.
/// @param [out] data_list_addr: device buffer holding the addresses as uint64_t.
/// @param [out] index_list_addr: device buffer holding the matching node index as uint32_t.
/// @param [in,out] num: incremented by the number of addresses collected.
/// @return: 0 for success / others for failed
///
Status CpuTaskModelPrepare::GenerateCpuAddr(const map<uint32_t, ZeroCopyOffset> &node_addrs, void *&data_list_addr,
                                            void *&index_list_addr, uint32_t &num) {
  vector<uint64_t> addrs_list;
  vector<uint32_t> index_list;
  for (const auto &addrs : node_addrs) {
    const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
    GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "[Check][Param] not set outside_addrs");
    // Only the first mapping is used; bind by reference instead of copying the whole map.
    const auto &virtual_args_addrs = addrs_mapping_list[0];
    for (const auto &virtual_args_addr : virtual_args_addrs) {
      num += virtual_args_addr.second.size();
      for (const auto &args_addr : virtual_args_addr.second) {
        // One index entry per address keeps both lists the same length.
        index_list.emplace_back(addrs.first);
        addrs_list.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr)));
      }
    }
  }

  GE_CHK_RT_RET(rtMalloc(&data_list_addr, addrs_list.size() * sizeof(uint64_t), RT_MEMORY_HBM));
  rtError_t status = rtMemcpy(data_list_addr, addrs_list.size() * sizeof(uint64_t), addrs_list.data(),
                              addrs_list.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", addrs_list.size() * sizeof(uint64_t),
                      status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", addrs_list.size() * sizeof(uint64_t), status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  GE_CHK_RT_RET(rtMalloc(&index_list_addr, index_list.size() * sizeof(uint32_t), RT_MEMORY_HBM));
  status = rtMemcpy(index_list_addr, index_list.size() * sizeof(uint32_t), index_list.data(),
                    index_list.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", index_list.size() * sizeof(uint32_t),
                      status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", index_list.size() * sizeof(uint32_t), status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Collect the first data-info size of every output and copy the list to device memory.
/// @param [in] outside_addrs: output index -> address description.
/// @param [out] output_size_list_addr: device buffer holding one uint32_t size per output.
/// @return: 0 for success / INTERNAL_ERROR when an output has no data info / others for failed
///
Status CpuTaskModelPrepare::GenerateOutSizeAddr(const map<uint32_t, ZeroCopyOffset> &outside_addrs,
                                                void *&output_size_list_addr) {
  vector<uint32_t> output_sizes;
  for (const auto &addrs : outside_addrs) {
    if (addrs.second.GetDataInfo().empty()) {
      REPORT_INNER_ERROR("E19999", "Index:%u out_data_info is empty, check invalid", addrs.first);
      GELOGE(INTERNAL_ERROR, "[Check][Param] Index:%u out_data_info is empty, check invalid", addrs.first);
      return INTERNAL_ERROR;
    }
    uint32_t data_size = static_cast<uint32_t>(addrs.second.GetDataInfo().at(0).first);
    output_sizes.push_back(data_size);
  }

  GE_CHK_RT_RET(rtMalloc(&output_size_list_addr, output_sizes.size() * sizeof(uint32_t), RT_MEMORY_HBM));
  rtError_t status = rtMemcpy(output_size_list_addr, output_sizes.size() * sizeof(uint32_t), output_sizes.data(),
                              output_sizes.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", output_sizes.size() * sizeof(uint32_t),
                      status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", output_sizes.size() * sizeof(uint32_t), status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Build the device-side argument block for the "modelPrepare" cpu kernel.
/// @param [in] input_queue_ids: ids of the input queues.
/// @param [in] output_queue_ids: ids of the output queues (only their count is used here).
/// @param [in] inside_addrs: input index -> address description.
/// @param [in] outside_addrs: output index -> address description.
/// @param [out] out_mbuf: address of the device mbuf pointer list allocated by this call.
/// @return: 0 for success / others for failed
///
Status CpuTaskModelPrepare::Init(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids,
                                 const map<uint32_t, ZeroCopyOffset> &inside_addrs,
                                 const map<uint32_t, ZeroCopyOffset> &outside_addrs, uintptr_t &out_mbuf) {
  if ((args_ != nullptr) || (args_size_ > 0)) {
    REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
    GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
    return FAILED;
  }

  GE_CHK_RT_RET(rtMalloc(&mbufptr_list_, output_queue_ids.size() * sizeof(uint64_t), RT_MEMORY_HBM));

  GE_CHK_RT_RET(rtMalloc(&queue_id_list_addr_, input_queue_ids.size() * sizeof(uint32_t), RT_MEMORY_HBM));
  rtError_t status = rtMemcpy(queue_id_list_addr_, input_queue_ids.size() * sizeof(uint32_t), input_queue_ids.data(),
                              input_queue_ids.size() * sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X", input_queue_ids.size() * sizeof(uint32_t),
                      status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", input_queue_ids.size() * sizeof(uint32_t),
           status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  uint32_t input_addr_num = 0;
  uint32_t output_addr_num = 0;
  if (GenerateCpuAddr(inside_addrs, input_list_addr_, input_index_list_addr_, input_addr_num) != SUCCESS) {
    return FAILED;
  }
  if (GenerateCpuAddr(outside_addrs, output_list_addr_, output_index_list_addr_, output_addr_num) != SUCCESS) {
    return FAILED;
  }
  if (GenerateOutSizeAddr(outside_addrs, output_size_list_addr_) != SUCCESS) {
    return FAILED;
  }

  // Value-initialize so fields not assigned below (e.g. model_id, out_queueid_list) reach the
  // device as zero instead of indeterminate stack contents.
  AicpuPareInfo aicpu_info{};
  aicpu_info.aicpu_info_size = sizeof(AicpuPareInfo);
  aicpu_info.input_addr_num = input_addr_num;
  aicpu_info.input_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_list_addr_));
  aicpu_info.input_index_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_index_list_addr_));
  aicpu_info.output_addr_num = output_addr_num;
  aicpu_info.output_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_list_addr_));
  aicpu_info.output_index_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_index_list_addr_));
  aicpu_info.output_num = outside_addrs.size();
  aicpu_info.output_size_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(output_size_list_addr_));
  aicpu_info.in_queue_num = input_queue_ids.size();
  aicpu_info.in_queueid_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(queue_id_list_addr_));
  aicpu_info.out_queue_num = output_queue_ids.size();
  aicpu_info.mbufptr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(mbufptr_list_));

  args_size_ = sizeof(AicpuPareInfo);
  GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
  status = rtMemcpy(args_, args_size_, &aicpu_info, sizeof(AicpuPareInfo), RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  out_mbuf = reinterpret_cast<uintptr_t>(mbufptr_list_);
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Launch the "modelPrepare" cpu kernel on stream_ with the arguments built by Init.
/// @return: 0 for success / others for failed
///
Status CpuTaskModelPrepare::Distribute() {
  if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
    REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
                       "check invalid", args_size_);
    GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
    return FAILED;
  }

  rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelPrepare, kCoreDim, args_, args_size_, nullptr, stream_);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
    GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  GELOGI("Cpu kernel launch model prepare task success.");
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Free every device buffer allocated by Init and its helpers, then clear the pointers.
///
CpuTaskModelPrepare::~CpuTaskModelPrepare() {
  if (input_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(input_list_addr_));
  }
  if (input_index_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(input_index_list_addr_));
  }
  if (output_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(output_list_addr_));
  }
  if (output_index_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(output_index_list_addr_));
  }
  if (output_size_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(output_size_list_addr_));
  }
  if (queue_id_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(queue_id_list_addr_));
  }
  if (mbufptr_list_ != nullptr) {
    GE_CHK_RT(rtFree(mbufptr_list_));
  }

  input_list_addr_ = nullptr;
  input_index_list_addr_ = nullptr;
  output_list_addr_ = nullptr;
  output_index_list_addr_ = nullptr;
  output_size_list_addr_ = nullptr;
  queue_id_list_addr_ = nullptr;
  mbufptr_list_ = nullptr;
}

///
/// @ingroup ge
/// @brief Build the device-side argument block for the "modelPostpare" cpu kernel.
/// @param [in] model_id: model id written into the argument block.
/// @param [in] output_queue_ids: ids of the output queues, copied to device memory.
/// @param [in] out_mbuf: address of the mbuf pointer list written into the argument block.
/// @return: 0 for success / others for failed
///
Status CpuTaskModelPostpare::Init(uint32_t model_id, const vector<uint32_t> &output_queue_ids, uintptr_t out_mbuf) {
  if ((args_ != nullptr) || (args_size_ > 0)) {
    REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
    GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
    return FAILED;
  }

  GE_CHK_RT_RET(rtMalloc(&queue_id_list_addr_, output_queue_ids.size() * sizeof(uint32_t), RT_MEMORY_HBM));
  rtError_t status = rtMemcpy(queue_id_list_addr_, output_queue_ids.size() * sizeof(uint32_t),
                              output_queue_ids.data(), output_queue_ids.size() * sizeof(uint32_t),
                              RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X",
                      output_queue_ids.size() * sizeof(uint32_t), status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%lu, ret:0x%X", output_queue_ids.size() * sizeof(uint32_t),
           status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  // Only four fields are meaningful for this kernel; value-initialize so the remaining fields
  // are zero rather than indeterminate when the struct is copied to the device.
  AicpuPareInfo aicpu_info{};
  aicpu_info.model_id = model_id;
  aicpu_info.out_queue_num = output_queue_ids.size();
  aicpu_info.out_queueid_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(queue_id_list_addr_));
  aicpu_info.mbufptr_list = static_cast<uint64_t>(out_mbuf);

  args_size_ = sizeof(AicpuPareInfo);
  GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
  status = rtMemcpy(args_, args_size_, &aicpu_info, sizeof(AicpuPareInfo), RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Launch the "modelPostpare" cpu kernel on stream_ with the arguments built by Init.
/// @return: 0 for success / others for failed
///
Status CpuTaskModelPostpare::Distribute() {
  if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
    REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
                       "check invalid", args_size_);
    GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
    return FAILED;
  }

  rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelPostpare, kCoreDim, args_, args_size_, nullptr, stream_);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
    GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  GELOGI("Cpu kernel launch model postpare task success.");
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Free the device copy of the output queue id list.
///
CpuTaskModelPostpare::~CpuTaskModelPostpare() {
  if (queue_id_list_addr_ != nullptr) {
    GE_CHK_RT(rtFree(queue_id_list_addr_));
  }
  queue_id_list_addr_ = nullptr;
}

///
/// @ingroup ge
/// @brief definiteness queue schedule, active entry stream.
/// @param [in] stream: stream to be active.
/// @return: 0 for success / others for failed
///
Status CpuTaskActiveEntry::Init(rtStream_t stream) {
  if (stream == nullptr) {
    REPORT_INNER_ERROR("E19999", "Param stream is nullptr, check invalid");
    GELOGE(FAILED, "[Check][Param] Task active stream not valid");
    return FAILED;
  }

  active_stream_ = stream;
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Issue rtStreamActive for active_stream_ on stream_.
/// @return: 0 for success / others for failed
///
Status CpuTaskActiveEntry::Distribute() {
  if ((active_stream_ == nullptr) || (stream_ == nullptr)) {
    REPORT_INNER_ERROR("E19999", "Param stream is nullptr or active_stream_ is nullptr, check invalid");
    GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
    return FAILED;
  }

  rtError_t ret = rtStreamActive(active_stream_, stream_);
  if (ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X", ret);
    GELOGE(RT_FAILED, "[Call][RtStreamActive] failed, ret:0x%X", ret);
    return RT_ERROR_TO_GE_STATUS(ret);
  }

  GELOGI("Cpu kernel launch active entry task success.");
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief definiteness queue schedule, wait for end graph.
/// @param [in] model_id: model id for wait end graph.
/// @return: 0 for success / others for failed
///
Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {
  if ((args_ != nullptr) || (args_size_ > 0)) {
    REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0, check invalid", args_size_);
    GELOGE(FAILED, "[Check][Param] Task already initialized, size:%u", args_size_);
    return FAILED;
  }

  // The kernel argument is just the model id itself.
  args_size_ = sizeof(model_id);
  rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X", args_size_, status);
    GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret:0x%X", args_size_, status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

  status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X", args_size_, status);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%u, ret:0x%X", args_size_, status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Launch the "modelWaitEndGraph" cpu kernel on stream_ with the arguments built by Init.
/// @return: 0 for success / others for failed
///
Status CpuTaskWaitEndGraph::Distribute() {
  if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
    REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
                       "check invalid", args_size_);
    GELOGE(FAILED, "[Check][Param] Task not initialized, distribute failed, size:%u", args_size_);
    return FAILED;
  }

  rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_);
  if (status != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X", status);
    GELOGE(RT_FAILED, "[Call][RtCpuKernelLaunch] failed, ret:0x%X", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  GELOGI("Cpu kernel launch wait end task success.");
  return SUCCESS;
}
}  // namespace ge