@@ -1158,7 +1158,6 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
   if (ret != SUCCESS) {
     REPORT_CALL_ERROR("E19999", "build graph failed, graph id:%u, ret:%d", graph_id, ret);
     GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "[Build][Graph] fail, graph id: %u", graph_id);
-    ret = GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED;
   }
   RtContextUtil::GetInstance().DestroyRtContexts(session_id);
@@ -275,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<uint64_t, siz
         "E19022", std::vector<std::string>({"size", "item", "maxsize"}),
         std::vector<std::string>({std::to_string(total_mem_offset), "featuremap",
                                   std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}));
-    return ge::FAILED;
+    return ACL_ERROR_GE_MEMORY_ALLOCATION;
   }
   return SUCCESS;
 }
@@ -29,9 +29,10 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<uint64_t, size_t> &m
   }
   // Reassign memory for special nodes
-  if (graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset) != ge::SUCCESS) {
+  Status ret = graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset);
+  if (ret != ge::SUCCESS) {
     GELOGE(ge::FAILED, "[ReAssign][Memory] failed, graph:%s", compute_graph_->GetName().c_str());
-    return ge::FAILED;
+    return ret;
   }
   // Assign memory (block and offset) for zero copy nodes
@@ -235,6 +235,12 @@ DavinciModel::~DavinciModel() {
     GE_LOGW_IF(rtEventDestroy(event_list_[i]) != RT_ERROR_NONE, "Destroy event failed, index: %zu", i);
   }
+  for (const auto &it : stream_2_event_) {
+    if (rtEventDestroy(it.second) != RT_ERROR_NONE) {
+      GELOGW("Destroy event failed");
+    }
+  }
+
   FreeWeightsMem();
   FreeFeatureMapMem();
@@ -4660,4 +4666,50 @@ Status DavinciModel::GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size)
   total_useful_size = runtime_param_.mem_size - runtime_param_.zero_copy_size;
   return SUCCESS;
 }
+
+Status DavinciModel::GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id) {
+  GELOGI("Get event id for aicpu blocking op:%s", op_desc->GetName().c_str());
+  auto it = stream_2_event_.find(stream);
+  if (it != stream_2_event_.end()) {
+    auto rt_ret = rtGetEventID(it->second, &event_id);
+    if (rt_ret != RT_ERROR_NONE) {
+      REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X",
+                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
+      GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X",
+             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+  } else {
+    rtEvent_t rt_event = nullptr;
+    auto rt_ret = rtEventCreateWithFlag(&rt_event, RT_EVENT_WITH_FLAG);
+    if (rt_ret != RT_ERROR_NONE) {
+      REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed for op:%s(%s), ret:0x%X",
+                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
+      GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed for op:%s(%s), ret:0x%X",
+             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+    rt_ret = rtGetEventID(rt_event, &event_id);
+    if (rt_ret != RT_ERROR_NONE) {
+      REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X",
+                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
+      GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X",
+             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+    stream_2_event_.emplace(stream, rt_event);
+  }
+  return SUCCESS;
+}
+
+Status DavinciModel::GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event) {
+  auto it = stream_2_event_.find(stream);
+  if (it == stream_2_event_.end()) {
+    REPORT_INNER_ERROR("E19999", "Get event failed");
+    GELOGE(FAILED, "[Get][Event] Get event failed");
+    return FAILED;
+  }
+  rt_event = it->second;
+  return SUCCESS;
+}
 }  // namespace ge
@@ -583,6 +583,9 @@ class DavinciModel {
   Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
   Status GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt);
+  // for blocking aicpu op
+  Status GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event);
+  Status GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id);

  private:
   // memory address of weights
@@ -1111,6 +1114,8 @@ class DavinciModel {
   // op name to attrs mapping
   std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_;
+
+  std::map<rtStream_t, rtEvent_t> stream_2_event_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
@@ -30,6 +30,7 @@ constexpr uint32_t kTailAicCtxIndex = 1;
 constexpr uint32_t kNonTailAivCtxIndex = 2;
 constexpr uint32_t kTailAivCtxIndex = 3;
 constexpr uint32_t kMixAicAivCtxPcNum = 4;
+constexpr uint32_t kModeInArgsFirstField = 1;
 }

 namespace ge {
 FftsPlusTaskInfo::~FftsPlusTaskInfo() {
@@ -50,7 +51,7 @@ Status FftsPlusTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
   if (args_size_ != 0) {
     GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
   }
+  SetAdditionalDatatoCtx(ffts_plus_task_def);
   std::vector<uint8_t> sqe_buffer(sizeof(rtFftsPlusSqe_t));
   auto ffts_plus_sqe = reinterpret_cast<rtFftsPlusSqe_t *>(sqe_buffer.data());
   InitFftsPlusSqe(ffts_plus_task_def.ffts_plus_sqe(), ffts_plus_sqe);
@@ -124,6 +125,10 @@ Status FftsPlusTaskInfo::InitFftsPlusCtx(const domi::FftsPlusTaskDef &task_def,
         break;
       }
       default:
+        if (ctx_def.hardware_ctx_type() == RT_HW_CTX_TYPE_AIC ||
+            ctx_def.hardware_ctx_type() == RT_HW_CTX_TYPE_AIV) {
+          GE_CHK_STATUS_RET_NOLOG(UpdateMixAicAivCtxParam(ctx_def.mix_aic_aiv_ctx(), i));
+        }
         GE_CHK_STATUS_RET_NOLOG(InitHardWareCtx(ctx_def, cur_ctx));
         break;
     }
@@ -954,6 +959,32 @@ Status FftsPlusTaskInfo::Distribute() {
   return SUCCESS;
 }

+void FftsPlusTaskInfo::SetAdditionalDatatoCtx(const domi::FftsPlusTaskDef &task_def) {
+  for (int i = 0; i < task_def.additional_data_size(); ++i) {
+    const domi::AdditionalDataDef &additionaldata = task_def.additional_data(i);
+    const uint32_t &data_type = additionaldata.data_type();
+    for (int j = 0; j < additionaldata.context_id_size(); ++j) {
+      ctx_additional_data_[additionaldata.context_id(j)].emplace(data_type);
+    }
+  }
+}
+
+Status FftsPlusTaskInfo::UpdateMixAicAivCtxParam(const domi::FftsPlusMixAicAivCtxDef &ctx_def, size_t ctx_idx) {
+  if (ctx_additional_data_.count(ctx_idx) == 0) {
+    GELOGD("ctx idx:%zu not in ctx additional data", ctx_idx);
+    return SUCCESS;
+  }
+  if (ctx_additional_data_[ctx_idx].count(kModeInArgsFirstField) == 0) {
+    GELOGD("ctx idx:%zu no need to save mode in args first field", ctx_idx);
+    return SUCCESS;
+  }
+  if (rtApp_addr_ == 0) {
+    GE_CHK_RT_RET(rtGetC2cCtrlAddr(&rtApp_addr_, &rtApp_data_len_));
+  }
+  GE_CHK_RT_RET(rtMemcpy(reinterpret_cast<void *>(ctx_def.aiv_task_param_ptr()), rtApp_data_len_,
+                         reinterpret_cast<void *>(rtApp_addr_), rtApp_data_len_, RT_MEMCPY_HOST_TO_DEVICE));
+  return SUCCESS;
+}
+
 // task_addr = {0,200,700,1000,2000, 3500}
 // task_addr_offset = {20,40,2,100,200}
 template <typename T>
@@ -58,6 +58,9 @@ class FftsPlusTaskInfo : public TaskInfo {
   Status InitCaseSwitchCtx(const domi::FftsPlusCaseSwitchCtxDef &ctx_def, rtFftsPlusCaseSwitchCtx_t *&ctx);
   Status InitCaseDefaultCtx(const domi::FftsPlusCaseDefaultCtxDef &ctx_def, rtFftsPlusCaseDefCtx_t *&ctx);
+  void SetAdditionalDatatoCtx(const domi::FftsPlusTaskDef &task_def);
+  Status UpdateMixAicAivCtxParam(const domi::FftsPlusMixAicAivCtxDef &ctx_def, size_t ctx_idx);
+
   template<typename T>
   Status InitIoAddrs(const RuntimeParam &rts_param, const T &aic_aiv_def, uint32_t thread_id, uint32_t addr_count);
@@ -66,6 +69,9 @@ class FftsPlusTaskInfo : public TaskInfo {
   std::vector<void *> io_addrs_;
   void *args_{nullptr};  // runtime args memory
   uint32_t args_size_{0};  // runtime args memory length
+  std::map<uint32_t, std::set<uint32_t>> ctx_additional_data_;
+  uint64_t rtApp_addr_{0};
+  uint32_t rtApp_data_len_{0};
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_
@@ -26,8 +26,8 @@
 #include "external/graph/attr_value.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/model_manager.h"
-#include "hybrid/node_executor/aicpu/aicpu_ext_info.h"
 #include "framework/common/debug/log.h"
+#include "runtime/rt.h"

 namespace {
 const char *const kAicpuAllshape = "_AllShape";
@@ -43,7 +43,7 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe
   UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
   uint32_t num_inputs = op_desc->GetInputsSize();
   uint32_t num_outputs = op_desc->GetOutputsSize();
-  std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle(
+  std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle(
       new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc->GetName(),
                                                           num_inputs,
                                                           num_outputs,
@@ -76,6 +76,16 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe
       }
     }
   }
+
+  AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_);
+  GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_);
+  if (UpdateEventIdForAicpuBlockingOp(op_desc, ext_handle) != SUCCESS) {
+    GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)",
+           op_desc->GetName().c_str(), op_desc->GetType().c_str());
+    return FAILED;
+  }
+
   auto rt_ret = rtMalloc(&ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM);
   GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
                   REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", ext_info.size(), rt_ret);
@@ -448,6 +458,101 @@ Status KernelExTaskInfo::Distribute() {
   stream_id_ = stream_id;
   GELOGI("KernelExTaskInfo Distribute Success. task id: %u, stream id: %u", task_id_, stream_id_);
+  if (is_blocking_aicpu_op_) {
+    if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) {
+      GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed");
+      return FAILED;
+    }
+  }
+  return SUCCESS;
+}
+
+Status KernelExTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
+  int32_t device_id = 0;
+  auto rt_ret = rtGetDevice(&device_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  int32_t value = 0;
+  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
+    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
+                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
+    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
+           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
+    return FAILED;
+  }
+  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? true : false);
+  return SUCCESS;
+}
+
+Status KernelExTaskInfo::UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc,
+                                                         std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle) {
+  if (is_blocking_aicpu_op_) {
+    bool is_support = false;
+    if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
+      GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
+      return FAILED;
+    }
+    if (!is_support) {
+      GELOGD("Device not support blocking aicpu op process");
+      return SUCCESS;
+    }
+    uint32_t event_id = 0;
+    if (davinci_model_->GetEventIdForBlockingAicpuOp(op_desc, stream_, event_id) != SUCCESS) {
+      REPORT_CALL_ERROR("E19999", "Get event id failed for op:%s(%s).", op_desc->GetName().c_str(),
+                        op_desc->GetType().c_str());
+      GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc->GetName().c_str(),
+             op_desc->GetType().c_str());
+      return FAILED;
+    }
+    if (ext_handle->UpdateEventId(event_id) != SUCCESS) {
+      REPORT_CALL_ERROR("E19999", "Update event id failed for op:%s(%s).", op_desc->GetName().c_str(),
+                        op_desc->GetType().c_str());
+      GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc->GetName().c_str(),
+             op_desc->GetType().c_str());
+      return FAILED;
+    }
+    GELOGI("Update event_id=%u success", event_id);
+  }
+  return SUCCESS;
+}
+
+Status KernelExTaskInfo::DistributeWaitTaskForAicpuBlockingOp() {
+  bool is_support = false;
+  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
+    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
+    return FAILED;
+  }
+  if (!is_support) {
+    GELOGD("Device not support blocking aicpu op process.");
+    return SUCCESS;
+  }
+  GELOGD("Distribute wait task begin");
+  rtEvent_t rt_event = nullptr;
+  if (davinci_model_->GetEventByStream(stream_, rt_event) != SUCCESS) {
+    GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed");
+    return FAILED;
+  }
+  auto rt_ret = rtStreamWaitEvent(stream_, rt_event);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  rt_ret = rtEventReset(rt_event, stream_);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
   return SUCCESS;
 }
@@ -19,6 +19,7 @@
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
+#include "hybrid/node_executor/aicpu/aicpu_ext_info.h"

 namespace ge {
 class KernelExTaskInfo : public TaskInfo {
@@ -65,6 +66,12 @@ class KernelExTaskInfo : public TaskInfo {
   void InitDumpArgs(void *addr, const OpDescPtr &op_desc);
   Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc);
+
+  // for blocking aicpu op
+  Status DistributeWaitTaskForAicpuBlockingOp();
+  Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
+  Status UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc,
+                                         std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle);
   uint32_t task_id_;
   uint32_t stream_id_;
   uint32_t dump_flag_;
@@ -79,6 +86,7 @@ class KernelExTaskInfo : public TaskInfo {
   uint32_t args_offset_ = 0;
   int64_t fixed_addr_offset_ = 0;
   int32_t topic_type_flag_ = -1;
+  bool is_blocking_aicpu_op_ = false;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_
@@ -28,11 +28,10 @@
 #include "graph/load/model_manager/davinci_model.h"
 #include "graph/load/model_manager/model_manager.h"
 #include "graph/load/model_manager/model_utils.h"
-#include "runtime/kernel.h"
+#include "runtime/rt.h"
 #include "graph/load/model_manager/task_info/super_kernel/super_kernel.h"
 #include "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h"
 #include "cce/aicpu_engine_struct.h"
-#include "hybrid/node_executor/aicpu/aicpu_ext_info.h"
 #include "framework/common/debug/log.h"

 namespace {
@@ -474,6 +473,12 @@ Status KernelTaskInfo::Distribute() {
   }
   // set for task_id_
   UpdateTaskId();
+  if (is_blocking_aicpu_op_) {
+    if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) {
+      GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed");
+      return FAILED;
+    }
+  }
   GELOGD(
       "KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p "
       "blockdim:%d stream:%p",
@@ -482,6 +487,91 @@ Status KernelTaskInfo::Distribute() {
   return SUCCESS;
 }

+Status KernelTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
+  int32_t device_id = 0;
+  auto rt_ret = rtGetDevice(&device_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  int32_t value = 0;
+  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
+    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
+                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
+    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
+           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
+    return FAILED;
+  }
+  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? true : false);
+  return SUCCESS;
+}
+
+Status KernelTaskInfo::UpdateEventIdForAicpuBlockingOp(std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle) {
+  if (is_blocking_aicpu_op_) {
+    bool is_support = false;
+    if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
+      GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
+      return FAILED;
+    }
+    if (!is_support) {
+      GELOGD("Device not support blocking aicpu op process");
+      return SUCCESS;
+    }
+    uint32_t event_id = 0;
+    if (davinci_model_->GetEventIdForBlockingAicpuOp(op_desc_, stream_, event_id) != SUCCESS) {
+      GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc_->GetName().c_str(),
+             op_desc_->GetType().c_str());
+      return FAILED;
+    }
+    if (ext_handle->UpdateEventId(event_id) != SUCCESS) {
+      GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc_->GetName().c_str(),
+             op_desc_->GetType().c_str());
+      return FAILED;
+    }
+    GELOGI("Update event_id=%u success", event_id);
+  }
+  return SUCCESS;
+}
+
+Status KernelTaskInfo::DistributeWaitTaskForAicpuBlockingOp() {
+  bool is_support = false;
+  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
+    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
+    return FAILED;
+  }
+  if (!is_support) {
+    GELOGD("device not support blocking aicpu op process.");
+    return SUCCESS;
+  }
+  GELOGD("Distribute wait task begin");
+  rtEvent_t rt_event = nullptr;
+  if (davinci_model_->GetEventByStream(stream_, rt_event) != SUCCESS) {
+    REPORT_CALL_ERROR("E19999", "Call GetEventByStream failed");
+    GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed");
+    return FAILED;
+  }
+  auto rt_ret = rtStreamWaitEvent(stream_, rt_event);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  rt_ret = rtEventReset(rt_event, stream_);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  return SUCCESS;
+}
+
 void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
   const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
   vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
@@ -1109,7 +1199,7 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
   UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
   uint32_t num_inputs = op_desc_->GetInputsSize();
   uint32_t num_outputs = op_desc_->GetOutputsSize();
-  std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle(
+  std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle(
       new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(),
                                                           num_inputs,
                                                           num_outputs,
@@ -1145,6 +1235,16 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
               j, op_desc_->GetName().c_str());
     }
   }
+
+  AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_);
+  GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_);
+  if (UpdateEventIdForAicpuBlockingOp(ext_handle) != SUCCESS) {
+    GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)",
+           op_desc_->GetName().c_str(), op_desc_->GetType().c_str());
+    return FAILED;
+  }
+
   auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
     REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X",
@@ -24,6 +24,8 @@
 #include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
+#include "hybrid/node_executor/aicpu/aicpu_ext_info.h"
+
 namespace ge {
 class KernelTaskInfo : public TaskInfo {
  public:
@@ -148,6 +150,11 @@ class KernelTaskInfo : public TaskInfo {
   bool DoubleCallSKTSaveCheck();
   void SetArgs();
+
+  // for blocking aicpu op
+  Status DistributeWaitTaskForAicpuBlockingOp();
+  Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
+  Status UpdateEventIdForAicpuBlockingOp(std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle);
   void *stub_func_;
   void *args_;
   void *sm_desc_;
@@ -187,6 +194,7 @@ class KernelTaskInfo : public TaskInfo {
   uint32_t skt_dump_flag_ = RT_KERNEL_DEFAULT;
   void *superkernel_device_args_addr_ = nullptr;
   void *superkernel_dev_nav_table_ = nullptr;
+  bool is_blocking_aicpu_op_ = false;

   struct AICPUCustomInfo {
     void *input_descs = nullptr;
@@ -1389,8 +1389,8 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen
   ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id);
   graph_node->SetRunFlag(false);
   if (ret != SUCCESS) {
-    GELOGE(GE_GRAPH_PRERUN_FAILED, "[Call][StartForRunGraph] failed! graph_id:%u.", graph_id);
-    return GE_GRAPH_PRERUN_FAILED;
+    GELOGE(ret, "[Call][StartForRunGraph] failed! graph_id:%u.", graph_id);
+    return ret;
   }
   GELOGI("[BuildGraph] build graph success, graph_id=%u.", graph_id);
@@ -81,6 +81,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
       case aicpu::FWKAdapter::FWK_ADPT_EXT_TOPIC_TYPE:
         GE_CHK_STATUS_RET(ParseExtTopicType(aicpu_ext_info), "[Parse][ExtTopicType] failed.");
         break;
+      case aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT:
+        GE_CHK_STATUS_RET(ParseExtAsyncWait(aicpu_ext_info), "[Parse][ExtAsyncWait] failed.");
+        break;
       default:
         GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.",
                node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen);
@@ -101,6 +104,22 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
   return SUCCESS;
 }

+Status AicpuExtInfoHandler::ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info) {
+  if (aicpu_ext_info->infoLen != sizeof(AsyncWaitInfo)) {
+    REPORT_INNER_ERROR("E19999",
+                       "Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.",
+                       node_name_.c_str(), sizeof(AsyncWaitInfo), aicpu_ext_info->infoLen);
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+           "[Check][DataLen]Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.",
+           node_name_.c_str(), sizeof(AsyncWaitInfo), aicpu_ext_info->infoLen);
+    return ACL_ERROR_GE_PARAM_INVALID;
+  }
+
+  async_wait_ = reinterpret_cast<AsyncWaitInfo *>(aicpu_ext_info->infoMsg);
+  GELOGI("Node[%s] parse async wait info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
+  return SUCCESS;
+}
+
 Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {
   GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(int32_t),
                   REPORT_INNER_ERROR("E19999", "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.",
@@ -280,6 +299,17 @@ Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kern
   return SUCCESS;
 }

+Status AicpuExtInfoHandler::UpdateEventId(uint32_t event_id) {
+  if (async_wait_ == nullptr) {
+    REPORT_INNER_ERROR("E19999", "async_wait_ is nullptr.");
+    GELOGE(FAILED, "[Check][async_wait_] async_wait_ is nullptr.");
+    return FAILED;
+  }
+  async_wait_->waitType = 1;
+  async_wait_->waitId = event_id;
+  return SUCCESS;
+}
+
 Status AicpuExtInfoHandler::UpdateSessionInfoSessionId(uint64_t session_id) {
   if (session_info_ == nullptr) {
     GELOGD("There is no session info in ext_info, no need update.");
@@ -27,6 +27,7 @@ namespace ge {
 namespace hybrid {
 using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType;
 using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo;
+using AsyncWaitInfo = aicpu::FWKAdapter::AsyncWait;
 using AicpuSessionInfo = SessionInfo;

 class AicpuExtInfoHandler {
@@ -59,6 +60,8 @@ class AicpuExtInfoHandler {
   Status UpdateExecuteMode(bool flag);

+  Status UpdateEventId(uint32_t event_id);
+
   Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type);

   bool IsNeedRefreshIOAddr();
@@ -73,6 +76,7 @@ class AicpuExtInfoHandler {
   Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info);
   Status ParseExtUpdateAddr(AicpuExtInfo *aicpu_ext_info);
   Status ParseExtTopicType(AicpuExtInfo *aicpu_ext_info);
+  Status ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info);

   static Status UpdateShapeAndType(const GeShape &shape,
                                    DataType data_type,
@@ -90,6 +94,7 @@ class AicpuExtInfoHandler {
   const uint32_t output_num_;
   UnknowShapeOpType unknown_type_;
   AicpuSessionInfo *session_info_ = nullptr;
+  AsyncWaitInfo *async_wait_ = nullptr;
   uint64_t *bit_map_ = nullptr;
   uint32_t *update_addr_ = nullptr;
   int32_t topic_type_flag_ = -1;
@@ -22,6 +22,7 @@
 #include "graph/utils/node_utils.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/model/hybrid_model.h"
+#include "runtime/rt.h"

 namespace ge {
 namespace hybrid {
@@ -33,6 +34,12 @@ const char *const kAicpuAllshape = "_AllShape";
 REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, AiCpuNodeExecutor);
 REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_CUSTOM, AiCpuNodeExecutor);

+AicpuNodeTaskBase::~AicpuNodeTaskBase() {
+  if (rt_event_ != nullptr) {
+    (void)rtEventDestroy(rt_event_);
+  }
+}
+
 Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr<TensorBuffer> &tensor_buffer) {
   auto allocator = NpuMemoryAllocator::GetAllocator();
   GE_CHECK_NOTNULL(allocator);
@@ -64,6 +71,13 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_
   GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id),
                     "[Update][SessionInfoSessionId] failed, session_id:%ld.", session_id);

+  if (is_blocking_aicpu_op_) {
+    if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) {
+      GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed");
+      return FAILED;
+    }
+  }
+
   // copy task args buf
   GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_),
                     "[Invoke][AllocTensorBuffer]Node[%s] alloc kernel_ext_info buf failed, size=%zu",
@@ -230,6 +244,96 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
   return SUCCESS;
 }

+Status AicpuNodeTaskBase::UpdateEventIdForBlockingAicpuOp() {
+  bool is_support = false;
+  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
+    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
+    return FAILED;
+  }
+  if (!is_support) {
+    GELOGD("Device not support blocking aicpu op process");
+    return SUCCESS;
+  }
+  uint32_t event_id = 0;
+  auto rt_ret = rtEventCreateWithFlag(&rt_event_, RT_EVENT_WITH_FLAG);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed for node:%s, ret:0x%X", node_name_.c_str(),
+                      rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed for node:%s, ret:0x%X", node_name_.c_str(), rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  rt_ret = rtGetEventID(rt_event_, &event_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for node:%s, ret:0x%X", node_name_.c_str(), rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for node:%s, ret:0x%X", node_name_.c_str(), rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  if (aicpu_ext_handle_.UpdateEventId(event_id) != SUCCESS) {
+    REPORT_CALL_ERROR("E19999", "Update event id failed for node:%s.", node_name_.c_str());
+    GELOGE(FAILED, "[Update][EventId] Update event id failed for node:%s", node_name_.c_str());
+    return FAILED;
+  }
+  GELOGI("Update event_id=%u success", event_id);
+  return SUCCESS;
+}
+
+Status AicpuNodeTaskBase::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
+  int32_t device_id = 0;
+  auto rt_ret = rtGetDevice(&device_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  int32_t value = 0;
+  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
+    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
+                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
+    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
+           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
+    return FAILED;
+  }
+  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? true : false);
+  return SUCCESS;
+}
+
+Status AicpuNodeTaskBase::DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream) {
+  bool is_support = false;
+  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
+    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
+    return FAILED;
+  }
+  if (!is_support) {
+    GELOGD("Device not support blocking aicpu op process.");
+    return SUCCESS;
+  }
+  GELOGD("Distribute queue task begin");
+  if (rt_event_ == nullptr) {
+    REPORT_INNER_ERROR("E19999", "rt_event_ is nullptr");
+    GELOGE(FAILED, "[Check][rt_event_] rt_event_ is nullptr");
+    return FAILED;
+  }
+  auto rt_ret = rtStreamWaitEvent(stream, rt_event_);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  rt_ret = rtEventReset(rt_event_, stream);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  return SUCCESS;
+}
+
 Status AicpuTfNodeTask::InitForDependComputeTask() {
   if ((unknown_type_ != DEPEND_COMPUTE) || (node_item_->num_outputs == 0)) {
     GELOGD("Node[%s] type[%s] unknown_type is %d, output num is %d.",
@@ -325,6 +429,9 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) {
   // init ext info
   uint64_t ext_session_id = model.GetSessionId();
+  const OpDescPtr op_desc = node_item_->GetOpDesc();
+  AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_);
+  GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_);
   GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "[Init][ExtInfo] failed for Node[%s].",
                     node_name_.c_str());
   GE_CHK_STATUS_RET(InitForDependComputeTask(), "[Init][DependComputeTask] failed for Node[%s].", node_name_.c_str());
@@ -642,6 +749,12 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) {
                                  kernel_buf_->GetSize(), flag, context.GetStream()));
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End");
   GELOGD("Node[%s] launch end.", node_name_.c_str());
+  if (is_blocking_aicpu_op_) {
+    if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) {
+      GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed");
+      return FAILED;
+    }
+  }
   if (need_sync_) {
     GELOGD("[%s] Task needs sync", node_name_.c_str());
     GE_CHK_STATUS_RET_NOLOG(context.Synchronize());
@@ -760,6 +873,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
                   return FAILED;);

   uint64_t ext_session_id = model.GetSessionId();
+  AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_);
+  GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_);
   GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id),
                     "[Init][ExtInfo] failed for Node[%s].", node_name.c_str());
@@ -826,6 +941,12 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
                                       args_.get(), args_size_,
                                       nullptr, context.GetStream(), flag);
   GE_CHK_RT_RET(rt_ret);
+  if (is_blocking_aicpu_op_) {
+    if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) {
+      GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed");
+      return FAILED;
+    }
+  }
   GELOGD("Node[%s] launch task end.", node_name_.c_str());
   return SUCCESS;
 }
@@ -35,7 +35,7 @@ class AicpuNodeTaskBase : public NodeTask {
                           node_item->num_outputs,
                           node_item->shape_inference_type) {}

-  ~AicpuNodeTaskBase() override = default;
+  ~AicpuNodeTaskBase() override;

   using NodeTask::Init;
@@ -61,6 +61,10 @@ class AicpuNodeTaskBase : public NodeTask {
   static Status AllocTensorBuffer(size_t size, std::unique_ptr<TensorBuffer> &tensor_buffer);

+  Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream);
+  Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
+  Status UpdateEventIdForBlockingAicpuOp();
+
  protected:
   const NodeItem *node_item_;
   // just reference.
@@ -78,6 +82,10 @@ class AicpuNodeTaskBase : public NodeTask {
   // ext info addr, device mem
   std::unique_ptr<TensorBuffer> ext_info_addr_dev_;
+
+  // for blocking aicpu op
+  bool is_blocking_aicpu_op_ = false;
+  rtEvent_t rt_event_ = nullptr;
 };

 class AicpuTfNodeTask : public AicpuNodeTaskBase {
@@ -89,7 +89,8 @@ map<string, DataType> kDataTypeDict = {
     {"float", DT_FLOAT},
     {"float32", DT_FLOAT},
     {"double", DT_DOUBLE},
-    {"complex64", DT_COMPLEX64}
+    {"complex64", DT_COMPLEX64},
+    {"complex128", DT_COMPLEX128}
 };

 map<string, Format> kFormatDict = {
@@ -564,6 +564,41 @@ AiCpuBaseTask::~AiCpuBaseTask() {
   if (ext_info_addr_dev_ != nullptr) {
     (void)rtFree(ext_info_addr_dev_);
   }
+  if (rt_event_ != nullptr) {
+    (void)rtEventDestroy(rt_event_);
+  }
+}
+
+Status AiCpuBaseTask::UpdateEventIdForBlockingAicpuOp() {
+  bool is_support = false;
+  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
+    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
+    return FAILED;
+  }
+  if (!is_support) {
+    GELOGD("Device not support blocking aicpu op process");
+    return SUCCESS;
+  }
+  uint32_t event_id = 0;
+  auto rt_ret = rtEventCreateWithFlag(&rt_event_, RT_EVENT_WITH_FLAG);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  rt_ret = rtGetEventID(rt_event_, &event_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][rtGetEventID] failed, ret:0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  if (aicpu_ext_handle_->UpdateEventId(event_id) != SUCCESS) {
+    REPORT_CALL_ERROR("E19999", "Update event id=%u failed.", event_id);
+    GELOGE(FAILED, "[Update][EventId] Update event id=%u failed", event_id);
+    return FAILED;
+  }
+  GELOGI("Update event_id=%u success", event_id);
+  return SUCCESS;
 }

 Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id) {
@@ -577,6 +612,9 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint
   GELOGD("Get unknown_type is %d.", unknown_shape_type_val);
   unknown_type_ = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
+  AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_);
+  GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_);
+
   aicpu_ext_handle_.reset(new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(),
                                                                               num_inputs_,
                                                                               num_outputs_,
@@ -595,6 +633,13 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint
   GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false),
                     "[Update][SessionInfo] failed.");
+  if (is_blocking_aicpu_op_) {
+    if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) {
+      GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed");
+      return FAILED;
+    }
+  }
+
   GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM));
   GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(),
                          aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(),
| @@ -770,6 +815,63 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vecto | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCpuBaseTask::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) { | |||||
| int32_t device_id = 0; | |||||
| auto rt_ret = rtGetDevice(&device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret); | |||||
| GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| int32_t value = 0; | |||||
| rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret); | |||||
| GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) { | |||||
| REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d", | |||||
| RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); | |||||
| GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d", | |||||
| RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); | |||||
| return FAILED; | |||||
| } | |||||
| is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AiCpuBaseTask::DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream) { | |||||
| bool is_support = false; | |||||
| if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); | |||||
| return FAILED; | |||||
| } | |||||
| if (!is_support) { | |||||
| GELOGD("Device not support blocking aicpu op process."); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGI("Distribute queue task begin"); | |||||
| if (rt_event_ == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "rt_event_ is nullptr"); | |||||
| GELOGE(FAILED, "[Check][rt_event_] rt_event_ is nullptr"); | |||||
| return FAILED; | |||||
| } | |||||
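| // rtStreamWaitEvent queues a wait on the stream: later tasks on this stream will not run until | |||||
| // rt_event_ is recorded, presumably by the AICPU kernel when the blocking op finishes. | |||||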
| auto rt_ret = rtStreamWaitEvent(stream, rt_event_); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret); | |||||
| GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
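| // Reset the event on the same stream so it can be reused the next time this blocking op is launched. | |||||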
| rt_ret = rtEventReset(rt_event_, stream); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret); | |||||
| GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| AiCpuTask::~AiCpuTask() { | AiCpuTask::~AiCpuTask() { | ||||
| FreeHbm(args_); | FreeHbm(args_); | ||||
| FreeHbm(io_addr_); | FreeHbm(io_addr_); | ||||
| @@ -813,6 +915,14 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { | |||||
| GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | ||||
| GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str()); | GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str()); | ||||
| if (is_blocking_aicpu_op_) { | |||||
| if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1089,6 +1199,13 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||||
| } | } | ||||
| GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | ||||
| GELOGD("Invoke rtCpuKernelLaunch succeeded"); | GELOGD("Invoke rtCpuKernelLaunch succeeded"); | ||||
| if (is_blocking_aicpu_op_) { | |||||
| if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -178,6 +178,10 @@ class AiCpuBaseTask : public OpTask { | |||||
| rtStream_t stream); | rtStream_t stream); | ||||
| Status UpdateOutputShape(vector<GeTensorDesc> &output_desc); | Status UpdateOutputShape(vector<GeTensorDesc> &output_desc); | ||||
| Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); | Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); | ||||
| // for blocking aicpu op | |||||
| Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream); | |||||
| Status UpdateEventIdForBlockingAicpuOp(); | |||||
| Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||||
| protected: | protected: | ||||
| size_t num_inputs_ = 0; | size_t num_inputs_ = 0; | ||||
| @@ -186,6 +190,9 @@ class AiCpuBaseTask : public OpTask { | |||||
| std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> aicpu_ext_handle_; | std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> aicpu_ext_handle_; | ||||
| void *ext_info_addr_dev_ = nullptr; | void *ext_info_addr_dev_ = nullptr; | ||||
| vector<bool> input_is_const_; | vector<bool> input_is_const_; | ||||
| // for blocking aicpu op | |||||
| bool is_blocking_aicpu_op_ = false; | |||||
| rtEvent_t rt_event_ = nullptr; | |||||
| }; | }; | ||||
| class AiCpuTask : public AiCpuBaseTask { | class AiCpuTask : public AiCpuBaseTask { | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit 8f2c4395c346af026c470b47a7c52f2ab5b51f90 | |||||
| Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd | |||||
| @@ -16,12 +16,94 @@ | |||||
| #include <cce/dnn.h> | #include <cce/dnn.h> | ||||
| #include <securec.h> | #include <securec.h> | ||||
| #include "runtime_stub.h" | |||||
| #include "runtime/rt.h" | |||||
| #define ADD_STUB_RETURN_VALUE(FUNC, TYPE) std::vector<TYPE> g_Stub_##FUNC##_RETURN | |||||
| #define GET_STUB_RETURN_VALUE(FUNC, TYPE, DEFAULT) ({ \ | |||||
| TYPE result = DEFAULT; \ | |||||
| if (!g_Stub_##FUNC##_RETURN.empty()) { \ | |||||
| result = g_Stub_##FUNC##_RETURN.back(); \ | |||||
| g_Stub_##FUNC##_RETURN.pop_back(); \ | |||||
| } \ | |||||
| result; \ | |||||
| }) | |||||
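| // Values queued by RTS_STUB_RETURN_VALUE (runtime_stub.h) are inserted at the front and popped here | |||||
| // from the back, so stubbed calls consume them in the order the test queued them. | |||||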
| #define DEL_STUB_RETURN_VALUE(FUNC, TYPE) \ | |||||
| do { \ | |||||
| extern std::vector<TYPE> g_Stub_##FUNC##_RETURN; \ | |||||
| g_Stub_##FUNC##_RETURN.clear(); \ | |||||
| } while (0) | |||||
| #define ADD_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME | |||||
| #define GET_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, DEFAULT) ({ \ | |||||
| TYPE value; \ | |||||
| if (!g_Stub_##FUNC##_OUT_##NAME.empty()) { \ | |||||
| value = g_Stub_##FUNC##_OUT_##NAME.back(); \ | |||||
| g_Stub_##FUNC##_OUT_##NAME.pop_back(); \ | |||||
| } else { \ | |||||
| value = DEFAULT; \ | |||||
| } \ | |||||
| value; \ | |||||
| }) | |||||
| #define DEL_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) \ | |||||
| do { \ | |||||
| extern std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME; \ | |||||
| g_Stub_##FUNC##_OUT_##NAME.clear(); \ | |||||
| } while (0) | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| #define EVENT_LENTH 10 | #define EVENT_LENTH 10 | ||||
| void rtStubTearDown() { | |||||
| DEL_STUB_RETURN_VALUE(rtGetDevice, rtError_t); | |||||
| DEL_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); | |||||
| DEL_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value); | |||||
| DEL_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); | |||||
| DEL_STUB_RETURN_VALUE(rtEventReset, rtError_t); | |||||
| DEL_STUB_RETURN_VALUE(rtEventCreate, rtError_t); | |||||
| DEL_STUB_RETURN_VALUE(rtGetEventID, rtError_t); | |||||
| } | |||||
| ADD_STUB_RETURN_VALUE(rtGetDevice, rtError_t); | |||||
| rtError_t rtGetDevice(int32_t *device) { | |||||
| return GET_STUB_RETURN_VALUE(rtGetDevice, rtError_t, RT_ERROR_NONE); | |||||
| } | |||||
| ADD_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); | |||||
| ADD_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value); | |||||
| rtError_t rtGetDeviceCapability(int32_t device, int32_t moduleType, int32_t featureType, int32_t *value) { | |||||
| *value = GET_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT); | |||||
| return GET_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| } | |||||
| ADD_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); | |||||
| rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { | |||||
| return GET_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, RT_ERROR_NONE); | |||||
| } | |||||
| ADD_STUB_RETURN_VALUE(rtEventReset, rtError_t); | |||||
| rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) { | |||||
| return GET_STUB_RETURN_VALUE(rtEventReset, rtError_t, RT_ERROR_NONE); | |||||
| } | |||||
| ADD_STUB_RETURN_VALUE(rtEventCreate, rtError_t); | |||||
| rtError_t rtEventCreate(rtEvent_t *event) { | |||||
| *event = new int[EVENT_LENTH]; | |||||
| return GET_STUB_RETURN_VALUE(rtEventCreate, rtError_t, RT_ERROR_NONE); | |||||
| } | |||||
| ADD_STUB_RETURN_VALUE(rtGetEventID, rtError_t); | |||||
| rtError_t rtGetEventID(rtEvent_t event, uint32_t *event_id) { | |||||
| *event_id = 0; | |||||
| return GET_STUB_RETURN_VALUE(rtGetEventID, rtError_t, RT_ERROR_NONE); | |||||
| } | |||||
| rtError_t rtCtxSetCurrent(rtContext_t ctx) { return RT_ERROR_NONE; } | rtError_t rtCtxSetCurrent(rtContext_t ctx) { return RT_ERROR_NONE; } | ||||
| rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { | rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { | ||||
| @@ -42,11 +124,6 @@ rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event) { | |||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| } | } | ||||
| rtError_t rtEventCreate(rtEvent_t *event) { | |||||
| *event = new int[EVENT_LENTH]; | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag) { | rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag) { | ||||
| return rtEventCreate(event); | return rtEventCreate(event); | ||||
| } | } | ||||
| @@ -112,8 +189,6 @@ rtError_t rtMemcpyAsync(void *dst, uint64_t dest_max, const void *src, uint64_t | |||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| } | } | ||||
| rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { return RT_ERROR_NONE; } | |||||
| rtError_t rtSetTSDevice(uint32_t tsId) { | rtError_t rtSetTSDevice(uint32_t tsId) { | ||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| } | } | ||||
| @@ -347,10 +422,6 @@ rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *value_ptr, | |||||
| rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream) { return RT_ERROR_NONE; } | rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream) { return RT_ERROR_NONE; } | ||||
| rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) { return RT_ERROR_NONE; } | |||||
| rtError_t rtGetDevice(int32_t *device) { return RT_ERROR_NONE; } | |||||
| rtError_t rtDatadumpInfoLoad(const void *dump_info, uint32_t length) { return RT_ERROR_NONE; } | rtError_t rtDatadumpInfoLoad(const void *dump_info, uint32_t length) { return RT_ERROR_NONE; } | ||||
| rtError_t rtKernelLaunchWithFlag(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, | rtError_t rtKernelLaunchWithFlag(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, | ||||
| @@ -489,6 +560,18 @@ rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t | |||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| } | } | ||||
| rtError_t rtGetC2cCtrlAddr(uint64_t *addr, uint32_t *len) { | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream) { | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt) { | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,70 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef __INC_LLT_RUNTIME_STUB_H | |||||
| #define __INC_LLT_RUNTIME_STUB_H | |||||
| #include <vector> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| void rtStubTearDown(); | |||||
| #define RTS_STUB_SETUP() \ | |||||
| do { \ | |||||
| rtStubTearDown(); \ | |||||
| } while (0) | |||||
| #define RTS_STUB_TEARDOWN() \ | |||||
| do { \ | |||||
| rtStubTearDown(); \ | |||||
| } while (0) | |||||
| #define RTS_STUB_RETURN_VALUE(FUNC, TYPE, VALUE) \ | |||||
| do { \ | |||||
| g_Stub_##FUNC##_RETURN.emplace(g_Stub_##FUNC##_RETURN.begin(), VALUE); \ | |||||
| } while (0) | |||||
| #define RTS_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, VALUE) \ | |||||
| do { \ | |||||
| g_Stub_##FUNC##_OUT_##NAME.emplace(g_Stub_##FUNC##_OUT_##NAME.begin(), VALUE); \ | |||||
| } while (0) | |||||
| #define RTS_STUB_RETURN_EXTERN(FUNC, TYPE) extern std::vector<TYPE> g_Stub_##FUNC##_RETURN; | |||||
| #define RTS_STUB_OUTBOUND_EXTERN(FUNC, TYPE, NAME) extern std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME; | |||||
| RTS_STUB_RETURN_EXTERN(rtGetDevice, rtError_t); | |||||
| RTS_STUB_OUTBOUND_EXTERN(rtGetDevice, int32_t, device) | |||||
| RTS_STUB_RETURN_EXTERN(rtGetDeviceCapability, rtError_t); | |||||
| RTS_STUB_OUTBOUND_EXTERN(rtGetDeviceCapability, int32_t, value); | |||||
| RTS_STUB_RETURN_EXTERN(rtStreamWaitEvent, rtError_t); | |||||
| RTS_STUB_RETURN_EXTERN(rtEventReset, rtError_t); | |||||
| RTS_STUB_RETURN_EXTERN(rtEventCreate, rtError_t); | |||||
| RTS_STUB_OUTBOUND_EXTERN(rtEventCreate, rtEvent_t, event); | |||||
| RTS_STUB_RETURN_EXTERN(rtGetEventID, rtError_t); | |||||
| RTS_STUB_OUTBOUND_EXTERN(rtGetEventID, uint32_t, event_id); | |||||
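| // Typical usage in a test (a sketch only; see kernel_ex_task_info_unittest.cc for real cases): | |||||
| //   RTS_STUB_SETUP(); | |||||
| //   RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001);  // make the next rtGetDevice call fail | |||||
| //   RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT); | |||||
| //   RTS_STUB_TEARDOWN(); | |||||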
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // __INC_LLT_RUNTIME_STUB_H | |||||
| @@ -244,6 +244,7 @@ set(GRAPH_DAVINCI_MODEL_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/ffts_task_info.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/ffts_task_info.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/ffts_plus_task_info.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | ||||
| "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" | ||||
| @@ -527,6 +528,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||||
| "graph/load/kernel_ex_task_info_unittest.cc" | "graph/load/kernel_ex_task_info_unittest.cc" | ||||
| "graph/load/kernel_task_info_unittest.cc" | "graph/load/kernel_task_info_unittest.cc" | ||||
| "graph/load/ffts_task_info_unittest.cc" | "graph/load/ffts_task_info_unittest.cc" | ||||
| "graph/load/ffts_plus_task_info_unittest.cc" | |||||
| "graph/load/memcpy_addr_async_task_info_unittest.cc" | "graph/load/memcpy_addr_async_task_info_unittest.cc" | ||||
| "graph/load/memcpy_async_task_info_unittest.cc" | "graph/load/memcpy_async_task_info_unittest.cc" | ||||
| "graph/load/cpu_queue_schedule_unittest.cc" | "graph/load/cpu_queue_schedule_unittest.cc" | ||||
| @@ -670,6 +672,7 @@ set(MULTI_PARTS_TEST_FILES | |||||
| "graph/build/stream_allocator_unittest.cc" | "graph/build/stream_allocator_unittest.cc" | ||||
| "graph/build/model_builder_unittest.cc" | "graph/build/model_builder_unittest.cc" | ||||
| "graph/build/mem_assigner_unittest.cc" | "graph/build/mem_assigner_unittest.cc" | ||||
| "graph/build/graph_mem_assigner_unittest.cc" | |||||
| "graph/build/task_generator_unittest.cc" | "graph/build/task_generator_unittest.cc" | ||||
| "graph/build/buffer_pool_mem_assigner_unittest.cc" | "graph/build/buffer_pool_mem_assigner_unittest.cc" | ||||
| "graph/execute/graph_execute_unittest.cc" | "graph/execute/graph_execute_unittest.cc" | ||||
| @@ -935,6 +938,7 @@ target_link_libraries(ge_single_op PRIVATE | |||||
| ascend_protobuf | ascend_protobuf | ||||
| json | json | ||||
| c_sec | c_sec | ||||
| runtime_stub | |||||
| ) | ) | ||||
| # ut binary | # ut binary | ||||
| @@ -0,0 +1,90 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <memory> | |||||
| #include "graph/anchor.h" | |||||
| #include "graph/attr_value.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "graph/utils/node_utils.h" | |||||
| #include "graph/utils/op_desc_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "omg/omg_inner_types.h" | |||||
| #include "../passes/graph_builder_utils.h" | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "graph/build/memory/binary_block_mem_assigner.h" | |||||
| #include "graph/build/memory/graph_mem_assigner.h" | |||||
| #include "graph/build/memory/hybrid_mem_assigner.h" | |||||
| #include "graph/build/memory/max_block_mem_assigner.h" | |||||
| #include "graph/manager/graph_var_manager.h" | |||||
| #include "graph/manager/graph_mem_manager.h" | |||||
| #undef protected | |||||
| #undef private | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| using namespace ge; | |||||
| using domi::GetContext; | |||||
| class UtestGraphMemAssigner : public testing::Test { | |||||
| public: | |||||
| ge::ComputeGraphPtr BuildGraphWithVar(int64_t session_id) { | |||||
| // init | |||||
| MemManager::Instance().Initialize(std::vector<rtMemType_t>({RT_MEMORY_HBM})); | |||||
| VarManager::Instance(session_id)->Init(0, 0, 0, 0); | |||||
| ge::ut::GraphBuilder builder("graph"); | |||||
| auto var_input = builder.AddNode("var", "Variable", 1, 1); | |||||
| auto const_input = builder.AddNode("const", "Const", 1, 1); | |||||
| auto assign = builder.AddNode("assgin", "Assign", 2, 1); | |||||
| // add link | |||||
| builder.AddDataEdge(var_input, 0, assign, 0); | |||||
| builder.AddDataEdge(const_input, 0, assign, 1); | |||||
| // set offset | |||||
| var_input->GetOpDesc()->SetOutputOffset({10000}); | |||||
| const_input->GetOpDesc()->SetOutputOffset({1000}); | |||||
| assign->GetOpDesc()->SetInputOffset({10100, 1000}); | |||||
| assign->GetOpDesc()->SetOutputOffset({10100}); | |||||
| // set inner offset | |||||
| int64_t inner_offset = 100; | |||||
| ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableInputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); | |||||
| ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableOutputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); | |||||
| // add var addr | |||||
| VarManager::Instance(session_id)->var_resource_->var_offset_map_.emplace(10000, RT_MEMORY_HBM); | |||||
| return builder.GetGraph(); | |||||
| } | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestGraphMemAssigner, graph_memory_assign_fail_case) { | |||||
| ge::ComputeGraphPtr compute_graph = make_shared<ge::ComputeGraph>(""); | |||||
| GraphMemoryAssigner graph_mem_assigner(compute_graph); | |||||
| MemoryOffset mem_offset(2, 10000); | |||||
| graph_mem_assigner.memory_offset_.insert({2, mem_offset}); | |||||
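| // Force the feature-map size check to fail by capping the graph memory max size at 0. | |||||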
| VarManager::Instance(0)->graph_mem_max_size_ = 0; | |||||
| map<uint64_t, size_t> mem_type_to_offset = {}; | |||||
| Status ret = graph_mem_assigner.ReAssignMemory(false, mem_type_to_offset); | |||||
| EXPECT_EQ(ret, ACL_ERROR_GE_MEMORY_ALLOCATION); | |||||
| } | |||||
| @@ -0,0 +1,697 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #define private public | |||||
| #define protected public | |||||
| #include "graph/load/model_manager/task_info/ffts_plus_task_info.h" | |||||
| #include "cce/aicpu_engine_struct.h" | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "common/properties_manager.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/fmk_error_codes.h" | |||||
| #include "graph/attr_value.h" | |||||
| #include "graph/load/model_manager/davinci_model.h" | |||||
| #include "graph/load/model_manager/model_manager.h" | |||||
| namespace ge { | |||||
| extern OpDescPtr CreateOpDesc(string name, string type); | |||||
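| // Test-local stub of DavinciModel::GetAddrAndPrefCnt returning fixed dummy values, presumably so | |||||
| // FFTS+ context init in these tests does not need a registered kernel binary. | |||||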
| Status DavinciModel::GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt) { | |||||
| addr = reinterpret_cast<void *>(0x1245); | |||||
| pref_cnt = 3; | |||||
| return SUCCESS; | |||||
| } | |||||
| class UtestFftsPlusTaskInfo : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| public: | |||||
| void InitTaskSQEInfo(domi::FftsPlusTaskDef *task_def) { | |||||
| domi::FftsPlusSqeDef *sqedef = task_def->mutable_ffts_plus_sqe(); | |||||
| // header | |||||
| domi::StarsSqeHeaderDef *headerdef = sqedef->mutable_sqe_header(); | |||||
| headerdef->set_l1_lock(1); | |||||
| headerdef->set_l1_unlock(1); | |||||
| headerdef->set_block_dim(1); | |||||
| // sqe | |||||
| sqedef->set_pmg(1); | |||||
| sqedef->set_ns(1); | |||||
| sqedef->set_part_id(1); | |||||
| sqedef->set_qos(1); | |||||
| sqedef->set_total_context_num(2); | |||||
| sqedef->set_ready_context_num(1); | |||||
| sqedef->set_preload_context_num(1); | |||||
| sqedef->set_dsplit_unit(1); | |||||
| sqedef->set_prefetch_ost_num(1); | |||||
| sqedef->set_cmaint_ost_num(1); | |||||
| sqedef->set_aic_prefetch_lower(1); | |||||
| sqedef->set_aic_prefetch_upper(1); | |||||
| sqedef->set_aiv_prefetch_lower(1); | |||||
| sqedef->set_aiv_prefetch_upper(1); | |||||
| } | |||||
| void InitTaskAdditionalDataInfo(domi::FftsPlusTaskDef *task_def) { | |||||
| domi::AdditionalDataDef *additionaldata = task_def->add_additional_data(); | |||||
| additionaldata->set_data_type(1); | |||||
| additionaldata->add_context_id(0); | |||||
| additionaldata->add_context_id(1); | |||||
| additionaldata->add_context_id(2); | |||||
| domi::AdditionalDataDef *additionaldata1 = task_def->add_additional_data(); | |||||
| additionaldata1->set_data_type(2); | |||||
| additionaldata1->add_context_id(0); | |||||
| additionaldata1->add_context_id(3); | |||||
| additionaldata1->add_context_id(5); | |||||
| } | |||||
| void InitAicAivCtx(domi::FftsPlusAicAivCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // 16 bits, len = 26 | |||||
| } | |||||
| ctxdef->set_stat(1); | |||||
| ctxdef->set_schem(1); | |||||
| ctxdef->set_atm(1); | |||||
| ctxdef->set_prefetch_enable_bitmap(1); | |||||
| ctxdef->set_prefetch_once_bitmap(1); | |||||
| ctxdef->set_thread_id(2); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_non_tail_block_dim(6); | |||||
| ctxdef->set_tail_block_dim(5); | |||||
| ctxdef->set_task_param_ptr_base(0x235689); | |||||
| ctxdef->set_task_param_ptr_offset(32); | |||||
| // task_addr = {0,200,700,1000,2000, 3500} | |||||
| // task_addr_offset = {20,40,2,100,200} | |||||
| ctxdef->add_task_addr(0); | |||||
| ctxdef->add_task_addr(200); | |||||
| ctxdef->add_task_addr(700); | |||||
| ctxdef->add_task_addr(1000); | |||||
| ctxdef->add_task_addr(2000); | |||||
| ctxdef->add_task_addr(3500); | |||||
| ctxdef->add_task_addr_offset(20); | |||||
| ctxdef->add_task_addr_offset(40); | |||||
| ctxdef->add_task_addr_offset(2); | |||||
| ctxdef->add_task_addr_offset(100); | |||||
| ctxdef->add_task_addr_offset(200); | |||||
| ctxdef->set_input_output_count(3); | |||||
| ctxdef->add_kernel_name("aictest"); | |||||
| for (int j = 1; j < 4; ++j) { | |||||
| ctxdef->add_src_slot(1); // len = 4, context ID for source data which is out of subgraph | |||||
| } | |||||
| } | |||||
| void InitMixAicAivCtx(domi::FftsPlusMixAicAivCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // len = 26 | |||||
| } | |||||
| ctxdef->set_stat(1); | |||||
| ctxdef->set_schem(1); | |||||
| ctxdef->set_atm(1); | |||||
| ctxdef->set_prefetch_enable_bitmap(1); | |||||
| ctxdef->set_prefetch_once_bitmap(1); | |||||
| ctxdef->set_non_tail_block_ratio_n(1); | |||||
| ctxdef->set_tail_block_ratio_n(1); | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_non_tail_block_dim(1); | |||||
| ctxdef->set_tail_block_dim(1); | |||||
| ctxdef->set_aic_task_param_ptr(1); | |||||
| ctxdef->set_aic_task_param_ptr_offset(1); | |||||
| ctxdef->set_aiv_task_param_ptr(0x147852); | |||||
| ctxdef->set_aiv_task_param_ptr_offset(32); | |||||
| ctxdef->add_kernel_name("mixaic"); | |||||
| // task_addr = {0,200,700,1000,2000, 3500} | |||||
| // task_addr_offset = {20,40,2,100,200} | |||||
| ctxdef->add_task_addr(0); | |||||
| ctxdef->add_task_addr(200); | |||||
| ctxdef->add_task_addr(700); | |||||
| ctxdef->add_task_addr(1000); | |||||
| ctxdef->add_task_addr(2000); | |||||
| ctxdef->add_task_addr(3500); | |||||
| ctxdef->add_task_addr_offset(20); | |||||
| ctxdef->add_task_addr_offset(40); | |||||
| ctxdef->add_task_addr_offset(2); | |||||
| ctxdef->add_task_addr_offset(100); | |||||
| ctxdef->add_task_addr_offset(200); | |||||
| ctxdef->set_input_output_count(1); | |||||
| for (int j = 1; j < 4; ++j) { | |||||
| ctxdef->add_src_slot(1); // len = 4, context ID for source data which is out of subgraph | |||||
| } | |||||
| } | |||||
| void InitSdmaCtx(domi::FftsPlusSdmaCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // len = 26 | |||||
| } | |||||
| ctxdef->set_sat(1); | |||||
| ctxdef->set_atm(1); | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_sdma_sqe_header(1); | |||||
| ctxdef->set_src_stream_id(1); | |||||
| ctxdef->set_src_sub_stream_id(1); | |||||
| ctxdef->set_dst_stream_id(1); | |||||
| ctxdef->set_dst_sub_stream_id(1); | |||||
| ctxdef->set_src_addr_base(0x457878); | |||||
| ctxdef->set_src_addr_offset(32); | |||||
| ctxdef->set_dst_addr_base(0x126547); | |||||
| ctxdef->set_dst_addr_offset(32); | |||||
| ctxdef->set_non_tail_data_len(1); | |||||
| ctxdef->set_tail_data_len(1); | |||||
| } | |||||
| void InitNotifyCtx(domi::FftsPlusNotifyCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // len = 26 | |||||
| } | |||||
| ctxdef->set_atm(1); | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_notify_id_base(1); | |||||
| } | |||||
| void InitWriteValueCtx(domi::FftsPlusWriteValueCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // len = 26 | |||||
| } | |||||
| ctxdef->set_atm(1); | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_aw_size(1); | |||||
| ctxdef->set_snoop(1); | |||||
| ctxdef->set_aw_cache(1); | |||||
| ctxdef->set_aw_prot(1); | |||||
| ctxdef->set_va(1); | |||||
| ctxdef->set_write_addr_base(0x147852); | |||||
| ctxdef->set_write_addr_offset(32); | |||||
| for (int j = 1; j < 4; ++j) { | |||||
| ctxdef->add_write_value(1); | |||||
| } | |||||
| } | |||||
| void InitAicpuCtxCtx(domi::FftsPlusAicpuCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int j = 1; j < RT_CTX_SUCCESSOR_NUM; ++j) { | |||||
| ctxdef->add_successor_context_id(1); // len = 26 | |||||
| } | |||||
| ctxdef->set_atm(1); | |||||
| ctxdef->set_sqe_index(1); | |||||
| ctxdef->set_kernel_type(1); | |||||
| ctxdef->set_bm(1); | |||||
| ctxdef->set_topic_type(1); | |||||
| ctxdef->set_qos(1); | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_non_tail_block_dim(1); | |||||
| ctxdef->set_tail_block_dim(1); | |||||
| for (int i = 1; i < 9; ++i) { | |||||
| ctxdef->add_user_data(1); // len = 9 | |||||
| } | |||||
| ctxdef->set_sub_topic_id(1); | |||||
| ctxdef->set_topic_id(1); | |||||
| ctxdef->set_group_id(1); | |||||
| ctxdef->set_user_data_len(64); | |||||
| ctxdef->set_task_param_offset(32); | |||||
| } | |||||
| void InitDataCtx(domi::FftsPlusDataCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_cnt_init(1); | |||||
| ctxdef->set_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // len = 26 | |||||
| } | |||||
| ctxdef->set_atm(1); | |||||
| ctxdef->set_orig_consumer_counter(1); | |||||
| ctxdef->set_run_consumer_counter(1); | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_addr_base(0x125478); | |||||
| ctxdef->set_addr_offset(32); | |||||
| ctxdef->set_non_tail_num_outter(1); | |||||
| ctxdef->set_non_tail_num_inner(1); | |||||
| ctxdef->set_non_tail_len_inner(1); | |||||
| ctxdef->set_non_tail_stride_outter(1); | |||||
| ctxdef->set_non_tail_stride_inner(1); | |||||
| ctxdef->set_tail_num_outter(1); | |||||
| ctxdef->set_tail_num_inner(1); | |||||
| ctxdef->set_tail_len_inner(1); | |||||
| ctxdef->set_tail_stride_outter(1); | |||||
| ctxdef->set_tail_stride_inner(1); | |||||
| } | |||||
| void InitAtStartCtx(domi::FftsPlusAtStartCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(i); // len = 26 | |||||
| } | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(1); | |||||
| ctxdef->set_thread_id_init(1); | |||||
| ctxdef->set_thread_window_size(1); | |||||
| } | |||||
| void InitAtEndCtx(domi::FftsPlusAtEndCtxDef *ctxdef) { | |||||
| ctxdef->set_at_start_slot_num(12); | |||||
| ctxdef->set_out_label_slot_num(12); | |||||
| ctxdef->set_aten(1); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCC_AT_START_SLOT_NUM; ++i) { | |||||
| ctxdef->add_succ_at_start_slot(i); // len = 12 | |||||
| ctxdef->add_succ_out_label_slot(1); // len = 12 | |||||
| } | |||||
| ctxdef->set_thread_id(1); | |||||
| } | |||||
| void InitLabelCtx(domi::FftsPlusLabelCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(1); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // len = 26 | |||||
| } | |||||
| } | |||||
| void InitCaseSwitchCtx(domi::FftsPlusCaseSwitchCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(32); | |||||
| ctxdef->set_start_label_id(32); | |||||
| ctxdef->set_label_list_len(32); | |||||
| ctxdef->set_pred_cnt_init(32); | |||||
| ctxdef->set_pred_cnt(32); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(1); // len = 26 | |||||
| } | |||||
| ctxdef->set_atm(32); | |||||
| ctxdef->set_thread_id(32); | |||||
| ctxdef->set_thread_dim(32); | |||||
| ctxdef->set_ar_size(32); | |||||
| ctxdef->set_snoop(32); | |||||
| ctxdef->set_ar_cache(32); | |||||
| ctxdef->set_ar_prot(32); | |||||
| ctxdef->set_va(32); | |||||
| ctxdef->set_load_addr0_base(0x123456); | |||||
| ctxdef->set_ld0_en(32); | |||||
| ctxdef->set_load_addr0_offset(32); | |||||
| ctxdef->set_load_addr1_base(0x12457); | |||||
| ctxdef->set_ld1_en(32); | |||||
| ctxdef->set_load_addr1_offset(32); | |||||
| } | |||||
| void InitCaseDefaultCtx(domi::FftsPlusCaseDefaultCtxDef *ctxdef) { | |||||
| ctxdef->set_successor_num(26); | |||||
| ctxdef->set_aten(32); | |||||
| ctxdef->set_start_label_id(1); | |||||
| ctxdef->set_label_list_len(32); | |||||
| ctxdef->set_pred_cnt_init(1); | |||||
| ctxdef->set_pred_cnt(32); | |||||
| for (int i = 1; i < RT_CTX_SUCCESSOR_NUM; ++i) { | |||||
| ctxdef->add_successor_list(2); // len = 26 | |||||
| } | |||||
| } | |||||
| void InitCondSwitchCtx(domi::FftsPlusCondSwitchCtxDef *ctxdef) { | |||||
| ctxdef->set_true_successor_num(12); | |||||
| ctxdef->set_false_successor_num(14); | |||||
| ctxdef->set_aten(32); | |||||
| ctxdef->set_condition(32); | |||||
| ctxdef->set_pred_cnt_init(32); | |||||
| ctxdef->set_pred_cnt(32); | |||||
| for (int i = 1; i < RT_CTX_FALSE_SUCCESSOR_NUM; ++i) { | |||||
| if (i < RT_CTX_TRUE_SUCCESSOR_NUM) { | |||||
| ctxdef->add_true_successor_list(1); // len = 12 | |||||
| } | |||||
| ctxdef->add_false_successor_list(1); // len = 14 | |||||
| } | |||||
| ctxdef->set_atm(32); | |||||
| ctxdef->set_thread_id(1); | |||||
| ctxdef->set_thread_dim(32); | |||||
| ctxdef->set_ar_size(32); | |||||
| ctxdef->set_snoop(32); | |||||
| ctxdef->set_ar_cache(32); | |||||
| ctxdef->set_ar_prot(32); | |||||
| ctxdef->set_va(32); | |||||
| ctxdef->set_load_addr0_base(0x142545); | |||||
| ctxdef->set_ld0_en(32); | |||||
| ctxdef->set_load_addr0_offset(32); | |||||
| ctxdef->set_load_addr1_base(0x365451); | |||||
| ctxdef->set_ld1_en(64); | |||||
| ctxdef->set_load_addr1_offset(32); | |||||
| ctxdef->set_cmp_value_1(1); | |||||
| ctxdef->set_cmp_value_2(1); | |||||
| } | |||||
| }; | |||||
| // test FftsPlusTaskInfo Init software ctx | |||||
| TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_software_ctx) { | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| davinci_model.stream_list_ = { stream }; | |||||
| domi::TaskDef task_def; | |||||
| task_def.set_stream_id(0); | |||||
| domi::FftsPlusTaskDef *ffts_plus_task_def = task_def.mutable_ffts_plus_task(); | |||||
| FftsPlusTaskInfo ffts_plus_task_info; | |||||
| // init failed when model without op_desc | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), PARAM_INVALID); | |||||
| davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); | |||||
| ffts_plus_task_def->set_op_index(0); | |||||
| ffts_plus_task_def->set_addr_size(2); | |||||
| rtFftsPlusTaskInfo_t sub_task_info; | |||||
| ffts_plus_task_info.ffts_plus_task_info_ = sub_task_info; | |||||
| ffts_plus_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; | |||||
| InitTaskSQEInfo(ffts_plus_task_def); | |||||
| InitTaskAdditionalDataInfo(ffts_plus_task_def); | |||||
| domi::FftsPlusCtxDef *fftsplusstartctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| fftsplusstartctx->set_op_index(0); | |||||
| fftsplusstartctx->set_hardware_ctx_type(0); | |||||
| fftsplusstartctx->set_software_ctx_type(static_cast<uint32_t>(RT_SOFT_CTX_TYPE_AT_START)); | |||||
| domi::FftsPlusAtStartCtxDef *startctxdef = fftsplusstartctx->mutable_at_start_ctx(); | |||||
| InitAtStartCtx(startctxdef); | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); | |||||
| startctxdef->add_successor_list(1); | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *fftsplusendctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| fftsplusendctx->set_op_index(0); | |||||
| fftsplusendctx->set_hardware_ctx_type(0); | |||||
| fftsplusendctx->set_software_ctx_type(static_cast<uint32_t>(RT_SOFT_CTX_TYPE_AT_END)); | |||||
| domi::FftsPlusAtEndCtxDef *endctxdef = fftsplusendctx->mutable_at_end_ctx(); | |||||
| InitAtEndCtx(endctxdef); | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); | |||||
| endctxdef->add_succ_at_start_slot(1); | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); | |||||
| endctxdef->add_succ_out_label_slot(1); | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *fftspluslabelctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| fftspluslabelctx->set_op_index(0); | |||||
| fftspluslabelctx->set_hardware_ctx_type(0); | |||||
| fftspluslabelctx->set_software_ctx_type(static_cast<uint32_t>(RT_SOFT_CTX_TYPE_LABEL)); | |||||
| domi::FftsPlusLabelCtxDef *labelctxdef = fftspluslabelctx->mutable_label_ctx(); | |||||
| InitLabelCtx(labelctxdef); | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), FAILED); | |||||
| labelctxdef->add_successor_list(1); | |||||
| EXPECT_EQ(ffts_plus_task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| } | |||||
| // test FftsPlusTaskInfo Init hardware ctx | |||||
| TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_hardware_ctx) { | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| domi::TaskDef task_def; | |||||
| FftsPlusTaskInfo task_info; | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| davinci_model.stream_list_ = { stream }; | |||||
| task_def.set_stream_id(0); | |||||
| domi::FftsPlusTaskDef *ffts_plus_task_def = task_def.mutable_ffts_plus_task(); | |||||
| rtFftsPlusTaskInfo_t sub_task_info; | |||||
| task_info.ffts_plus_task_info_ = sub_task_info; | |||||
| davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); | |||||
| davinci_model.InitTbeHandleWithFfts(davinci_model.op_list_[0]); | |||||
| ffts_plus_task_def->set_op_index(0); | |||||
| ffts_plus_task_def->set_addr_size(2); | |||||
| InitTaskSQEInfo(ffts_plus_task_def); | |||||
| InitTaskAdditionalDataInfo(ffts_plus_task_def); | |||||
| domi::FftsPlusCtxDef *notifyctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| notifyctx->set_op_index(0); | |||||
| notifyctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_NOTIFY_WAIT)); | |||||
| notifyctx->set_software_ctx_type(0); | |||||
| domi::FftsPlusNotifyCtxDef *notifydef = notifyctx->mutable_notify_ctx(); | |||||
| InitNotifyCtx(notifydef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| notifydef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *sdmactx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| sdmactx->set_op_index(0); | |||||
| sdmactx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_SDMA)); | |||||
| sdmactx->set_software_ctx_type(0); | |||||
| domi::FftsPlusSdmaCtxDef *smdadef = sdmactx->mutable_sdma_ctx(); | |||||
| InitSdmaCtx(smdadef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| smdadef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *writevalctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| writevalctx->set_op_index(0); | |||||
| writevalctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_WRITE_VALUE)); | |||||
| writevalctx->set_software_ctx_type(0); | |||||
| domi::FftsPlusWriteValueCtxDef *writedef = writevalctx->mutable_write_value_ctx(); | |||||
| InitWriteValueCtx(writedef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| writedef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| writedef->add_write_value(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *aicpuctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| aicpuctx->set_op_index(0); | |||||
| aicpuctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_AICPU)); | |||||
| aicpuctx->set_software_ctx_type(0); | |||||
| domi::FftsPlusAicpuCtxDef *aicpudef = aicpuctx->mutable_aicpu_ctx(); | |||||
| InitAicpuCtxCtx(aicpudef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| aicpudef->add_successor_context_id(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| aicpudef->add_user_data(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *datactx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| datactx->set_op_index(0); | |||||
| datactx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_FLUSH_DATA)); | |||||
| datactx->set_software_ctx_type(0); | |||||
| domi::FftsPlusDataCtxDef *datadef = datactx->mutable_data_ctx(); | |||||
| InitDataCtx(datadef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| datadef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *caseswitchctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| caseswitchctx->set_op_index(0); | |||||
| caseswitchctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_LOAD)); | |||||
| caseswitchctx->set_software_ctx_type(static_cast<uint32_t>(RT_SOFT_CTX_TYPE_CASE_SWITCH)); | |||||
| domi::FftsPlusCaseSwitchCtxDef *caseswitchdef = caseswitchctx->mutable_case_switch_ctx(); | |||||
| InitCaseSwitchCtx(caseswitchdef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| caseswitchdef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *candswitchctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| candswitchctx->set_op_index(0); | |||||
| candswitchctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_LOAD)); | |||||
| candswitchctx->set_software_ctx_type(static_cast<uint32_t>(RT_SOFT_CTX_TYPE_COND_SWITCH)); | |||||
| domi::FftsPlusCondSwitchCtxDef *candswitchdef = candswitchctx->mutable_cond_switch_ctx(); | |||||
| InitCondSwitchCtx(candswitchdef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| candswitchdef->add_true_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| candswitchdef->add_false_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| domi::FftsPlusCtxDef *aicaivctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| aicaivctx->set_op_index(0); | |||||
| aicaivctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_AIV)); | |||||
| aicaivctx->set_software_ctx_type(0); | |||||
| domi::FftsPlusAicAivCtxDef *aicaivdef = aicaivctx->mutable_aic_aiv_ctx(); | |||||
| InitAicAivCtx(aicaivdef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| aicaivdef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); | |||||
| aicaivdef->add_kernel_name("aivtest"); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); | |||||
| aicaivdef->add_src_slot(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); | |||||
| domi::FftsPlusCtxDef *mixaicaivctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| mixaicaivctx->set_op_index(0); | |||||
| mixaicaivctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_MIX_AIC)); | |||||
| mixaicaivctx->set_software_ctx_type(0); | |||||
| domi::FftsPlusMixAicAivCtxDef *mixctxdef = mixaicaivctx->mutable_mix_aic_aiv_ctx(); | |||||
| InitMixAicAivCtx(mixctxdef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); | |||||
| mixctxdef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); | |||||
| mixctxdef->add_kernel_name("mixaiv"); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); | |||||
| mixctxdef->add_src_slot(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), INTERNAL_ERROR); | |||||
| } | |||||
| // test FftsPlusTaskInfo Init hardware ctx with a case-default context | |||||
| TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_hardware_ctx_ex) { | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| domi::TaskDef task_def; | |||||
| FftsPlusTaskInfo task_info; | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| davinci_model.stream_list_ = { stream }; | |||||
| task_def.set_stream_id(0); | |||||
| domi::FftsPlusTaskDef *ffts_plus_task_def = task_def.mutable_ffts_plus_task(); | |||||
| rtFftsPlusTaskInfo_t sub_task_info; | |||||
| task_info.ffts_plus_task_info_ = sub_task_info; | |||||
| davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); | |||||
| ffts_plus_task_def->set_op_index(0); | |||||
| ffts_plus_task_def->set_addr_size(2); | |||||
| InitTaskSQEInfo(ffts_plus_task_def); | |||||
| InitTaskAdditionalDataInfo(ffts_plus_task_def); | |||||
| domi::FftsPlusCtxDef *casesdefaultctx = ffts_plus_task_def->add_ffts_plus_ctx(); | |||||
| casesdefaultctx->set_op_index(0); | |||||
| casesdefaultctx->set_hardware_ctx_type(static_cast<uint32_t>(RT_HW_CTX_TYPE_LOAD)); | |||||
| casesdefaultctx->set_software_ctx_type(static_cast<uint32_t>(RT_SOFT_CTX_TYPE_CASE_SWITCH)); | |||||
| domi::FftsPlusCaseDefaultCtxDef *casesdefaultdef = casesdefaultctx->mutable_case_default_ctx(); | |||||
| InitCaseDefaultCtx(casesdefaultdef); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), FAILED); | |||||
| casesdefaultdef->add_successor_list(1); | |||||
| EXPECT_EQ(task_info.Init(task_def, &davinci_model), SUCCESS); | |||||
| } | |||||
| // test FftsPlusTaskInfo UpdateArgs | |||||
| TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_update_args) { | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| FftsPlusTaskInfo task_info; | |||||
| task_info.davinci_model_ = &davinci_model; | |||||
| task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; | |||||
| EXPECT_EQ(task_info.UpdateArgs(), SUCCESS); | |||||
| } | |||||
| // test FftsPlusTaskInfo CalculateArgs | |||||
| TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_calculate_args) { | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| domi::TaskDef task_def; | |||||
| FftsPlusTaskInfo task_info; | |||||
| EXPECT_EQ(task_info.CalculateArgs(task_def, &davinci_model), SUCCESS); | |||||
| } | |||||
| // test FftsPlusTaskInfo Distribute | |||||
| TEST_F(UtestFftsPlusTaskInfo, success_ffts_plus_task_info_distribute) { | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| FftsPlusTaskInfo task_info; | |||||
| rtFftsPlusTaskInfo_t sub_task_info; | |||||
| task_info.ffts_plus_task_info_ = sub_task_info; | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| task_info.stream_ = stream; | |||||
| EXPECT_EQ(task_info.Distribute(), SUCCESS); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -23,15 +23,20 @@ | |||||
| #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | #include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | ||||
| #include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
| #include "tests/depends/runtime/src/runtime_stub.h" | |||||
| namespace ge { | namespace ge { | ||||
| extern OpDescPtr CreateOpDesc(string name, string type); | extern OpDescPtr CreateOpDesc(string name, string type); | ||||
| class UtestKernelExTaskInfo : public testing::Test { | class UtestKernelExTaskInfo : public testing::Test { | ||||
| protected: | protected: | ||||
| void SetUp() {} | |||||
| void SetUp() { | |||||
| RTS_STUB_SETUP(); | |||||
| } | |||||
| void TearDown() {} | |||||
| void TearDown() { | |||||
| RTS_STUB_TEARDOWN(); | |||||
| } | |||||
| }; | }; | ||||
| // test kernel_ex_task_Release | // test kernel_ex_task_Release | ||||
| @@ -209,4 +214,136 @@ TEST_F(UtestKernelExTaskInfo, parse_topic_type_failed_2) { | |||||
| KernelExTaskInfo kernel_ex_task_info; | KernelExTaskInfo kernel_ex_task_info; | ||||
| EXPECT_NE(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); | EXPECT_NE(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); | ||||
| } | } | ||||
| TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::TaskDef task_def; | |||||
| domi::KernelExDef kernel_ex_def; | |||||
| kernel_ex_def.set_kernel_ext_info(buf, len); | |||||
| kernel_ex_def.set_kernel_ext_info_size(len); | |||||
| domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||||
| *kernel_ex_def_tmp = kernel_ex_def; | |||||
| const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||||
| ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| KernelExTaskInfo kernel_ex_task_info; | |||||
| kernel_ex_task_info.op_desc_ = op_desc; | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| kernel_ex_task_info.davinci_model_ = &davinci_model; | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||||
| EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||||
| kernel_ex_task_info.op_desc_ = op_desc; | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||||
| EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_01) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::TaskDef task_def; | |||||
| domi::KernelExDef kernel_ex_def; | |||||
| kernel_ex_def.set_kernel_ext_info(buf, len); | |||||
| kernel_ex_def.set_kernel_ext_info_size(len); | |||||
| domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||||
| *kernel_ex_def_tmp = kernel_ex_def; | |||||
| const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||||
| KernelExTaskInfo kernel_ex_task_info; | |||||
| kernel_ex_task_info.op_desc_ = op_desc; | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| kernel_ex_task_info.davinci_model_ = &davinci_model; | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||||
| kernel_ex_task_info.is_blocking_aicpu_op_ = true; | |||||
| EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||||
| } | |||||
| TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_02) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::TaskDef task_def; | |||||
| domi::KernelExDef kernel_ex_def; | |||||
| kernel_ex_def.set_kernel_ext_info(buf, len); | |||||
| kernel_ex_def.set_kernel_ext_info_size(len); | |||||
| domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||||
| *kernel_ex_def_tmp = kernel_ex_def; | |||||
| const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||||
| ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| KernelExTaskInfo kernel_ex_task_info; | |||||
| kernel_ex_task_info.op_desc_ = op_desc; | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| kernel_ex_task_info.davinci_model_ = &davinci_model; | |||||
| // InitTaskExtInfo fails when rtGetDevice or rtGetDeviceCapability reports an error, or when the capability value is unrecognized | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||||
| // Distribute fails when rtGetDevice, rtStreamWaitEvent or rtEventReset reports an error | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||||
| // When the device reports no blocking-op support, init and distribute both succeed | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -22,15 +22,20 @@ | |||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "graph/load/model_manager/task_info/kernel_task_info.h" | #include "graph/load/model_manager/task_info/kernel_task_info.h" | ||||
| #include "graph/load/model_manager/task_info/hccl_task_info.h" | #include "graph/load/model_manager/task_info/hccl_task_info.h" | ||||
| #include "tests/depends/runtime/src/runtime_stub.h" | |||||
| namespace ge { | namespace ge { | ||||
| extern OpDescPtr CreateOpDesc(string name, string type); | extern OpDescPtr CreateOpDesc(string name, string type); | ||||
| class UtestKernelTaskInfo : public testing::Test { | class UtestKernelTaskInfo : public testing::Test { | ||||
| protected: | protected: | ||||
| void SetUp() {} | |||||
| void SetUp() { | |||||
| RTS_STUB_SETUP(); | |||||
| } | |||||
| void TearDown() {} | |||||
| void TearDown() { | |||||
| RTS_STUB_TEARDOWN(); | |||||
| } | |||||
| }; | }; | ||||
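Note: the RTS_STUB_* macros come from tests/depends/runtime/src/runtime_stub.h, which is not part of this diff. The following is a minimal sketch of how such a stub registry could work, assuming a per-function queue of forced return codes that each stubbed runtime call consumes once (RTS_STUB_OUTBOUND_VALUE would queue a value to be written to a named output parameter in the same way); the real implementation may differ.

```cpp
#include <cstdint>
#include <deque>
#include <map>
#include <string>

using rtError_t = int32_t;
constexpr rtError_t RT_ERROR_NONE = 0;

// One queue of forced return codes per runtime API name.
static std::map<std::string, std::deque<rtError_t>> g_forced_returns;

#define RTS_STUB_SETUP()    g_forced_returns.clear()
#define RTS_STUB_TEARDOWN() g_forced_returns.clear()
// Force exactly one upcoming call of `func` to return `value`.
#define RTS_STUB_RETURN_VALUE(func, type, value) g_forced_returns[#func].push_back(value)

// Example stubbed API: pop a forced value if one is queued, otherwise succeed.
rtError_t rtGetDevice(int32_t *device_id) {
  *device_id = 0;
  auto &queue = g_forced_returns["rtGetDevice"];
  if (!queue.empty()) {
    const rtError_t forced = queue.front();
    queue.pop_front();
    return forced;
  }
  return RT_ERROR_NONE;
}
```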
| // test KernelTaskInfo Init. | // test KernelTaskInfo Init. | ||||
| @@ -1240,4 +1245,135 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_super_kernel_info) { | |||||
| EXPECT_EQ(kernel_task_info.SKTFinalize(), SUCCESS); | EXPECT_EQ(kernel_task_info.SKTFinalize(), SUCCESS); | ||||
| } | } | ||||
| TEST_F(UtestKernelTaskInfo, blocking_aicpu_op) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::TaskDef task_def; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||||
| op_desc->SetId(0); | |||||
| ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| davinci_model.op_list_.emplace(0, op_desc); | |||||
| KernelTaskInfo kernel_task_info; | |||||
| kernel_task_info.op_desc_ = op_desc; | |||||
| kernel_task_info.davinci_model_ = &davinci_model; | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||||
| EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||||
| kernel_task_info.op_desc_ = op_desc; | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||||
| EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_01) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||||
| op_desc->SetId(0); | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| davinci_model.op_list_.emplace(0, op_desc); | |||||
| KernelTaskInfo kernel_task_info; | |||||
| kernel_task_info.davinci_model_ = &davinci_model; | |||||
| kernel_task_info.op_desc_ = op_desc; | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||||
| kernel_task_info.is_blocking_aicpu_op_ = true; | |||||
| EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||||
| } | |||||
| TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_02) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||||
| ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| op_desc->SetId(0); | |||||
| DavinciModel davinci_model(0, nullptr); | |||||
| davinci_model.op_list_.emplace(0, op_desc); | |||||
| KernelTaskInfo kernel_task_info; | |||||
| kernel_task_info.davinci_model_ = &davinci_model; | |||||
| kernel_task_info.op_desc_ = op_desc; | |||||
| // InitAicpuTaskExtInfo fails when rtGetDevice or rtGetDeviceCapability reports an error, or when the capability value is unrecognized | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||||
| // Distribute fails when rtGetDevice, rtStreamWaitEvent or rtEventReset reports an error | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||||
| EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||||
| // When the device reports no blocking-op support, init and distribute both succeed | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||||
| } | |||||
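Note: the rtStreamWaitEvent and rtEventReset stubs in the tests above target the event-based wait that a blocking AICPU op's Distribute performs after launching the kernel. The sketch below is illustrative only and uses a hypothetical helper name; the real logic lives in KernelTaskInfo/KernelExTaskInfo, but the call order (wait on the op's event, then re-arm it) is the one the stubs exercise.

```cpp
// Hypothetical helper illustrating the post-launch wait sequence for a blocking AICPU op.
Status DistributeWaitForBlockingOp(rtEvent_t event, rtStream_t stream) {
  // Block the stream until the AICPU kernel signals the event.
  if (rtStreamWaitEvent(stream, event) != RT_ERROR_NONE) {
    GELOGE(FAILED, "[Call][RtStreamWaitEvent] fail for blocking aicpu op");
    return FAILED;
  }
  // Re-arm the event so the next launch on this stream can reuse it.
  if (rtEventReset(event, stream) != RT_ERROR_NONE) {
    GELOGE(FAILED, "[Call][RtEventReset] fail for blocking aicpu op");
    return FAILED;
  }
  return SUCCESS;
}
```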
| } // namespace ge | } // namespace ge | ||||
| @@ -367,7 +367,7 @@ TEST(UtestIrBuild, check_data_op_attr_index_valid) { | |||||
| }; | }; | ||||
| ModelBufferData model; | ModelBufferData model; | ||||
| graphStatus ret = aclgrphBuildModel(graph, build_options, model); | graphStatus ret = aclgrphBuildModel(graph, build_options, model); | ||||
| EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||||
| EXPECT_EQ(ret, ge::FAILED); | |||||
| } | } | ||||
| // set attr index invalid, when not set input shape range | // set attr index invalid, when not set input shape range | ||||
| @@ -377,7 +377,7 @@ TEST(UtestIrBuild, check_data_attr_index_succ_no_input_range) { | |||||
| const map<string, string> build_options; | const map<string, string> build_options; | ||||
| ModelBufferData model; | ModelBufferData model; | ||||
| graphStatus ret = aclgrphBuildModel(graph, build_options, model); | graphStatus ret = aclgrphBuildModel(graph, build_options, model); | ||||
| EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||||
| EXPECT_EQ(ret, ge::FAILED); | |||||
| } | } | ||||
| TEST(UtestIrBuild, check_modify_mixlist_param) { | TEST(UtestIrBuild, check_modify_mixlist_param) { | ||||
| @@ -27,7 +27,7 @@ | |||||
| #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" | #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" | ||||
| #undef protected | #undef protected | ||||
| #undef private | #undef private | ||||
| #include "tests/depends/runtime/src/runtime_stub.h" | |||||
| using namespace std; | using namespace std; | ||||
| using namespace testing; | using namespace testing; | ||||
| @@ -43,8 +43,12 @@ using namespace hybrid; | |||||
| class UtestAicpuNodeExecutor : public testing::Test { | class UtestAicpuNodeExecutor : public testing::Test { | ||||
| protected: | protected: | ||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| void SetUp() { | |||||
| RTS_STUB_SETUP(); | |||||
| } | |||||
| void TearDown() { | |||||
| RTS_STUB_TEARDOWN(); | |||||
| } | |||||
| }; | }; | ||||
| static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { | static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { | ||||
| @@ -164,5 +168,222 @@ TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) { | |||||
| } | } | ||||
| TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task) { | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||||
| GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||||
| ge_root_model->SetModelName("test_name"); | |||||
| HybridModel hybrid_model(ge_root_model); | |||||
| NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); | |||||
| ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| std::unique_ptr<NodeItem> new_node; | |||||
| ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); | |||||
| NodeItem *node_item = new_node.get(); | |||||
| node_item->input_start = 0; | |||||
| node_item->output_start = 0; | |||||
| node_item->is_dynamic = true; | |||||
| node_item->shape_inference_type = DEPEND_SHAPE_RANGE; | |||||
| GraphItem graph_item; | |||||
| graph_item.node_items_.emplace_back(node_item); | |||||
| graph_item.total_inputs_ = 1; | |||||
| graph_item.total_outputs_ = 1; | |||||
| GraphExecutionContext graph_execution_context; | |||||
| SubgraphContext subgraph_context(&graph_item, &graph_execution_context); | |||||
| ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||||
| graph_execution_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||||
| auto node_state = subgraph_context.GetOrCreateNodeState(node_item); | |||||
| ASSERT_NE(node_state, nullptr); | |||||
| uint64_t value_0 = 512; | |||||
| TensorValue in_tensor0(&value_0, sizeof(value_0)); | |||||
| subgraph_context.SetInput(*node_item, 0, in_tensor0); | |||||
| TensorValue out_tensor0(&value_0, sizeof(value_0)); | |||||
| subgraph_context.SetOutput(*node_item, 0, out_tensor0); | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| domi::TaskDef task_def; | |||||
| AicpuTaskStruct args; | |||||
| args.head.length = sizeof(args); | |||||
| args.head.ioAddrNum = 2; | |||||
| kernel_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||||
| kernel_def.set_args_size(args.head.length); | |||||
| domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); | |||||
| *kernel_def_tmp = kernel_def; | |||||
| AicpuNodeTask aicpu_node_task(node_item, task_def); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||||
| ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||||
| node_item->shape_inference_type = DEPEND_COMPUTE; | |||||
| domi::KernelExDef kernel_ex_def; | |||||
| kernel_ex_def.set_kernel_ext_info(buf, len); | |||||
| kernel_ex_def.set_kernel_ext_info_size(len); | |||||
| kernel_ex_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||||
| kernel_ex_def.set_args_size(args.head.length); | |||||
| domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||||
| *kernel_ex_def_tmp = kernel_ex_def; | |||||
| hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def}); | |||||
| AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); | |||||
| ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||||
| ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task_fail) { | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||||
| GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||||
| ge_root_model->SetModelName("test_name"); | |||||
| HybridModel hybrid_model(ge_root_model); | |||||
| NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); | |||||
| ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| std::unique_ptr<NodeItem> new_node; | |||||
| ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); | |||||
| NodeItem *node_item = new_node.get(); | |||||
| node_item->input_start = 0; | |||||
| node_item->output_start = 0; | |||||
| node_item->is_dynamic = true; | |||||
| node_item->shape_inference_type = DEPEND_SHAPE_RANGE; | |||||
| GraphItem graph_item; | |||||
| graph_item.node_items_.emplace_back(node_item); | |||||
| graph_item.total_inputs_ = 1; | |||||
| graph_item.total_outputs_ = 1; | |||||
| GraphExecutionContext graph_execution_context; | |||||
| SubgraphContext subgraph_context(&graph_item, &graph_execution_context); | |||||
| ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||||
| graph_execution_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||||
| auto node_state = subgraph_context.GetOrCreateNodeState(node_item); | |||||
| ASSERT_NE(node_state, nullptr); | |||||
| uint64_t value_0 = 512; | |||||
| TensorValue in_tensor0(&value_0, sizeof(value_0)); | |||||
| subgraph_context.SetInput(*node_item, 0, in_tensor0); | |||||
| TensorValue out_tensor0(&value_0, sizeof(value_0)); | |||||
| subgraph_context.SetOutput(*node_item, 0, out_tensor0); | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| domi::TaskDef task_def; | |||||
| AicpuTaskStruct args; | |||||
| args.head.length = sizeof(args); | |||||
| args.head.ioAddrNum = 2; | |||||
| kernel_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||||
| kernel_def.set_args_size(args.head.length); | |||||
| domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); | |||||
| *kernel_def_tmp = kernel_def; | |||||
| AicpuNodeTask aicpu_node_task(node_item, task_def); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||||
| node_item->shape_inference_type = DEPEND_COMPUTE; | |||||
| domi::KernelExDef kernel_ex_def; | |||||
| kernel_ex_def.set_kernel_ext_info(buf, len); | |||||
| kernel_ex_def.set_kernel_ext_info_size(len); | |||||
| kernel_ex_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||||
| kernel_ex_def.set_args_size(args.head.length); | |||||
| domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||||
| *kernel_ex_def_tmp = kernel_ex_def; | |||||
| hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def}); | |||||
| AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||||
| ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||||
| ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||||
| ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include "graph/load/model_manager/model_utils.h" | #include "graph/load/model_manager/model_utils.h" | ||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||||
| #include "runtime/rt.h" | #include "runtime/rt.h" | ||||
| #define protected public | #define protected public | ||||
| @@ -30,6 +31,7 @@ | |||||
| #include "external/register/op_tiling_registry.h" | #include "external/register/op_tiling_registry.h" | ||||
| #undef private | #undef private | ||||
| #undef protected | #undef protected | ||||
| #include "tests/depends/runtime/src/runtime_stub.h" | |||||
| using namespace std; | using namespace std; | ||||
| using namespace testing; | using namespace testing; | ||||
| @@ -38,9 +40,13 @@ using namespace optiling; | |||||
| class UtestSingleOpTask : public testing::Test { | class UtestSingleOpTask : public testing::Test { | ||||
| protected: | protected: | ||||
| void SetUp() {} | |||||
| void SetUp() { | |||||
| RTS_STUB_SETUP(); | |||||
| } | |||||
| void TearDown() {} | |||||
| void TearDown() { | |||||
| RTS_STUB_TEARDOWN(); | |||||
| } | |||||
| }; | }; | ||||
| TEST_F(UtestSingleOpTask, test_build_kernel_task) { | TEST_F(UtestSingleOpTask, test_build_kernel_task) { | ||||
| @@ -237,3 +243,124 @@ TEST_F(UtestSingleOpTask, test_aicpu_task_update_io_addr) { | |||||
| ASSERT_EQ(ret, PARAM_INVALID); | ASSERT_EQ(ret, PARAM_INVALID); | ||||
| } | } | ||||
| } | } | ||||
| TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_01) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| auto op_desc = make_shared<OpDesc>("deque", "Deque"); | |||||
| ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| AiCpuCCTask aicpu_task; | |||||
| aicpu_task.SetOpDesc(op_desc); | |||||
| rtStream_t stream; | |||||
| ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); | |||||
| ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_02) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| auto op_desc = make_shared<OpDesc>("deque", "Deque"); | |||||
| ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| AiCpuTask aicpu_task; | |||||
| aicpu_task.SetOpDesc(op_desc); | |||||
| rtStream_t stream; | |||||
| ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); | |||||
| ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) { | |||||
| int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||||
| vector<char> aicpu_ext_info(len, 0); | |||||
| char *buf = aicpu_ext_info.data(); | |||||
| int offset = 0; | |||||
| hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||||
| ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||||
| ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||||
| offset += sizeof(hybrid::AicpuExtInfo); | |||||
| hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||||
| async_wait_info->waitType = 0; | |||||
| async_wait_info->waitId = 0; | |||||
| async_wait_info->timeOut = 0; | |||||
| async_wait_info->reserved = 0; | |||||
| domi::KernelDef kernel_def; | |||||
| kernel_def.set_kernel_ext_info(buf, len); | |||||
| kernel_def.set_kernel_ext_info_size(len); | |||||
| auto op_desc = make_shared<OpDesc>("deque", "Deque"); | |||||
| ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||||
| AiCpuTask aicpu_task; | |||||
| aicpu_task.SetOpDesc(op_desc); | |||||
| rtStream_t stream; | |||||
| ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); | |||||
| ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED); | |||||
| ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||||
| ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS); | |||||
| RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||||
| RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||||
| EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS); | |||||
| } | |||||
| @@ -62,6 +62,7 @@ enum FWKTaskExtInfoType { | |||||
| FWK_ADPT_EXT_SESSION_INFO, | FWK_ADPT_EXT_SESSION_INFO, | ||||
| FWK_ADPT_EXT_BITMAP, | FWK_ADPT_EXT_BITMAP, | ||||
| FWK_ADPT_EXT_TOPIC_TYPE, | FWK_ADPT_EXT_TOPIC_TYPE, | ||||
| FWK_ADPT_EXT_ASYNCWAIT, | |||||
| FWK_ADPT_EXT_INVALID | FWK_ADPT_EXT_INVALID | ||||
| }; | }; | ||||
| @@ -80,6 +81,12 @@ enum FWKExtUpdateAddrType { | |||||
| FWK_ADPT_UPDATE_INPUT_OUTPUT | FWK_ADPT_UPDATE_INPUT_OUTPUT | ||||
| }; | }; | ||||
| enum FWKExtWaitType { | |||||
| FWK_ADPT_WAIT_TYPE_NULL = 0, | |||||
| FWK_ADPT_WAIT_TYPE_EVENT, | |||||
| FWK_ADPT_WAIT_TYPE_INVALID | |||||
| }; | |||||
| #pragma pack(push, 1) | #pragma pack(push, 1) | ||||
| // API Parameter Structure | // API Parameter Structure | ||||
| struct StrFWKKernel { | struct StrFWKKernel { | ||||
| @@ -133,6 +140,15 @@ struct ResultSummary { | |||||
| uint64_t raw_data_size; // size of raw data | uint64_t raw_data_size; // size of raw data | ||||
| }; | }; | ||||
| #pragma pack(pop) | #pragma pack(pop) | ||||
| #pragma pack(push, 1) | |||||
| struct AsyncWait { | |||||
| uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait | |||||
| uint32_t waitId; // wait id, refreshed by GE | |||||
| uint32_t timeOut; // reserved | |||||
| uint64_t reserved; | |||||
| }; | |||||
| #pragma pack(pop) | |||||
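Note: the unit tests above assemble the FWK_ADPT_EXT_ASYNCWAIT entry as an ext-info header (infoType, infoLen) followed immediately by this packed payload. The sketch below mirrors that layout with local stand-in structs so it compiles on its own; the authoritative definitions are the ones in this header and in hybrid's aicpu_ext_info.h, and the enum value used for the info type is a placeholder.

```cpp
#include <cstdint>
#include <vector>

// Local stand-ins for illustration only; real types: hybrid::AicpuExtInfo and
// aicpu::FWKAdapter::AsyncWait (packed, as declared above).
#pragma pack(push, 1)
struct ExtInfoHeader {
  int32_t infoType;   // FWK_ADPT_EXT_ASYNCWAIT for the async-wait entry
  uint32_t infoLen;   // size of the payload that follows
};
struct AsyncWaitPayload {
  uint8_t waitType;   // FWK_ADPT_WAIT_TYPE_EVENT for event-based wait
  uint32_t waitId;    // refreshed by GE with the event id before launch
  uint32_t timeOut;   // reserved
  uint64_t reserved;
};
#pragma pack(pop)

constexpr int32_t kAsyncWaitInfoType = 0;  // placeholder; use FWK_ADPT_EXT_ASYNCWAIT in real code

std::vector<char> BuildAsyncWaitExtInfo() {
  std::vector<char> buf(sizeof(ExtInfoHeader) + sizeof(AsyncWaitPayload), 0);
  auto *header = reinterpret_cast<ExtInfoHeader *>(buf.data());
  header->infoType = kAsyncWaitInfoType;
  header->infoLen = sizeof(AsyncWaitPayload);
  auto *wait = reinterpret_cast<AsyncWaitPayload *>(buf.data() + sizeof(ExtInfoHeader));
  wait->waitType = 1;  // event wait (FWK_ADPT_WAIT_TYPE_EVENT)
  wait->waitId = 0;    // filled in by GE at launch time
  wait->timeOut = 0;
  wait->reserved = 0;
  return buf;
}
```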
| } // end namespace FWKAdapter | } // end namespace FWKAdapter | ||||
| } // namespace aicpu | } // namespace aicpu | ||||
| @@ -52,6 +52,14 @@ typedef enum tagRtAicpuScheType { | |||||
| SCHEDULE_HARDWARE, /* HWTS Schedule */ | SCHEDULE_HARDWARE, /* HWTS Schedule */ | ||||
| } rtAicpuScheType; | } rtAicpuScheType; | ||||
| typedef enum tagRtDeviceCapabilityType { | |||||
| RT_SCHEDULE_SOFTWARE = 0, // SoftWare Schedule | |||||
| RT_SCHEDULE_SOFTWARE_OPT, | |||||
| RT_SCHEDULE_HARDWARE, // HWTS Schedule | |||||
| RT_AICPU_BLOCKING_OP_NOT_SUPPORT, | |||||
| RT_AICPU_BLOCKING_OP_SUPPORT, // TS on 1910/1980/1951 supports AICPU blocking operations | |||||
| } rtDeviceCapabilityType; | |||||
| typedef enum tagRtVersion { | typedef enum tagRtVersion { | ||||
| VER_BEGIN = 0, | VER_BEGIN = 0, | ||||
| VER_NA = VER_BEGIN, | VER_NA = VER_BEGIN, | ||||
| @@ -65,6 +65,7 @@ typedef enum tagRtFeatureType { | |||||
| typedef enum tagRtDeviceFeatureType { | typedef enum tagRtDeviceFeatureType { | ||||
| FEATURE_TYPE_SCHE, | FEATURE_TYPE_SCHE, | ||||
| FEATURE_TYPE_BLOCKING_OPERATOR, | |||||
| FEATURE_TYPE_END, | FEATURE_TYPE_END, | ||||
| } rtDeviceFeatureType_t; | } rtDeviceFeatureType_t; | ||||
| @@ -78,6 +79,17 @@ typedef enum tagMemoryInfo { | |||||
| MEMORY_INFO_RSV | MEMORY_INFO_RSV | ||||
| } rtMemoryInfo_t; | } rtMemoryInfo_t; | ||||
| typedef enum tagRtDeviceModuleType { | |||||
| RT_MODULE_TYPE_SYSTEM = 0, | |||||
| RT_MODULE_TYPE_AICPU, | |||||
| RT_MODULE_TYPE_CCPU, | |||||
| RT_MODULE_TYPE_DCPU, | |||||
| RT_MODULE_TYPE_AICORE, | |||||
| RT_MODULE_TYPE_TSCPU, | |||||
| RT_MODULE_TYPE_PCIE, | |||||
| RT_MODULE_TYPE_VECTOR_CORE | |||||
| } tagRtDeviceModuleType_t; | |||||
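Note: taken together with rtDeviceCapabilityType and FEATURE_TYPE_BLOCKING_OPERATOR above, these additions suggest the capability probe the blocking-op tests drive: get the current device, query whether its AICPU module supports blocking operators, and accept only the two *_BLOCKING_OP_* values. The sketch below is illustrative; the parameter order of rtGetDeviceCapability is assumed from how the tests stub its `value` output, not taken from the real prototype.

```cpp
// Illustrative capability check; assumed signature:
// rtGetDeviceCapability(deviceId, moduleType, featureType, &value).
bool DeviceSupportsBlockingAicpuOp() {
  int32_t device_id = 0;
  if (rtGetDevice(&device_id) != RT_ERROR_NONE) {
    return false;  // treated as an error by the callers under test
  }
  int32_t value = RT_AICPU_BLOCKING_OP_NOT_SUPPORT;
  if (rtGetDeviceCapability(device_id, RT_MODULE_TYPE_AICPU, FEATURE_TYPE_BLOCKING_OPERATOR,
                            &value) != RT_ERROR_NONE) {
    return false;
  }
  // The tests expect any value other than these two enumerators to be rejected.
  if (value != RT_AICPU_BLOCKING_OP_SUPPORT && value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT) {
    return false;
  }
  return value == RT_AICPU_BLOCKING_OP_SUPPORT;
}
```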
| /** | /** | ||||
| * @ingroup dvrt_dev | * @ingroup dvrt_dev | ||||
| * @brief get total device number. | * @brief get total device number. | ||||
| @@ -178,7 +178,7 @@ typedef struct tagFftsTaskInfo { | |||||
| } rtFftsTaskInfo_t; | } rtFftsTaskInfo_t; | ||||
| RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); | RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); | ||||
| RTS_API rtError_t rtGetC2cCtrlAddr(uint64_t *addr, uint32_t *len); | |||||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | ||||
| } | } | ||||
| #endif | #endif | ||||