| @@ -0,0 +1,184 @@ | |||||
| /* | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||||
| * Description: ffts interface | |||||
| */ | |||||
| #ifndef __CCE_RUNTIME_FFTS_H | |||||
| #define __CCE_RUNTIME_FFTS_H | |||||
| #include "base.h" | |||||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
| extern "C" { | |||||
| #endif | |||||
| #define RT_FFTS_MAX_SUB_TASK_NUM 32U /* capacity of rtFftsTaskInfo_t.subTask[] */ | |||||
| #define RT_FFTS_MAX_TICKET_CACHE_NUM 64U /* capacity of rtFftsTaskInfo_t.ticketCache[] */ | |||||
| #define RT_FFTS_MAX_MANUAL_THREAD_NUM 16U /* length of the per-thread arrays in the manual thread structs */ | |||||
| #define RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK 8U /* length of the per-sub-task ticket cache ID, prefetch and dependency arrays */ | |||||
| #define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U /* number of bytes in rtManualThreadDependency_t.dependency[] */ | |||||
| typedef enum tagFftsType { /* thread mode of an FFTS task; type of rtFftsTaskInfo_t.fftsType */ | |||||
| RT_FFTS_TYPE_AUTO_THREAD = 2, // FFTS auto thread mode, same as the FFTS definition | |||||
| RT_FFTS_TYPE_MANUAL_THREAD = 3, // FFTS manual thread mode, same as the FFTS definition | |||||
| } rtFftsType_t; | |||||
| typedef enum tagFftsSubTaskType { /* kind of a single sub task; type of rtFftsSubTaskInfo_t.subTaskType */ | |||||
| RT_FFTS_SUB_TASK_TYPE_AIC = 0, | |||||
| RT_FFTS_SUB_TASK_TYPE_AIV = 1, | |||||
| RT_FFTS_SUB_TASK_TYPE_NOP = 2, | |||||
| RT_FFTS_SUB_TASK_TYPE_NOTIFY_WAIT = 3, | |||||
| RT_FFTS_SUB_TASK_TYPE_NOTIFY_RECORD = 4, | |||||
| RT_FFTS_SUB_TASK_TYPE_WRITE_VALUE = 5, | |||||
| RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, | |||||
| RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, | |||||
| RT_FFTS_SUB_TASK_TYPE_SDMA = 8, | |||||
| RT_FFTS_SUB_TASK_TYPE_RESERVED, /* no explicit value: takes the previous enumerator + 1, i.e. 9 */ | |||||
| } rtFftsSubTaskType_t; | |||||
| typedef struct tagManualThreadDmuInfo { /* one DMU descriptor; element type of prefetchList and dmuList in the manual thread structs */ | |||||
| uint64_t dataAddr; // device memory address | |||||
| uint16_t numOuter; /* NOTE(review): numOuter/numInner/strideOuter/lenInner/strideInner presumably describe a two-level strided region - confirm against the FFTS spec */ | |||||
| uint16_t numInner; | |||||
| uint32_t strideOuter; | |||||
| uint32_t lenInner; | |||||
| uint32_t strideInner; | |||||
| } rtManualThreadDmuInfo_t; | |||||
| typedef struct tagManualThreadDependency { /* element type of the srcDepTbl[] arrays in the manual thread sub task structs */ | |||||
| uint8_t dependency[RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN]; /* 32 bytes; the encoding of each byte is not defined in this header */ | |||||
| } rtManualThreadDependency_t; | |||||
| typedef struct tagManualThreadAicAivInfo { /* manual thread mode AIC/AIV sub task parameters; member manualThreadAicAiv of rtFftsSubTaskInfo_t.custom */ | |||||
| uint64_t taskParamAddr; // device memory address | |||||
| uint16_t taskParamOffset; | |||||
| // when satMode=1 and an FP16 computation with non-INF inputs overflows/underflows, the result is +/-INF of FP16 | |||||
| // when satMode=0 and an FP16 computation with non-INF inputs overflows/underflows, | |||||
| // the result is saturated to +/-MAX of FP16 | |||||
| uint8_t satMode; | |||||
| uint8_t scheduleMode; // 0: normal mode, 1: batch mode, 2: sync mode, 3: reserved | |||||
| uint8_t iCachePrefetchCnt; // unit is 2K | |||||
| uint8_t prefetchEnableBitmap; // 8-bit bitmap, e.g. 1 0 1 0 | |||||
| uint8_t prefetchOnceBitmap; // 8-bit bitmap, e.g. 1 0 1 0 | |||||
| uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in FFTS | |||||
| // num = thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index | |||||
| uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // at most threadDim entries are valid | |||||
| uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||||
| const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||||
| rtManualThreadDmuInfo_t *prefetchList; // DMU descriptors, 0-64k; length is the last valid index, threadPrefetchDmuIdx[threadDim-1] | |||||
| rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||||
| } rtManualThreadAicAivInfo_t; | |||||
| typedef struct tagAutoThreadPrefetch { /* one prefetch source in auto thread mode; element type of rtAutoThreadAicAivInfo_t.srcPrefetch[] */ | |||||
| uint64_t dataAddr; // device memory address | |||||
| uint32_t dataAddrOffset; | |||||
| uint32_t nonTailDataLen; /* NOTE(review): presumably the data length for non-tail threads - confirm */ | |||||
| uint32_t tailDataLen; /* NOTE(review): presumably the data length for the tail thread - confirm */ | |||||
| } rtAutoThreadPrefetch_t; | |||||
| typedef struct tagAutoThreadAicAivInfo { /* auto thread mode AIC/AIV sub task parameters; member autoThreadAicAiv of rtFftsSubTaskInfo_t.custom */ | |||||
| uint64_t taskParamAddr; // device memory address | |||||
| uint16_t taskParamOffset; | |||||
| // when satMode=1 and an FP16 computation with non-INF inputs overflows/underflows, the result is +/-INF of FP16 | |||||
| // when satMode=0 and an FP16 computation with non-INF inputs overflows/underflows, the result is saturated to +/-MAX of FP16 | |||||
| uint8_t satMode; | |||||
| uint8_t scheduleMode; // 0: normal mode, 1: batch mode, 2: sync mode, 3: reserved | |||||
| uint8_t iCachePrefetchCnt; // unit is 2K | |||||
| uint8_t prefetchEnableBitmap; // 8-bit bitmap | |||||
| uint8_t prefetchOnceBitmap; // 8-bit bitmap | |||||
| uint16_t tailBlkDim; | |||||
| uint16_t nonTailBlkDim; | |||||
| const char *nonTailTaskFuncStub; | |||||
| const char *tailTaskFuncStub; | |||||
| // for prefetch, the number of valid entries is the bit count of prefetchEnableBitmap. | |||||
| // e.g. prefetchEnableBitmap='00010011' has 3 bits set, so 3 prefetches are needed and only srcPrefetch[0], [1] and [2] are valid | |||||
| rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||||
| } rtAutoThreadAicAivInfo_t; | |||||
| typedef struct tagAutoThreadCacheInfo { /* auto thread mode ticket cache data; member autoThreadCache of rtTicketCache_t.custom */ | |||||
| uint64_t dataAddr; // device memory address | |||||
| uint32_t dataAddrOffset; | |||||
| uint32_t nonTailDataLen; | |||||
| uint32_t tailDataLen; | |||||
| uint16_t ticketCacheRefCnt; | |||||
| } rtAutoThreadCacheInfo_t; | |||||
| typedef struct tagManualThreadCacheInfo { /* manual thread mode ticket cache data; member manualThreadCache of rtTicketCache_t.custom */ | |||||
| rtManualThreadDmuInfo_t *dmuList; // DMU descriptors, 0-64k | |||||
| uint16_t dmuNum; /* NOTE(review): presumably the number of descriptors in dmuList - confirm */ | |||||
| uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||||
| uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||||
| } rtManualThreadCacheInfo_t; | |||||
| typedef enum tagCacheOp { /* cache operation selector; type of rtTicketCache_t.cacheOption */ | |||||
| RT_CACHE_OP_NONE = 0, | |||||
| RT_CACHE_OP_FLUSH = 1, | |||||
| RT_CACHE_OP_INVALIDATE = 2, | |||||
| RT_CACHE_OP_WRITE_BACK = 3, | |||||
| } rtCacheOp_t; | |||||
| typedef struct tagTicketCache { /* one ticket cache description; element type of rtFftsTaskInfo_t.ticketCache[] */ | |||||
| rtCacheOp_t cacheOption; | |||||
| uint8_t ticketCacheWindow; | |||||
| union { /* NOTE(review): the active member presumably follows rtFftsTaskInfo_t.fftsType - confirm */ | |||||
| rtAutoThreadCacheInfo_t autoThreadCache; | |||||
| rtManualThreadCacheInfo_t manualThreadCache; | |||||
| } custom; | |||||
| } rtTicketCache_t; | |||||
| typedef struct tagManualThreadNopInfo { /* manual thread mode NOP sub task parameters; member manualThreadNop of rtFftsSubTaskInfo_t.custom */ | |||||
| // depends on srcTickCacheVldBitmap in rtFftsSubTaskInfo_t | |||||
| rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||||
| } rtManualThreadNopInfo_t; | |||||
| typedef struct tagFftsSubTaskInfo { /* description of one sub task; element type of rtFftsTaskInfo_t.subTask[] */ | |||||
| rtFftsSubTaskType_t subTaskType; | |||||
| uint16_t threadDim; /* referenced as the bound of valid entries by the per-thread arrays in rtManualThreadAicAivInfo_t */ | |||||
| uint8_t dstTickCacheVldBitmap; | |||||
| uint8_t srcTickCacheVldBitmap; | |||||
| uint8_t srcDataOutOfSubGraphBitmap; | |||||
| uint8_t dstTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||||
| uint8_t srcTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||||
| union { /* NOTE(review): the active member presumably follows subTaskType and the task's fftsType - confirm */ | |||||
| rtAutoThreadAicAivInfo_t autoThreadAicAiv; | |||||
| rtManualThreadAicAivInfo_t manualThreadAicAiv; | |||||
| rtManualThreadNopInfo_t manualThreadNop; | |||||
| } custom; | |||||
| } rtFftsSubTaskInfo_t; | |||||
| typedef struct tagFftsDescInfo { /* task-level FFTS descriptor settings; type of rtFftsTaskInfo_t.fftsDesc */ | |||||
| uint8_t tm; // thread subtask kickstart mode, 0: order, 1: disorder | |||||
| uint8_t di; // discard invalidate | |||||
| uint8_t dw; // discard write back | |||||
| uint8_t df; // discard flush | |||||
| uint8_t dataSplitUnit; // split source or ticket cache by 2^dataSplitUnit MB | |||||
| uint8_t prefetchOstNum; | |||||
| uint8_t cacheMaintainOstNum; | |||||
| uint8_t aicPrefetchUpper; | |||||
| uint8_t aicPrefetchLower; | |||||
| uint8_t aivPrefetchUpper; | |||||
| uint8_t aivPrefetchLower; | |||||
| } rtFftsDescInfo_t; | |||||
| typedef struct tagFftsTaskInfo { /* complete FFTS task description; the argument type of rtFftsTaskLaunch() */ | |||||
| rtFftsType_t fftsType; | |||||
| uint16_t subTaskNum; | |||||
| uint16_t tickCacheNum; | |||||
| rtFftsDescInfo_t fftsDesc; | |||||
| // sub task descriptions; the actual count is subTaskNum | |||||
| rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; | |||||
| // ticket caches; the actual count is tickCacheNum | |||||
| rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; | |||||
| } rtFftsTaskInfo_t; | |||||
| RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); /* FFTS task launch entry point; NOTE(review): presumably submits *fftsTaskInfo to stream and reports the outcome as rtError_t - confirm with the implementation */ | |||||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
| } | |||||
| #endif | |||||
| #endif // __CCE_RUNTIME_FFTS_H | |||||