| @@ -0,0 +1,184 @@ | |||
| /* | |||
| * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
| * Description: ffts interface | |||
| */ | |||
| #ifndef __CCE_RUNTIME_FFTS_H | |||
| #define __CCE_RUNTIME_FFTS_H | |||
| #include "base.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| extern "C" { | |||
| #endif | |||
| #define RT_FFTS_MAX_SUB_TASK_NUM 32U | |||
| #define RT_FFTS_MAX_TICKET_CACHE_NUM 64U | |||
| #define RT_FFTS_MAX_MANUAL_THREAD_NUM 16U | |||
| #define RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK 8U | |||
| #define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U | |||
| typedef enum tagFftsType { | |||
| RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define | |||
| RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define | |||
| } rtFftsType_t; | |||
| typedef enum tagFftsSubTaskType { | |||
| RT_FFTS_SUB_TASK_TYPE_AIC = 0, | |||
| RT_FFTS_SUB_TASK_TYPE_AIV = 1, | |||
| RT_FFTS_SUB_TASK_TYPE_NOP = 2, | |||
| RT_FFTS_SUB_TASK_TYPE_NOTIFY_WAIT = 3, | |||
| RT_FFTS_SUB_TASK_TYPE_NOTIFY_RECORD = 4, | |||
| RT_FFTS_SUB_TASK_TYPE_WRITE_VALUE = 5, | |||
| RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, | |||
| RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, | |||
| RT_FFTS_SUB_TASK_TYPE_SDMA = 8, | |||
| RT_FFTS_SUB_TASK_TYPE_RESERVED, | |||
| } rtFftsSubTaskType_t; | |||
| typedef struct tagManualThreadDmuInfo { | |||
| uint64_t dataAddr; // device mem | |||
| uint16_t numOuter; | |||
| uint16_t numInner; | |||
| uint32_t strideOuter; | |||
| uint32_t lenInner; | |||
| uint32_t strideInner; | |||
| } rtManualThreadDmuInfo_t; | |||
| typedef struct tagManualThreadDependency { | |||
| uint8_t dependency[RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN]; | |||
| } rtManualThreadDependency_t; | |||
| typedef struct tagManualThreadAicAivInfo { | |||
| uint64_t taskParamAddr; // device mem | |||
| uint16_t taskParamOffset; | |||
| // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 | |||
| // when satMode=0 and FP16 computation with none INF inputs overflows/underflows, | |||
| // results will be saturated to +/-MAX of FP16 | |||
| uint8_t satMode; | |||
| uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved | |||
| uint8_t iCachePrefetchCnt; // units is 2K | |||
| uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 | |||
| uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 | |||
| uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts | |||
| // num: thread0_prefetch_dmu_descriptor_index – prefetch_once_dmu_descriptor_index | |||
| uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim | |||
| uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||
| const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||
| rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1] | |||
| rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||
| } rtManualThreadAicAivInfo_t; | |||
| typedef struct tagAutoThreadPrefetch { | |||
| uint64_t dataAddr; // device mem | |||
| uint32_t dataAddrOffset; | |||
| uint32_t nonTailDataLen; | |||
| uint32_t tailDataLen; | |||
| } rtAutoThreadPrefetch_t; | |||
| typedef struct tagAutoThreadAicAivInfo { | |||
| uint64_t taskParamAddr; // device mem | |||
| uint16_t taskParamOffset; | |||
| // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 | |||
| // when satMode=0 and FP16 computation with none INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16 | |||
| uint8_t satMode; | |||
| uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved | |||
| uint8_t iCachePrefetchCnt; // units is 2K | |||
| uint8_t prefetchEnableBitmap; // 8 bit bitmap | |||
| uint8_t prefetchOnceBitmap; // 8 bit bitmap | |||
| uint16_t tailBlkDim; | |||
| uint16_t nonTailBlkDim; | |||
| const char *nonTailTaskFuncStub; | |||
| const char *tailTaskFuncStub; | |||
| // for prefetch, valid num is prefetchEnableBitmap bit count. | |||
| // if prefetchEnableBitmap='00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid | |||
| rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||
| } rtAutoThreadAicAivInfo_t; | |||
| typedef struct tagAutoThreadCacheInfo { | |||
| uint64_t dataAddr; // device mem | |||
| uint32_t dataAddrOffset; | |||
| uint32_t nonTailDataLen; | |||
| uint32_t tailDataLen; | |||
| uint16_t ticketCacheRefCnt; | |||
| } rtAutoThreadCacheInfo_t; | |||
| typedef struct tagManualThreadCacheInfo { | |||
| rtManualThreadDmuInfo_t *dmuList; // 0-64k | |||
| uint16_t dmuNum; | |||
| uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||
| uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; | |||
| } rtManualThreadCacheInfo_t; | |||
| typedef enum tagCacheOp { | |||
| RT_CACHE_OP_NONE = 0, | |||
| RT_CACHE_OP_FLUSH = 1, | |||
| RT_CACHE_OP_INVALIDATE = 2, | |||
| RT_CACHE_OP_WRITE_BACK = 3, | |||
| } rtCacheOp_t; | |||
| typedef struct tagTicketCache { | |||
| rtCacheOp_t cacheOption; | |||
| uint8_t ticketCacheWindow; | |||
| union { | |||
| rtAutoThreadCacheInfo_t autoThreadCache; | |||
| rtManualThreadCacheInfo_t manualThreadCache; | |||
| } custom; | |||
| } rtTicketCache_t; | |||
| typedef struct tagManualThreadNopInfo { | |||
| // depend srcTickCacheVldBitmap in rtFftsSubTaskInfo_t | |||
| rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||
| } rtManualThreadNopInfo_t; | |||
| typedef struct tagFftsSubTaskInfo { | |||
| rtFftsSubTaskType_t subTaskType; | |||
| uint16_t threadDim; | |||
| uint8_t dstTickCacheVldBitmap; | |||
| uint8_t srcTickCacheVldBitmap; | |||
| uint8_t srcDataOutOfSubGraphBitmap; | |||
| uint8_t dstTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||
| uint8_t srcTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; | |||
| union { | |||
| rtAutoThreadAicAivInfo_t autoThreadAicAiv; | |||
| rtManualThreadAicAivInfo_t manualThreadAicAiv; | |||
| rtManualThreadNopInfo_t manualThreadNop; | |||
| } custom; | |||
| } rtFftsSubTaskInfo_t; | |||
| typedef struct tagFftsDescInfo { | |||
| uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder | |||
| uint8_t di; // discard invalidate | |||
| uint8_t dw; // discard write back | |||
| uint8_t df; // discard flush | |||
| uint8_t dataSplitUnit; // split source or ticket cache by 2^dataSplitUnit MB | |||
| uint8_t prefetchOstNum; | |||
| uint8_t cacheMaintainOstNum; | |||
| uint8_t aicPrefetchUpper; | |||
| uint8_t aicPrefetchLower; | |||
| uint8_t aivPrefetchUpper; | |||
| uint8_t aivPrefetchLower; | |||
| } rtFftsDescInfo_t; | |||
| typedef struct tagFftsTaskInfo { | |||
| rtFftsType_t fftsType; | |||
| uint16_t subTaskNum; | |||
| uint16_t tickCacheNum; | |||
| rtFftsDescInfo_t fftsDesc; | |||
| // sub task desc, real num is subTaskNum | |||
| rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; | |||
| // ticket cache, real number is tickCacheNum. | |||
| rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; | |||
| } rtFftsTaskInfo_t; | |||
| RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| } | |||
| #endif | |||
| #endif // __CCE_RUNTIME_FFTS_H | |||