|
- /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #ifndef DNN_OP_H__
- #define DNN_OP_H__
-
- #include "cce/blas_struct.h"
- #include "cce/cce.h"
- #include "cce/customize.h"
-
- namespace cce {
-
- /**
- * @ingroup dnn
- * @brief create descriptor of parameters for exponential function
- * @param [in|out] expDesc pointer to descriptor of parameters for exponential function
- * @return ccStatus_t
- */
- ccStatus_t ccCreateExpDescriptor(ccExpDescriptor_t *expDesc);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of parameters for logarithmic function
- * @param [in|out] logDesc pointer to descriptor of parameters for logarithmic function
- * @return ccStatus_t
- */
-
- ccStatus_t ccCreateLogDescriptor(ccLogDescriptor_t *logDesc);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of parameters for pow function
- * @param [in|out] powDesc pointer to descriptor of parameters for pow function
- * @return ccStatus_t
- */
-
- ccStatus_t ccCreatePowDescriptor(ccPowDescriptor_t *powDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of parameters for exponential function
- * @param [in] expDesc pointer to descriptor of parameters for exponential function
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyExpDescriptor(ccExpDescriptor_t *expDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of parameters for logarithmic function
- * @param [in] logDesc pointer to descriptor of parameters for logarithmic function
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyLogDescriptor(ccLogDescriptor_t *logDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of parameters for pow function
- * @param [in] powDesc pointer to descriptor of parameters for pow function
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyPowDescriptor(ccPowDescriptor_t *powDesc);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of parameters for NonMaxSuppression function
- * @param [in|out] nonMaxSuppressionDesc pointer to descriptor of parameters for NonMaxSuppression function
- * @return ccStatus_t
- */
- ccStatus_t ccCreateNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t *nonMaxSuppressionDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of parameters for NonMaxSuppression function
- * @param [in] nonMaxSuppressionDesc pointer to descriptor of parameters for NonMaxSuppression function
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t *nonMaxSuppressionDesc);
- /**
- * @ingroup dnn
- * @brief trans tensor x to tensor y with pads inserted (summary inferred from the function name -- TODO confirm)
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data pointer
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data pointer
- * @param [in] ySizeInBytes size in bytes of the memory behind y (presumably the allocated size -- TODO confirm)
- * @param [in] boxTypeNum the num of boxType
- * @param [in] interweave whether the axis interweave
- * @param [in] background NOTE(review): meaning is not documented in this header -- confirm against the implementation
- * @param [in] boxTypeNumMax defaults to 0; NOTE(review): meaning is not documented in this header -- confirm
- * @param [in] isScaleVec defaults to false; NOTE(review): meaning is not documented in this header -- confirm
- * @return ccStatus_t
- */
- ccStatus_t ccTransTensorIncertPads(const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
- void *y, uint32_t ySizeInBytes, uint32_t boxTypeNum, bool interweave,
- bool background, uint32_t boxTypeNumMax = 0, bool isScaleVec = false);
- /**
- * @ingroup dnn
- * @brief trans tensor x to tensor y with pads inserted, presumably for int32 data (inferred from the function
- * name -- TODO confirm)
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data pointer
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data pointer
- * @param [in] ySizeInBytes size in bytes of the memory behind y (presumably the allocated size -- TODO confirm)
- * @param [in] boxTypeNum the num of boxType
- * @param [in] interweave whether the axis interweave
- * @param [in] background NOTE(review): meaning is not documented in this header -- confirm against the implementation
- * @return ccStatus_t
- */
- ccStatus_t ccTransTensorIncertPadsInt32(const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
- uint32_t boxTypeNum, bool interweave, bool background);
- /**
- * @ingroup dnn
- * @brief trans bbox tensor x to tensor y, presumably for Mask R-CNN (inferred from the function name -- TODO confirm)
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data pointer
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data pointer
- * @param [in] ySizeInBytes size in bytes of the memory behind y (presumably the allocated size -- TODO confirm)
- * @param [in] boxTypeNum the num of boxType
- * @return ccStatus_t
- */
- ccStatus_t ccTransMskrcnnBbox(const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
- void *y, uint32_t ySizeInBytes, uint32_t boxTypeNum);
- /**
- * @ingroup dnn
- * @brief set the vector quantize parameter of a tensor descriptor
- * @param [in|out] tensorDesc descriptor of tensor
- * @param [in] vecQuantizePara pointer to the vector quantize parameter to set
- * @return ccStatus_t
- */
- ccStatus_t ccSetTensorDescriptorQuantizeParam(ccTensorDescriptor_t tensorDesc,
- const ccVecQuantizePara_t *vecQuantizePara);
- /**
- * @ingroup dnn
- * @brief read the vector quantize parameter of a tensor descriptor
- * @param [in] tensorDesc descriptor of tensor
- * @param [in|out] vecQuantizePara pointer to the vector quantize parameter to fill
- * @return ccStatus_t
- */
- ccStatus_t ccGetTensorDescriptorQuantizeParam(const ccTensorDescriptor_t tensorDesc,
- ccVecQuantizePara_t *vecQuantizePara);
-
- /**
- * @ingroup dnn
- * @brief init filter descriptor to 4d filter
- * @param [in|out] filterDesc descriptor of filter
- * @param [in] format format of filter
- * @param [in] dataType data type in device
- * @param [in] k number of output feature maps
- * @param [in] c number of input feature maps
- * @param [in] h height of filter
- * @param [in] w width of filter
- * @return ccStatus_t
- */
- ccStatus_t ccSetFilter4dDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
- int32_t k, int32_t c, int32_t h, int32_t w);
- /**
- * @ingroup dnn
- * @brief init descriptor to 6d filter (summary inferred from the function name -- TODO confirm)
- * @param [in|out] filterDesc descriptor to init; note it is a ccTensorDescriptor_t, not a ccFilterDescriptor_t
- * @param [in] format format of filter
- * @param [in] dataType data type in device
- * @param [in] c1 NOTE(review): the six dimension arguments c1, h, w, n, co, c0 are not documented in this
- * header -- confirm their meaning against the implementation
- * @param [in] h presumably height of filter -- TODO confirm
- * @param [in] w presumably width of filter -- TODO confirm
- * @param [in] n see note on c1
- * @param [in] co see note on c1
- * @param [in] c0 see note on c1
- * @return ccStatus_t
- */
- ccStatus_t ccSetFilter6dDescriptor(ccTensorDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
- int32_t c1, int32_t h, int32_t w, int32_t n, int32_t co, int32_t c0);
- /**
- * @ingroup dnn
- * @brief init filter descriptor to Fractal filter
- * @param [in|out] filterDesc descriptor of filter
- * @param [in] format format of filter
- * @param [in] dataType data type in device
- * @param [in] k number of output feature maps
- * @param [in] c number of input feature maps
- * @param [in] h height of filter
- * @param [in] w width of filter
- * @return ccStatus_t
- */
- ccStatus_t ccSetFilterFractalDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
- int32_t k, int32_t c, int32_t h, int32_t w);
-
- /**
- * @ingroup dnn
- * @brief init filter descriptor to int8 4d filter (wording follows the function name; the previous brief was a
- * copy of the Fractal filter setter -- TODO confirm)
- * @param [in|out] filterDesc descriptor of filter
- * @param [in] format format of filter
- * @param [in] dataType data type in device
- * @param [in] k number of output feature maps
- * @param [in] c number of input feature maps
- * @param [in] h height of filter
- * @param [in] w width of filter
- * @param [in] outputDataType output data type (NOTE(review): exact use is not documented in this header -- confirm)
- * @return ccStatus_t
- */
- ccStatus_t ccSetInt8Filter4dDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
- int32_t k, int32_t c, int32_t h, int32_t w, ccDataType_t outputDataType);
-
- /**
- * @ingroup dnn
- * @brief read Fractal filter descriptor
- * @param [in] filterDesc descriptor of filter
- * @param [in|out] format pointer to format of filter
- * @param [in|out] dataType pointer to data type in device
- * @param [in|out] k pointer to number of output feature maps
- * @param [in|out] c pointer to number of input feature maps
- * @param [in|out] h pointer to height of filter
- * @param [in|out] w pointer to width of filter
- * @return ccStatus_t
- */
- ccStatus_t ccGetFilterFractalDescriptor(const ccFilterDescriptor_t filterDesc, ccTensorFormat_t *format,
- ccDataType_t *dataType, int32_t *k, int32_t *c, int32_t *h, int32_t *w);
-
- /**
- * @ingroup dnn
- * @brief get data size in bytes of depthwise conv filter
- * @param [in] filterDesc descriptor of filter
- * @param [in] groupNum groupNum of conv
- * @param [in|out] size pointer to data size
- * @return ccStatus_t
- */
- ccStatus_t ccGetDepthWiseConvFilterSizeInBytes(const ccFilterDescriptor_t filterDesc, int32_t groupNum, uint32_t *size);
-
- /**
- * @ingroup dnn
- * @brief trans group conv filter to fractal format
- * @param [in] filterSrcInfo descriptor of input filter
- * @param [in] filterSrc input data pointer
- * @param [in] filterDstInfo descriptor of output filter
- * @param [in|out] filterDst output data pointer
- * @param [in] group group size
- * @param [in] dstSize size of the memory behind filterDst (presumably in bytes -- TODO confirm)
- * @return ccStatus_t
- */
- ccStatus_t ccTransGroupConvFilter(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
- ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group,
- uint32_t dstSize);
-
- /**
- * @ingroup dnn
- * @brief trans conv filter with boxTypeNum to fractal format
- * @param [in] xDesc descriptor of input filter
- * @param [in] x input data pointer
- * @param [in] yDesc descriptor of output filter
- * @param [in|out] y output data pointer
- * @param [in] ySizeInBytes the malloc memory size
- * @param [in] boxTypeNum the num of boxType
- * @param [in] interweave whether the axis interweave
- * @param [in] boxTypeNumMax defaults to 0; NOTE(review): meaning is not documented in this header -- confirm
- * @return ccStatus_t
- */
- ccStatus_t ccTransFilterWithBoxTypeNum(const ccFilterDescriptor_t xDesc, const void *x,
- const ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
- uint32_t boxTypeNum, bool interweave, uint32_t boxTypeNumMax = 0);
- /**
- * @ingroup dnn
- * @brief trans int8 conv filter with boxTypeNum to fractal format
- * @param [in] wDesc descriptor of input filter
- * @param [in] x input data pointer
- * @param [in] yDesc descriptor of output filter
- * @param [in|out] y output data pointer
- * @param [in] ySizeInBytes the malloc memory size
- * @param [in] boxTypeNum the num of boxType
- * @param [in] interweave whether the axis interweave
- * @param [in] outputDataType output DataType
- * @return ccStatus_t
- */
- ccStatus_t ccTransFilterInt8WithBoxTypeNum(const ccFilterDescriptor_t wDesc, const void *x,
- const ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
- uint32_t boxTypeNum, bool interweave, ccDataType_t outputDataType);
-
- /**
- * @ingroup dnn
- * @brief trans depthwise conv filter to fractal format, input format NCHW
- * @param [in] wDesc descriptor of input filter
- * @param [in] w input data pointer
- * @param [in] groupNum groupNum of conv
- * @param [in] yDesc descriptor of output filter
- * @param [in|out] y output data pointer
- * @param [in] ySizeInBytes the malloc memory size
- * @return ccStatus_t
- */
-
- ccStatus_t transDepthWiseConvFilterNCHWToFractalZ(const ccFilterDescriptor_t wDesc, const void *w, int32_t groupNum,
- ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief trans int8 depthwise conv filter to fractal format, input format NCHW
- * @param [in] wDesc descriptor of input filter
- * @param [in] w input data pointer
- * @param [in] groupNum groupNum of conv
- * @param [in] yDesc descriptor of output filter
- * @param [in|out] y output data pointer
- * @param [in] ySizeInBytes the malloc memory size
- * @return ccStatus_t
- */
- ccStatus_t transDepthWiseConvFilterInt8NCHWToFractalZ(const ccFilterDescriptor_t wDesc, const void *w, int32_t groupNum,
- ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief trans depthwise conv filter to fractal format, input format CHWN
- * @param [in] filterSrcInfo descriptor of input filter
- * @param [in] filterSrc input data pointer
- * @param [in] group group num of conv (presumably the same meaning as groupNum in the NCHW variants -- TODO confirm)
- * @param [in] filterDstInfo descriptor of output filter
- * @param [in|out] filterDst output data pointer
- * @param [in] destSize the malloc memory size of filterDst
- * @return ccStatus_t
- */
- ccStatus_t transDepthWiseConvFilterCHWNToFractalZ(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
- uint32_t group, ccFilterDescriptor_t filterDstInfo, void *filterDst,
- uint32_t destSize);
-
- /**
- * @ingroup dnn
- * @brief check if it is supported by HighPerformance depthwise
- * @param [in] inputN,C,H,W input param
- * @param [in] filterN,C,H,W filter param
- * @param [in] dilationH,W dilation param
- * @param [in] padHHead,padHTail,padWHead,padWTail pad param
- * @param [in] strideH,W stride param
- * @param [in] groupNum Conv groupNum
- * @param [in|out] isHighPerformance isHighPerformance flag, passed by reference
- * @param [in] isquant defaults to false; presumably marks a quantized conv -- TODO confirm
- * @param [in] inputDataType input data type, defaults to CC_DATA_HALF
- * @param [in] outputDataType output data type, defaults to CC_DATA_HALF
- * @return ccStatus_t
- */
- ccStatus_t ccIsDepthwiseHighPerformance(int32_t inputN, int32_t inputC, int32_t inputH, int32_t inputW, int32_t filterN,
- int32_t filterC, int32_t filterH, int32_t filterW, int32_t dilationH,
- int32_t dilationW, int32_t padHHead, int32_t padHTail, int32_t padWHead,
- int32_t padWTail, int32_t strideH, int32_t strideW, int32_t groupNum,
- bool &isHighPerformance, bool isquant = false,
- ccDataType_t inputDataType = CC_DATA_HALF,
- ccDataType_t outputDataType = CC_DATA_HALF);
-
- /**
- * @ingroup dnn
- * @brief trans depthwise conv filter to fractal format, input format CHWN
- * NOTE(review): this prototype repeats the declaration of the same function that appears earlier in this
- * header; one of the two is redundant.
- * @param [in] filterSrcInfo descriptor of input filter
- * @param [in] filterSrc input data pointer
- * @param [in] group group num of conv (presumably the same meaning as groupNum in the NCHW variants -- TODO confirm)
- * @param [in] filterDstInfo descriptor of output filter
- * @param [in|out] filterDst output data pointer
- * @param [in] destSize the malloc memory size of filterDst
- * @return ccStatus_t
- */
- ccStatus_t transDepthWiseConvFilterCHWNToFractalZ(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
- uint32_t group, ccFilterDescriptor_t filterDstInfo, void *filterDst,
- uint32_t destSize);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of fullconnection operator
- * @param [in|out] fcDesc pointer to descriptor of fullconnection operator
- * @return ccStatus_t
- */
- ccStatus_t ccCreateFullConnectionDescriptor(ccFullConnectionDescriptor_t *fcDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of fullconnection operator
- * @param [in] fcDesc pointer to descriptor of fullconnection operator
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyFullConnectionDescriptor(ccFullConnectionDescriptor_t *fcDesc);
-
- /**
- * @ingroup dnn
- * @brief set the beforeHasPad padding values of a 2d conv descriptor
- * @param [in|out] convDesc descriptor of convolution operator
- * @param [in] beforepadHHead before padding in height head
- * @param [in] beforepadHTail before padding in height tail
- * @param [in] beforepadWHead before padding in width head
- * @param [in] beforepadWTail before padding in width tail
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolution2dDescriptorForPad(ccConvolutionDescriptor_t convDesc, int32_t beforepadHHead,
- int32_t beforepadHTail, int32_t beforepadWHead, int32_t beforepadWTail);
-
- /**
- * @ingroup dnn
- * @brief set the concat batch size of a 2d conv descriptor
- * @param [in|out] convDesc descriptor of convolution operator
- * @param [in] concatBatchSize concat batch size
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolution2dDescriptorForConcatBatchSize(ccConvolutionDescriptor_t convDesc, int64_t concatBatchSize);
-
- /**
- * @ingroup dnn
- * @brief append operation after 2d convolution
- * @param [in|out] convDesc descriptor of convolution operator
- * @param [in] opType operation type for append at convolution operation
- * @param [in] opDesc operation descriptor for the opType
- * @return ccStatus_t
- */
- ccStatus_t ccConvolution2dAppendOp(ccConvolutionDescriptor_t convDesc, ccOpType_t opType, const void *opDesc);
-
- /**
- * @ingroup dnn
- * @brief read 2d conv beforeHasPad
- * @param [in] convDesc descriptor of convolution operator
- * @param [in|out] beforepadHHead before padding in height head, default is 0
- * @param [in|out] beforepadHTail before padding in height tail, default is 0
- * @param [in|out] beforepadWHead before padding in width head, default is 0
- * @param [in|out] beforepadWTail before padding in width tail, default is 0
- * @return ccStatus_t
- */
- ccStatus_t ccGetConvolution2dDescriptorForPad(const ccConvolutionDescriptor_t convDesc, int32_t *beforepadHHead,
- int32_t *beforepadHTail, int32_t *beforepadWHead,
- int32_t *beforepadWTail);
-
- /**
- * @ingroup dnn
- * @brief read 2d conv concat batch size
- * @param [in] convDesc descriptor of convolution operator
- * @param [in|out] concatBatchSize concat batch size, default is 0
- * @return ccStatus_t
- */
- ccStatus_t ccGetConvolution2dDescriptorForConcatBatchSize(const ccConvolutionDescriptor_t convDesc,
- int64_t *concatBatchSize);
-
- /**
- * @ingroup dnn
- * @brief get the temp space size of convolution forward computation; temp space may not be needed
- * @param [in] handle cce handle
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] xDesc descriptor of input tensor
- * @param [in] wDesc descriptor of filter
- * @param [in] yDesc descriptor of output tensor
- * @param [in] algo algorithm of convolution forward
- * @param [in|out] sizeInBytes temp space size needed for the specified algorithm
- * @return ccStatus_t
- */
- ccStatus_t ccGetConvolutionForwardWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
- const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
- const ccTensorDescriptor_t yDesc, ccConvolutionFwdAlgo_t algo,
- uint32_t *sizeInBytes);
- /**
- * @ingroup dnn
- * @brief get the temp space size of convolution backward data computation; temp space may not be needed
- * @param [in] handle cce handle
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] dyDesc descriptor of input tensor
- * @param [in] wDesc descriptor of filter
- * @param [in] dxDesc descriptor of output tensor
- * @param [in] algo algorithm of convolution backward
- * @param [in|out] sizeInBytes temp space size needed for the specified algorithm
- * @return ccStatus_t
- */
- ccStatus_t ccGetConvolutionBackwardDataWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
- const ccTensorDescriptor_t dyDesc,
- const ccFilterDescriptor_t wDesc,
- const ccTensorDescriptor_t dxDesc, ccConvolutionBwdAlgo_t algo,
- uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief get the temp space size of fc forward computation; temp space may not be needed
- * @param [in] handle cce handle
- * @param [in] fcDesc descriptor of fc operator
- * @param [in] xDesc descriptor of input tensor
- * @param [in] wDesc descriptor of filter
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] sizeInBytes temp space size needed, 0 means no memory needed
- * @return ccStatus_t
- */
- ccStatus_t ccGetFullConnectionForwardWorkspaceSize(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc,
- const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
- const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief convolution forward computation
- * @param [in] handle cce handle
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] wDesc descriptor of filter
- * @param [in] w filter data in device memory
- * @param [in] biasDesc descriptor of bias
- * @param [in] bias bias data in device memory
- * @param [in] algo algorithm of convolution forward
- * @param [in] workSpace temp space, may be NULL if no temp space is needed
- * @param [in] workSpaceSizeInBytes size of workSpace in bytes
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccConvolutionForward(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
- const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
- ccConvolutionFwdAlgo_t algo, void *workSpace, uint32_t workSpaceSizeInBytes,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
- /**
- * @ingroup dnn
- * @brief full alloc float and reset to 0
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] x output data in device memory
- * NOTE(review): x is described as output data but is declared const void * -- confirm which is intended
- * @param [in] beta scaling factors
- * @return ccStatus_t
- */
- ccStatus_t ccAllocFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta);
-
- /**
- * @ingroup dnn
- * @brief full get data set by op
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] x output data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [out] y output data in device memory
- * NOTE(review): x and y are described as output data but are both declared const void * -- confirm which
- * is intended
- * @return ccStatus_t
- */
-
- ccStatus_t ccGetFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, const void *y);
-
- /**
- * @ingroup dnn
- * @brief full clear register
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [out] y output data in device memory
- * NOTE(review): y is described as output data but is declared const void * -- confirm which is intended
- * @return ccStatus_t
- */
-
- ccStatus_t ccClearFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, const void *y);
-
- #ifndef DAVINCI_LITE
- /**
- * @ingroup dnn
- * @brief convolution backward data computation; only declared when DAVINCI_LITE is not defined
- * @param [in] handle cce handle
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] alpha scaling factors
- * @param [in] dyDesc descriptor of input tensor
- * @param [in] dy input data in device memory
- * @param [in] wDesc descriptor of filter
- * @param [in] w filter data in device memory
- * @param [in] algo algorithm of convolution backward
- * @param [in] workSpace temp space, may be NULL if no temp space is needed
- * @param [in] workSpaceSizeInBytes size of workSpace in bytes
- * @param [in] beta scaling factors
- * @param [in] dxDesc descriptor of output tensor
- * @param [in|out] dx output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccConvolutionBackwardData(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, const void *alpha,
- const ccTensorDescriptor_t dyDesc, const void *dy,
- const ccFilterDescriptor_t wDesc, const void *w, ccConvolutionBwdAlgo_t algo,
- void *workSpace, uint32_t workSpaceSizeInBytes, const void *beta,
- const ccTensorDescriptor_t dxDesc, void *dx);
- #endif
-
- /**
- * @ingroup dnn
- * @brief create descriptor of pooling operator
- * @param [in|out] poolingDesc pointer to descriptor of pooling operator
- * @return ccStatus_t
- */
- ccStatus_t ccCreatePoolingDescriptor(ccPoolingDescriptor_t *poolingDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of pooling operator
- * @param [in] poolingDesc pointer to descriptor of pooling operator
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyPoolingDescriptor(ccPoolingDescriptor_t *poolingDesc);
-
- /**
- * @ingroup dnn
- * @brief init pooling descriptor to 2d pooling
- * @param [in|out] poolingDesc descriptor of pooling operator
- * @param [in] mode mode of pooling
- * @param [in] padMode mode of padding
- * @param [in] maxpoolingNanOpt Nan propagation mode
- * @param [in] windowH height of pooling window
- * @param [in] windowW width of pooling window
- * @param [in] padHHead zero padding in height head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same
- * value.
- * @param [in] padHTail zero padding in height tail, need set when padMode is CC_PADDING_DIRECTASSIGN.
- * @param [in] padWHead zero padding in width head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same
- * value.
- * @param [in] padWTail zero padding in width tail, need set when padMode is CC_PADDING_DIRECTASSIGN.
- * @param [in] strideH stride in height
- * @param [in] strideW stride in width
- * @param [in] dataMode NOTE(review): meaning and valid values are not documented in this header -- confirm
- * @param [in] ceilMode 0:Floor 1:Ceil
- * @param [in] algo algorithm of pooling forward, defaults to CC_POOLING_FWD_ALGO_HALF
- * @return ccStatus_t
- */
- ccStatus_t ccSetPooling2dDescriptor(ccPoolingDescriptor_t poolingDesc, ccPoolingMode_t mode, ccPaddingMode_t padMode,
- ccNanPropagation_t maxpoolingNanOpt, int32_t windowH, int32_t windowW,
- int32_t padHHead, int32_t padHTail, int32_t padWHead, int32_t padWTail,
- int32_t strideH, int32_t strideW, int32_t dataMode, int32_t ceilMode,
- ccPooingFwdAlgo_t algo = CC_POOLING_FWD_ALGO_HALF);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of 2d pooling
- * @param [in] poolingDesc descriptor of pooling operator
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] n pointer to batch size
- * @param [in|out] c pointer to channels
- * @param [in|out] h pointer to height of feature map
- * @param [in|out] w pointer to width of feature map
- * @return ccStatus_t
- */
- ccStatus_t ccGetPooling2dForwardOutputDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc,
- int32_t *n, int32_t *c, int32_t *h, int32_t *w);
-
- /**
- * @ingroup dnn
- * @brief pooling forward computation
- * @param [in] handle cce handle
- * @param [in] poolingDesc descriptor of pooling operator
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccPoolingForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief max pooling backward computation
- * @param [in] handle cce handle
- * @param [in] poolingDesc descriptor of pooling operator
- * @param [in] alpha scaling factors
- * @param [in] beta scaling factors
- * @param [in] argMaskDesc descriptor of mask tensor
- * @param [in] argMask mask data in device memory
- * @param [in] dyDesc descriptor of input tensor
- * @param [in] dy input data in device memory
- * @param [in] dxDesc descriptor of output tensor
- * @param [in|out] dx output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccMaxPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
- const void *beta, const ccTensorDescriptor_t argMaskDesc, const void *argMask,
- const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t dxDesc,
- void *dx);
- /**
- * @ingroup dnn
- * @brief create descriptor of activation operator
- * @param [in|out] activationDesc pointer to descriptor of activation operator
- * @return ccStatus_t
- */
- ccStatus_t ccCreateActivationDescriptor(ccActivationDescriptor_t *activationDesc);
-
- /**
- * @ingroup dnn
- * @brief init activation descriptor to 2d activation
- * @param [in|out] activationDesc descriptor of activation operator
- * @param [in] mode mode of activation
- * @param [in] reluNanOpt Nan propagation mode
- * @param [in] coef ceiling for clipped RELU, alpha for ELU
- * @param [in] activationPara activation parameter union, defaults to {{0, CC_NAN_NOT_PROPAGATE}}
- * @return ccStatus_t
- */
- ccStatus_t ccSetActivationDescriptor(ccActivationDescriptor_t activationDesc, ccActivationMode_t mode,
- ccNanPropagation_t reluNanOpt, double coef,
- ccActivationPara_u activationPara = {{0, CC_NAN_NOT_PROPAGATE}});
-
- /**
- * @ingroup dnn
- * @brief read activation param
- * @param [in] activationDesc descriptor of activation operator
- * @param [in|out] mode pointer to mode of activation
- * @param [in|out] reluNanOpt pointer to Nan propagation mode
- * @param [in|out] coef pointer to coef
- * @param [in|out] activationPara pointer to activation parameter union, defaults to NULL
- * @return ccStatus_t
- */
- ccStatus_t ccGetActivationDescriptor(const ccActivationDescriptor_t activationDesc, ccActivationMode_t *mode,
- ccNanPropagation_t *reluNanOpt, double *coef,
- ccActivationPara_u *activationPara = NULL);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of activation operator
- * @param [in] activationDesc pointer to descriptor of activation operator
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyActivationDescriptor(ccActivationDescriptor_t *activationDesc);
-
- /**
- * @ingroup dnn
- * @brief activation forward computation
- * @param [in] handle cce handle
- * @param [in] activationDesc descriptor of activation operator
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccActivationForward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief derive a tensor descriptor from layer data descriptor for BatchNormalization
- * @param [in|out] derivedBnDesc descriptor of mean, variance, bias, scale tensors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] mode mode of BatchNormalization
- * @return ccStatus_t
- */
- ccStatus_t ccDeriveBNTensorDescriptor(ccTensorDescriptor_t derivedBnDesc, const ccTensorDescriptor_t xDesc,
- ccBatchNormMode_t mode);
-
- /**
- * @ingroup dnn
- * @brief batchnorm forward inference computation
- * @param [in] handle cce handle
- * @param [in] mode mode of batchnorm
- * @param [in] alpha scaling factors
- * @param [in] beta scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @param [in] bnScaleBiasMeanVarDesc descriptor of scale, bias, mean, variance tensor
- * @param [in] bnScale scaling factor
- * @param [in] bnBias bias factor
- * @param [in] estimatedMean mean
- * @param [in] estimatedVariance variance
- * @param [in] epsilon epsilon
- * @return ccStatus_t
- */
- ccStatus_t ccBatchNormForwardInference(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha, const void *beta,
- const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, void *y,
- const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
- const void *bnBias, const void *estimatedMean, const void *estimatedVariance,
- double epsilon);
-
- /**
- * @ingroup dnn
- * @brief batchnorm fusion forward inference computation
- * @param [in] handle cce handle
- * @param [in] mode mode of batchnorm
- * @param [in] bnDesc descriptor of batchnorm operator (the previous doc listed a reluFlag parameter that is not
- * in the signature)
- * @param [in] alpha scaling factors
- * @param [in] beta scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @param [in] bnScaleBiasMeanVarDesc descriptor of scale, bias, mean, variance tensor
- * @param [in] bnScale scaling factor
- * @param [in] bnBias bias factor
- * @param [in] estimatedMean mean
- * @param [in] estimatedVariance variance
- * @param [in] epsilon epsilon
- * @return ccStatus_t
- */
- ccStatus_t ccBatchNormFusionForwardInference(ccHandle_t handle, ccBatchNormMode_t mode, ccBatchNormDescriptor_t bnDesc,
- const void *alpha, const void *beta, const ccTensorDescriptor_t xDesc,
- const void *x, const ccTensorDescriptor_t yDesc, void *y,
- const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
- const void *bnBias, const void *estimatedMean,
- const void *estimatedVariance, double epsilon);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of batchnorm operator
- * @param [in|out] bnDesc pointer to descriptor of batchnorm operator
- * @return ccStatus_t
- */
- ccStatus_t ccCreateBatchNormDescriptor(ccBatchNormDescriptor_t *bnDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of batchnorm operator
- * @param [in] bnDesc pointer to descriptor of batchnorm operator
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyBatchNormDescriptor(ccBatchNormDescriptor_t *bnDesc);
-
- /**
- * @ingroup dnn
- * @brief append operation after batchnorm
- * @param [in|out] bnDesc descriptor of batchnorm operator
- * @param [in] opType operation type for append at batchnorm operation
- * @param [in] opDesc operation descriptor for the opType
- * @return ccStatus_t
- */
- ccStatus_t ccBatchNormAppendOp(ccBatchNormDescriptor_t bnDesc, ccOpType_t opType, const void *opDesc);
-
- /**
- * @ingroup dnn
- * @brief full get the output 4d dimension info of full connection
- * @param [in] xDesc descriptor of input tensor
- * @param [in] wDesc descriptor of weight tensor
- * @param [in|out] n pointer to batch size
- * @param [in|out] c pointer to channels
- * @param [in|out] h pointer to height of feature map
- * @param [in|out] w pointer to width of feature map
- * @return ccStatus_t
- */
- ccStatus_t ccGetFullConnectionFwdOutputDim(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
- int32_t *n, int32_t *c, int32_t *h, int32_t *w);
-
- /**
- * @ingroup dnn
- * @brief full connection forward computation
- * @param [in] handle cce handle
- * @param [in] fcDesc fc desc
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] wDesc descriptor of weight tensor
- * @param [in] w filter data in device memory
- * @param [in] biasDesc descriptor of bias tensor
- * @param [in] bias bias data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccFullConnectionForwardEx2(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
- const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief full connection forward computation with workspace
- * @param [in] handle cce handle
- * @param [in] fcDesc fc desc
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] wDesc descriptor of weight tensor
- * @param [in] w filter data in device memory
- * @param [in] biasDesc descriptor of bias tensor
- * @param [in] bias bias data in device memory
- * @param [in] workSpace workSpace in device memory
- * @param [in] workSpaceSizeInBytes workSpace size in bytes
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccFullConnectionForwardWithWorkSpace(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc,
- const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccFilterDescriptor_t wDesc, const void *w,
- const ccTensorDescriptor_t biasDesc, const void *bias, void *workSpace,
- uint32_t workSpaceSizeInBytes, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief full softmax forward computation
- * @param [in] handle cce handle
- * @param [in] algo softmax algorithm
- * @param [in] softmaxAxis mode of softmax
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] workSpace workSpace in device memory
- * @param [in] workSpaceSizeInBytes workSpace size in bytes
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccSoftmaxForward(ccHandle_t handle, ccSoftmaxAlgo_t algo, int32_t softmaxAxis, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
- uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
- /**
- * @ingroup dnn
- * @brief full softmax forward computation
- * @param [in] handle cce handle
- * @param [in] algo softmax algorithm
- * @param [in] softmaxAxis mode of softmax
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] workSpace workSpace in device memory
- * @param [in] workSpaceSizeInBytes workSpace size in bytes
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @param [in] classNum class number
- * @param [in] padNum pad Num
- * @return ccStatus_t
- */
- ccStatus_t ccSoftmaxClassForward(ccHandle_t handle, ccSoftmaxAlgo_t algo, int32_t softmaxAxis, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
- uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc,
- void *y, uint32_t classNum, uint32_t padNum);
-
- /**
- * @ingroup dnn
- * @brief full scale forward computation
- * @param [in] handle cce handle
- * @param [in] scaleBiasDesc descriptor of scale and bias tensor
- * @param [in] scale scaling factor
- * @param [in] bias bias factor
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccScaleForward(ccHandle_t handle, const ccTensorDescriptor_t scaleBiasDesc, const void *scale,
- const void *bias, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief full scale forward computation without bias
- * @param [in] handle cce handle
- * @param [in] scaleDesc descriptor of scale tensor
- * @param [in] scale scaling factor
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccScaleNoBiasForward(ccHandle_t handle, const ccTensorDescriptor_t scaleDesc, const void *scale,
- const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of depth to space
- * @param [in] xDesc descriptor of input tensor
- * @param [in] blockSize the size of block
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
-
- ccStatus_t ccGetDepthToSpaceOutputDim(const ccTensorDescriptor_t xDesc, const int32_t blockSize, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief depth to space forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] blockSize the size of block
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-
- ccStatus_t ccDepthToSpaceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const int32_t blockSize, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of space to depth
- * @param [in] xDesc descriptor of input tensor
- * @param [in] blockSize the size of block
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
-
- ccStatus_t ccGetSpaceToDepthOutputDim(const ccTensorDescriptor_t xDesc, const int32_t blockSize, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief space to depth forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] blockSize the size of block
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-
- ccStatus_t ccSpaceToDepthForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const int32_t blockSize, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
-
- /**
- * @ingroup dnn
- * @brief full eltwise forward computation
- * @param [in] handle cce handle
- * @param [in] eltDesc eltwise descriptor
- * @param [in] mode mode of eltwise
- * @param [in] inputNum the number of input tensors
- * @param [in] alpha scaling factors
- * @param [in] broadcast (Reserve) support tensor broadcasting or not
- * @param [in] xDesc array of descriptor for input tensor
- * @param [in] x array of input data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
-
- ccStatus_t ccEltwiseForwardEx(ccHandle_t handle, ccEltwiseDescriptor_t eltDesc, ccEltwiseMode_t mode, int32_t inputNum,
- const void *alpha, bool broadcast, const ccTensorDescriptor_t xDesc[], const void *x[],
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
- /**
- * @ingroup dnn
- * @brief create descriptor of eltwise operator
- * @param [in|out] eltDesc point to descriptor of eltwise operator
- * @return ccStatus_t
- */
- ccStatus_t ccCreateEltwiseDescriptor(ccEltwiseDescriptor_t *eltDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy eltwise descriptor
- * @param [in] eltDesc point to descriptor of eltwise operator
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyEltwiseDescriptor(ccEltwiseDescriptor_t *eltDesc);
-
- /**
- * @ingroup dnn
- * @brief append operation after eltwise
- * @param [in|out] eltDesc descriptor of eltwise operator
- * @param [in] opType operation type for append at eltwise operation
- * @param [in] opDesc operation descriptor for the opType
- * @return ccStatus_t
- */
- ccStatus_t ccEltwiseAppendOp(ccEltwiseDescriptor_t eltDesc, ccOpType_t opType, const void *opDesc);
-
- /**
- * @ingroup dnn
- * @brief set eltwise descriptor's quantize parameters
- * @param [in] eltDesc eltwise descriptor
- * @param [in] QuantizeInfo descriptor of quantize parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetEltwiseQuantizeInfo(ccEltwiseDescriptor_t eltDesc, const ccQuantizeDescriptor_t QuantizeInfo);
-
- /**
- * @ingroup dnn
- * @brief get the temp space size of reshape forward computation, maybe no need temp space
- * @param [in] handle cce handle
- * @param [in] xDesc descriptor of input tensor
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] sizeInBytes temp space size need for specified algorithm
- * @return ccStatus_t
- */
- ccStatus_t ccGetReshapeForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
- const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief reshape the input tensor
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc input tensor
- * @param [in] x input data
- * @param [in] workSpace temp space, maybe NULL if no need temp space
- * @param [in] workSpaceSizeInBytes sizeof workspace
- * @param [in] beta scaling factors
- * @param [in] yDesc output tensor
- * @param [in|out] y output data
- * @return ccStatus_t
- */
- ccStatus_t ccReshapeForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- void *workSpace, uint32_t workSpaceSizeInBytes, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
- /**
- * @ingroup dnn
- * @brief reshape the input tensor for data in ND format
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] rawFormat tensor format (ccTensorFormat_t); presumably the original format of the data - TODO confirm
- * @param [in] xDesc input tensor
- * @param [in] x input data
- * @param [in] workSpace temp space, maybe NULL if no need temp space
- * @param [in] workSpaceSizeInBytes sizeof workspace
- * @param [in] beta scaling factors
- * @param [in] yDesc output tensor
- * @param [in|out] y output data
- * @return ccStatus_t
- */
- ccStatus_t ccNdReshapeForward(ccHandle_t handle, const void *alpha, ccTensorFormat_t rawFormat,
- const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
- uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc,
- void *y);
-
- /**
- * @ingroup dnn
- * @brief Four2Five forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in | out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccFour2FiveForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief Five2Four forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in | out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccFive2FourForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief get the temp space size of add forward computation
- * @param [in] handle cce handle
- * @param [in] xDesc descriptor of the first input tensor
- * @param [in] wDesc descriptor of the second input tensor
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] sizeInBytes temp space size need for specified algorithm
- * @return ccStatus_t
- */
- ccStatus_t ccGetAddForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
- const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
- uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief Add forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x one input data in device memory
- * @param [in] wDesc descriptor of input tensor
- * @param [in] w the other input data in device memory
- * @param [in] beta bias factors
- * @param [in] workSpace the address apply in HBM
- * @param [in] workSpaceSizeInBytes the size apply in HBM
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccAddForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
- uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief Stack forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x[] x array is host mem array, the element is device address of input data
- * @param [in] num number of input tensor
- * @param [in] axis along which axis to stack the input tensor
- * @param [in] beta bias factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccStackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x[],
- uint32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of stack
- * @param [in] xDesc descriptor of input tensor
- * @param [in] num number of input tensor
- * @param [in] axis along which axis to stack the input tensor
- * @param [in|out] n point to batch size
- * @param [in|out] c point to channels
- * @param [in|out] h point to height
- * @param [in|out] w point to width
- * @param [in|out] realDimCnt point to real dimCnt after stack
- * @return ccStatus_t
- */
- ccStatus_t ccGetStackOutputDim(const ccTensorDescriptor_t xDesc, uint32_t num, int32_t axis, int32_t *n, int32_t *c,
- int32_t *h, int32_t *w, int32_t *realDimCnt);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of stack
- * @param [in] xDesc descriptor of input tensor
- * @param [in] num number of input tensor
- * @param [in] axis along which axis to stack the input tensor
- * @param [in|out] dimCnt dimcnt
- * @param [in|out] dim save dim value
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetStackOutputDim(const ccTensorDescriptor_t xDesc, uint32_t num, int32_t axis, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief return need grid generator or not
- * @param [in] inputH input height
- * @param [in] inputW input width
- * @param [in] outputH output height
- * @param [in] outputW output width
- * @param [in] alignCorner align corner flag (interp=true, resizeBilinear depends on its align corner parameter)
- * @param [out] needGridFlag true means grid generator is needed, false means not needed
- * @return ccStatus_t
- */
- ccStatus_t ccIsGridGenetatorNeed(int32_t inputH, int32_t inputW, int32_t outputH, int32_t outputW, bool alignCorner,
- bool &needGridFlag);
-
- /**
- * @ingroup dnn
- * @brief get the temp space size of Deconvolution forward computation, maybe no need temp space
- * @param [in] handle cce handle
- * @param [in] deconvDesc descriptor of Deconvolution operator
- * @param [in] xDesc descriptor of input tensor
- * @param [in] wDesc descriptor of filter
- * @param [in] yDesc descriptor of output tensor
- * @param [in] algo algorithm of Deconvolution forward
- * @param [in|out] sizeInBytes temp space size need for specified algorithm
- * @return ccStatus_t
- */
- ccStatus_t ccGetDeconvolutionForwardWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t deconvDesc,
- const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
- const ccTensorDescriptor_t yDesc, ccConvolutionFwdAlgo_t algo,
- uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief Deconvolution forward computation
- * @param [in] handle cce handle
- * @param [in] deconvDesc descriptor of deconvolution operator
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] wDesc descriptor of filter
- * @param [in] w filter data in device memory
- * @param [in] biasDesc descriptor of bias
- * @param [in] bias bias data in device memory
- * @param [in] algo algorithm of deconvolution forward
- * @param [in] workSpace temp space, maybe NULL if no need temp space
- * @param [in] workSpaceSizeInBytes sizeof workspace
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccDeconvolutionForward(ccHandle_t handle, const ccConvolutionDescriptor_t deconvDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
- const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
- ccConvolutionFwdAlgo_t algo, void *workSpace, uint32_t workSpaceSizeInBytes,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- #define MODE_C_N (0)
- #define MODE_N_C (1)
-
- /**
- * [ccArgMaxForward]
- * @param [in] handle [handle]
- * @param [in] alpha [reserved parameters]
- * @param [in] xDesc [x tensor descriptor]
- * @param [in] x [input tensor]
- * @param [in] outMaxVal [Whether to return the maximum value, true: return max value; false: return max value
- * index]
- * @param [in] topK [The number that returns the maximum index or maximum value]
- * @param [in] axis [Describes which axis of the input Tensor to reduce across]
- * @param [in] beta [reserved parameters]
- * @param [in] yDesc [y tensor descriptor]
- * @param [in|out] y [The max value index or max value tensor]
- * @return ccStatus_t
- */
- ccStatus_t ccArgMaxForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- bool outMaxVal, uint32_t topK, int32_t axis, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
- /**
- * [ccGetArgMaxOutputDim]
- * @param [in] xDesc [x tensor descriptor]
- * @param [in] outMaxVal [Whether to return the maximum value, true: return max value; false: return max value
- * index]
- * @param [in] topK [The number that returns the maximum index or maximum value]
- * @param [in] axis [Describes which axis of the input Tensor to reduce across]
- * @param [in|out] dimCnt [point to the output dimCnt]
- * @param [in|out] dim [arrays to save dims]
- * @param [in] dimLen [length of dim]
- * @return ccStatus_t
- */
- ccStatus_t ccGetArgMaxOutputDim(const ccTensorDescriptor_t xDesc, bool outMaxVal, uint32_t topK, int32_t axis,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * [ccGetArgMaxOutputDim]
- * @param [in] xDesc [x tensor descriptor]
- * @param [in] outMaxVal [Whether to return the maximum value, true: return max value; false: return max value
- * index]
- * @param [in] topK [The number that returns the maximum index or maximum value]
- * @param [in] axis [Describes which axis of the input Tensor to reduce across]
- * @param [in|out] n [point to batch number of the output tensor]
- * @param [in|out] c [point to channel of the output tensor]
- * @param [in|out] h [point to height of the output tensor]
- * @param [in|out] w [point to width of the output tensor]
- * @return ccStatus_t
- */
- ccStatus_t ccGetArgMaxOutputDim(const ccTensorDescriptor_t xDesc, bool outMaxVal, uint32_t topK, int32_t axis,
- int32_t *n, int32_t *c, int32_t *h, int32_t *w);
-
- /**
- * @ingroup dnn
- * @brief Yolo2ReorgForward computation
- * @param [in] handle CCE handle
- * @param [in] stride scale parameter
- * @param [in] reverse reverse parameter
- * @param [in] alpha alpha factor
- * @param [in] xDesc x-tensor descriptor
- * @param [in] x x-tensor in device memory
- * @param [in] workSpaceSizeInBytes temporary work space size
- * @param [out] workSpace temporary work space in device memory
- * @param [in] beta beta factor
- * @param [in] yDesc y-tensor descriptor
- * @param [out] y y-tensor in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccYolo2ReorgForward(ccHandle_t handle, int32_t stride, bool reverse, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, uint32_t workSpaceSizeInBytes,
- void *workSpace, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
- /**
- * @param [in] stride scale parameter
- * @param [in] reverse reverse parameter
- * @param [in] xDesc x-tensor descriptor
- * @param [in|out] n point to batch size
- * @param [in|out] c point to channels
- * @param [in|out] h point to height of feature map
- * @param [in|out] w point to width of feature map
- * @return ccStatus_t
- */
- ccStatus_t ccGetReorgOutPutDim(int32_t stride, bool reverse, const ccTensorDescriptor_t xDesc, int32_t *n, int32_t *c,
- int32_t *h, int32_t *w);
-
- /**
- * @param [in] stride scale parameter
- * @param [in] reverse reverse parameter
- * @param [in] xDesc x-tensor descriptor
- * @param [out] dimCnt output tensor dim cnt
- * @param [out] dim output tensor dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetReorgOutPutDim(int32_t stride, bool reverse, const ccTensorDescriptor_t xDesc, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- /**
- * @param [in] xDesc x-tensor descriptor
- * @param [out] sizeInBytes temporary work space size
- * @return ccStatus_t
- */
- ccStatus_t ccGetYolo2ReorgForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief full shuffle channel forward computation
- * @param [in] handle cce handle
- * @param [in] groupNum number of groups in a channel
- * @param [in] subgroupNum number of sub-groups in a group
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccShuffleChannelForward(ccHandle_t handle, int32_t groupNum, int32_t subgroupNum, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
- /**
- * @ingroup dnn
- * @brief get the temp space size of permute forward computation, maybe no need temp space
- * @param [in] handle cce handle
- * @param [in] xDesc descriptor of input tensor
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] sizeInBytes temp space size need for specified algorithm
- * @return ccStatus_t
- */
- ccStatus_t ccGetPermuteForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
- const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief get the output dim of permute forward computation
- * @param [in] xDesc descriptor of input tensor
- * @param [in] dimIndex dim Index
- * @param [in] dimIndexLen length of dimIndex
- * @param [in|out] dimCnt dim count
- * @param [in|out] dim dim value
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetPermuteOutputDim(const ccTensorDescriptor_t xDesc, const int32_t dimIndex[], const int32_t dimIndexLen,
- int32_t *dimCnt, int32_t *dim, int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief full permute forward computation
- * @param [in] handle cce handle
- * @param [in] dimIndex dim Index,only support [0,1,2,3]
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] workspace temp space, maybe NULL if no need temp space
- * @param [in] workSpaceSizeInBytes sizeof workspace
- * @param [in] beta scaling factors
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccPermuteForward(ccHandle_t handle, const int32_t dimIndex[], const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, void *workspace,
- uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief full split forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt)
- * @param [in] num the number of outputs
- * @param [in] beta scaling factors
- * @param [in] yDescArr descriptors of output tensors
- * @param [in|out] yArr output data array in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccSplitForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- int32_t axis, uint32_t num, const void *beta, const ccTensorDescriptor_t yDescArr[],
- void *yArr[]);
-
- /**
- * @ingroup dnn
- * @brief get the output dimensions info of split
- * @param [in] xDesc descriptor of input tensor
- * @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt)
- * @param [in] num the number of outputs
- * @param [in] sizes Optional, used to specify the sizes of each output tensor along split dim. The tensor x would
- * be split evenly along split dim if sizes is NULL
- * @param [in|out] nArr point to the first element of batch sizes
- * @param [in|out] cArr point to the first element of channels
- * @param [in|out] hArr point to the first element of heights of feature map
- * @param [in|out] wArr point to the first element of widths of feature map
- * @return ccStatus_t
- */
- ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num,
- const uint32_t sizes[], uint32_t nArr[], uint32_t cArr[], uint32_t hArr[],
- uint32_t wArr[]);
-
- /**
- * @ingroup dnn
- * @brief Get split output shape(s).
- * @param [in] xDesc input tensor, support ND and NC1HWC0
- * @param [in] axis split axis, a negative axis will be increased by dimCnt once.
- * @param [in] num number of split outputs.
- * @param [in] sizes split dim size on axis. If NULL is set, the input will be divided into num equal parts.
- * @param [out] dimCnt dimCnt array of the split outputs. One to one correspondence with the split outputs.
- * @param [out] dim array of dim arrays of the split outputs. One to one correspondence with the split outputs.
- * @param [in] dimLen length of dim (pass in the length of the entire space pointed to by dim,
- * not just the length of the dim array, because dim is a level 2 array;
- * dimLen = lengthof dim[][], not just lengthof dim[])
- * @return ccStatus_t
- */
- ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num,
- const uint32_t sizes[], int32_t *dimCnt, int32_t *dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief create weight compress info
- * @param [in|out] compressInfo point to CompressInfo
- * @return ccStatus_t
- */
- ccStatus_t ccCreateWeightCompressInfo(ccWeightCompressInfo_t **compressInfo);
-
- /**
- * @ingroup dnn
- * @brief destroy weight compress info
- * @param [in] compressInfo point to CompressInfo
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyWeightCompressInfo(ccWeightCompressInfo_t **compressInfo);
-
- /**
- * @ingroup dnn
- * @brief create compress table
- * @param [in|out] compressTab point to weight compress table
- * @return ccStatus_t
- */
- ccStatus_t ccCreateWeightCompressTab(ccWeightCompressTab_t **compressTab);
-
- /**
- * @ingroup dnn
- * @brief destroy compress table
- * @param [in] compressTab point to weight compress table
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyWeightCompressTab(ccWeightCompressTab_t **compressTab);
-
- /**
- * @ingroup dnn
- * @brief get fc compress info
- * @param [in] xDesc descriptor of input tensor
- * @param [in] wDesc descriptor of weight tensor
- * @param [in] biasDesc descriptor of bias tensor
- * @param [in] dataTypeTransmode mode of data type transform
- * @param [in] weightCompressInfo compress info, compute based on tiling method
- * @param [in|out] outputSize output data size in byte
- * @param [in|out] infoTabSize compress info table
- * @return ccStatus_t
- */
- ccStatus_t ccGetCompressedFcWeightInfo(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
- const ccTensorDescriptor_t biasDesc, ccDataTypeTransMode_t dataTypeTransmode,
- ccWeightCompressInfo_t *weightCompressInfo, uint32_t *outputSize,
- uint32_t *infoTabSize);
- /**
- * @ingroup dnn
- * @brief compress fc
- * @param [in] wDesc descriptor of weight tensor
- * @param [in] w filter data in device memory
- * @param [in] weightCompressInfo compress info, compute based on tiling method
- * @param [in] dataTypeTransmode mode of data type transform
- * @param [in] yDesc filter descriptor paired with output y (NOTE(review): direction not visible here - confirm)
- * @param [in|out] y output data in device memory
- * @param [in] ySize transformed data size in byte
- * @param [in|out] yCompressedSize compressed output data size in byte
- * @param [in|out] infoTab compressed info table
- * @param [in] infoTabSize compressed info table size in byte
- * @return ccStatus_t
- */
- ccStatus_t ccCompressWeight(const ccFilterDescriptor_t wDesc, const void *w,
- const ccWeightCompressInfo_t *weightCompressInfo, ccDataTypeTransMode_t dataTypeTransmode,
- ccFilterDescriptor_t yDesc, void *y, uint32_t ySize, uint32_t *yCompressedSize,
- void *infoTab, uint32_t infoTabSize);
-
- /**
- * @ingroup dnn
- * @brief restore compressed fc data
- * @param [in] x input data in device memory
- * @param [in] xSizeInBytes input compressed weight data size in byte
- * @param [in|out] y output data in device memory
- * @param [in] ySizeInBytes output data size in byte
- * @param [in] kind memory copy kind (rtMemcpyKind_t)
- * @return ccStatus_t
- */
- ccStatus_t ccRestoreCompressedWeight(const void *x, uint32_t xSizeInBytes, void *y, uint32_t ySizeInBytes,
- rtMemcpyKind_t kind);
-
- /**
- * @ingroup dnn
- * @brief create quantize parameters struct
- * @param [in|out] quantizeInfo descriptor of quantize parameters
- * @return ccStatus_t
- */
- ccStatus_t ccCreateQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo);
-
- /**
- * @ingroup dnn
- * @brief destroy quantize parameters struct
- * @param [in] quantizeInfo descriptor of quantize parameters
- * @return ccStatus_t
- */
- ccStatus_t ccDestoryQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo);
-
- /**
- * @ingroup dnn
- * @brief set quantize parameters
- * @param [in] quantizeInfo descriptor of quantize parameters
- * @param [in] scaleValMode enum type for quantize scale value type (normal or sqrt)
- * @param [in] scale quantize scale value
- * @param [in] offset quantize offset (when quantize algorithm is half offset or full offset, this should be
- * configured)
- * @param [in] offsetPad padding value for load3d (only for half offset or full offset)
- * @return ccStatus_t
- */
- ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
- const uint16_t *scale, const uint16_t *offset, const uint8_t *offsetPad);
-
- /**
- * @ingroup dnn
- * @brief set Requantize parameters
- * @param [in] quantizeInfo descriptor of quantize parameters
- * @param [in] scaleValMode enum type for requantize scale value type (normal or sqrt)
- * @param [in] scaleRq requantize scale value
- * @param [in] nextLayerOffset quantize offset (must be configured when the quantize algorithm is half
- * offset or full offset; NOTE(review): presumably the offset of the next layer -- confirm)
- * @param [in] offsetw offset for filter (only config for full offset quantize)
- * @return ccStatus_t
- */
- ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
- const uint16_t *scaleRq, const uint16_t *nextLayerOffset, const int32_t *offsetw);
-
- /**
- * @ingroup dnn
- * @brief set Dequantize parameters
- * @param [in] quantizeInfo descriptor of quantize parameters
- * @param [in] scaleValMode enum type for dequantize scale value type (normal or sqrt)
- * @param [in] scaleDq dequantize scale value
- * @param [in] offsetw offset for filter (only config for full offset quantize)
- * @return ccStatus_t
- */
- ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
- const uint16_t *scaleDq, const int32_t *offsetw);
-
- /**
- * @ingroup dnn
- * @brief set convolution descriptor's quantize parameters
- * @param [in|out] convDesc convolution descriptor
- * @param [in] QuantizeInfo descriptor of quantize parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolutionQuantizeInfo(ccConvolutionDescriptor_t convDesc, const ccQuantizeDescriptor_t QuantizeInfo);
-
- /**
- * @ingroup dnn
- * @brief set all offset quantize parameters of a quantize descriptor
- * @param [in|out] quantizeInfo descriptor of quantize parameters
- * @param [in] offsetW NOTE(review): presumably the offset for filter (weights) -- confirm
- * @param [in] offsetD NOTE(review): presumably the offset for input data -- confirm
- * @param [in] scaleReq NOTE(review): presumably the requantize scale value -- confirm
- * @param [in] offsetDNext NOTE(review): presumably the data offset of the next layer -- confirm
- * @return ccStatus_t
- */
- ccStatus_t ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, const uint8_t *offsetW,
- const uint8_t *offsetD, const uint16_t *scaleReq, const uint16_t *offsetDNext);
-
- /**
- * @ingroup dnn
- * @brief set full connection descriptor's quantize parameters
- * @param [in|out] fcDesc full connection descriptor
- * @param [in] QuantizeInfo descriptor of quantize parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetFullConnectionQuantizeInfo(ccFullConnectionDescriptor_t fcDesc,
- const ccQuantizeDescriptor_t QuantizeInfo);
-
- /**
- * @ingroup dnn
- * @brief set pooling descriptor's quantize parameters
- * @param [in|out] poolingDesc pooling descriptor
- * @param [in] QuantizeInfo descriptor of quantize parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetPoolingQuantizeInfo(ccPoolingDescriptor_t poolingDesc, const ccQuantizeDescriptor_t QuantizeInfo);
-
- /**
- * @ingroup dnn
- * @brief set full connection descriptor's info table
- * @param [in|out] fcDesc full connection descriptor
- * @param [in] infoTabSize table size
- * @param [in] infoTab pointer to info table
- * @param [in] algo full connection forward algorithm, defaults to CC_FULLCONNECT_FWD_ALGO_HALF
- * @return ccStatus_t
- */
- ccStatus_t ccSetFullConnectionDescriptor(ccFullConnectionDescriptor_t fcDesc, uint32_t infoTabSize, const void *infoTab,
- ccFullConnectFwdAlgo_t algo = CC_FULLCONNECT_FWD_ALGO_HALF);
-
- /**
- * @ingroup dnn
- * @brief append an operation to the full connection descriptor (the original note describes this as
- * setting the relu flag)
- * @param [in|out] fcDesc full connection descriptor
- * @param [in] opType operation type to append to the full connection operation
- * @param [in] opDesc operation descriptor for the opType
- * @return ccStatus_t
- */
- ccStatus_t ccFullConnectionAppendOp(ccFullConnectionDescriptor_t fcDesc, tagCcOpType opType, const void *opDesc);
-
- /**
- * @ingroup dnn
- * @brief check aipp basic info
- * @note parameters are listed in signature order: the W (horizontal) value comes before the H (vertical) one
- * @param [in] inputFormat format of input image
- * @param [in] loadStartPosW horizontal start position in source image
- * @param [in] loadStartPosH vertical start position in source image
- * @param [in] srcImageSizeW horizontal size of source image
- * @param [in] srcImageSizeH vertical size of source image
- * @param [in] cpaddingValue C direction padding value
- * @param [in] cscSwitch csc enable or not
- * @param [in] rbuvSwapSwitch swap R/U and B/V position of the image
- * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV
- * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is
- * not useful.
- * @return ccStatus_t
- */
- ccStatus_t ccCheckConvolutionAippCommInfo(ccAippInputFormat_t inputFormat, int32_t loadStartPosW, int32_t loadStartPosH,
- int32_t srcImageSizeW, int32_t srcImageSizeH, float cpaddingValue,
- bool cscSwitch, bool rbuvSwapSwitch, bool axSwapSwitch, bool singleLineMode);
-
- /**
- * @ingroup dnn
- * @brief check aipp dtc info
- * @note "x" below stands for the channel index 0, 1 or 2 of the corresponding parameter
- * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x
- * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x
- * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x
- * @return ccStatus_t
- */
- ccStatus_t ccCheckConvolutionAippDtcInfo(int32_t dtcPixelMeanChn0, int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2,
- float dtcPixelMinChn0, float dtcPixelMinChn1, float dtcPixelMinChn2,
- float dtcPixelVarReciChn0, float dtcPixelVarReciChn1,
- float dtcPixelVarReciChn2);
-
- /**
- * @ingroup dnn
- * @brief check aipp padding info
- * @param [in] paddingMode padding mode
- * @param [in] leftPaddingSize left hblank/padding size
- * @param [in] rightPaddingSize right hblank/padding size
- * @param [in] topPaddingSize top padding size
- * @param [in] bottomPaddingSize bottom padding size
- * @return ccStatus_t
- */
- ccStatus_t ccCheckConvolutionAippPadInfo(ccAippPaddingMode_t paddingMode, int32_t leftPaddingSize,
- int32_t rightPaddingSize, int32_t topPaddingSize, int32_t bottomPaddingSize);
-
- /**
- * @ingroup dnn
- * @brief check aipp csc info
- * @note "m" and "n" below stand for the row and column index (0, 1 or 2) of the corresponding parameter
- * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n
- * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m
- * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m
- * @return ccStatus_t
- */
- ccStatus_t ccCheckConvolutionAippCscInfo(int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, int32_t cscMatrixR0C2,
- int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, int32_t cscMatrixR1C2,
- int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, int32_t cscMatrixR2C2,
- int32_t cscOutputBias0, int32_t cscOutputBias1, int32_t cscOutputBias2,
- int32_t cscInputBias0, int32_t cscInputBias1, int32_t cscInputBias2);
-
- /**
- * @ingroup dnn
- * @brief check aipp scf (scaling) info
- * @param [in] scfSwitch whether scaling is enabled
- * @param [in] scfInputW input width of scaling
- * @param [in] scfInputH input height of scaling
- * @param [in] scfOutputW output width of scaling
- * @param [in] scfOutputH output height of scaling
- * @return ccStatus_t
- */
- ccStatus_t ccCheckConvolutionAippScfInfo(bool scfSwitch, int32_t scfInputW, int32_t scfInputH, int32_t scfOutputW,
- int32_t scfOutputH);
-
- /**
- * @ingroup dnn
- * @brief check aipp param of a conv operator
- * @param [in] convDesc descriptor of conv operator
- * @param [in] xDesc input tensor info
- * @param [in] yDesc output tensor info
- * @return ccStatus_t
- */
- ccStatus_t ccCheckConvFwdAippParam(const ccConvolutionDescriptor_t convDesc, const ccTensorDescriptor_t xDesc,
- const ccTensorDescriptor_t yDesc);
-
- /**
- * @ingroup dnn
- * @brief init aipp basic info
- * @note parameters are listed in signature order: the W (horizontal) value comes before the H (vertical) one
- * @param [in|out] convDesc descriptor of conv operator
- * @param [in] inputFormat format of input image
- * @param [in] loadStartPosW horizontal start position in source image
- * @param [in] loadStartPosH vertical start position in source image
- * @param [in] srcImageSizeW horizontal size of source image
- * @param [in] srcImageSizeH vertical size of source image
- * @param [in] cpaddingValue C direction padding value
- * @param [in] cscSwitch csc enable or not
- * @param [in] rbuvSwapSwitch swap R/U and B/V position of the image
- * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV
- * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is
- * not useful.
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolutionAippCommInfo(ccConvolutionDescriptor_t convDesc, ccAippInputFormat_t inputFormat,
- int32_t loadStartPosW, int32_t loadStartPosH, int32_t srcImageSizeW,
- int32_t srcImageSizeH, float cpaddingValue, bool cscSwitch, bool rbuvSwapSwitch,
- bool axSwapSwitch, bool singleLineMode);
- /**
- * @ingroup dnn
- * @brief init aipp dtc info
- * @note "x" below stands for the channel index 0, 1 or 2 of the corresponding parameter
- * @param [in|out] convDesc descriptor of conv operator
- * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x
- * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x
- * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolutionAippDtcInfo(ccConvolutionDescriptor_t convDesc, int32_t dtcPixelMeanChn0,
- int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, float dtcPixelMinChn0,
- float dtcPixelMinChn1, float dtcPixelMinChn2, float dtcPixelVarReciChn0,
- float dtcPixelVarReciChn1, float dtcPixelVarReciChn2);
- /**
- * @ingroup dnn
- * @brief init aipp padding info
- * @param [in|out] convDesc descriptor of conv operator
- * @param [in] paddingMode padding mode
- * @param [in] leftPaddingSize left hblank/padding size
- * @param [in] rightPaddingSize right hblank/padding size
- * @param [in] topPaddingSize top padding size
- * @param [in] bottomPaddingSize bottom padding size
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolutionAippPadInfo(ccConvolutionDescriptor_t convDesc, ccAippPaddingMode_t paddingMode,
- int32_t leftPaddingSize, int32_t rightPaddingSize, int32_t topPaddingSize,
- int32_t bottomPaddingSize);
-
- /**
- * @ingroup dnn
- * @brief init aipp csc info
- * @note "m" and "n" below stand for the row and column index (0, 1 or 2) of the corresponding parameter
- * @param [in|out] convDesc descriptor of conv operator
- * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n
- * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m
- * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolutionAippCscInfo(ccConvolutionDescriptor_t convDesc, int32_t cscMatrixR0C0, int32_t cscMatrixR0C1,
- int32_t cscMatrixR0C2, int32_t cscMatrixR1C0, int32_t cscMatrixR1C1,
- int32_t cscMatrixR1C2, int32_t cscMatrixR2C0, int32_t cscMatrixR2C1,
- int32_t cscMatrixR2C2, int32_t cscOutputBias0, int32_t cscOutputBias1,
- int32_t cscOutputBias2, int32_t cscInputBias0, int32_t cscInputBias1,
- int32_t cscInputBias2);
-
- /**
- * @ingroup dnn
- * @brief init aipp scf (scaling) info
- * @param [in|out] convDesc descriptor of conv operator
- * @param [in] scfSwitch whether scaling is enabled
- * @param [in] scfInputW input width of scaling
- * @param [in] scfInputH input height of scaling
- * @param [in] scfOutputW output width of scaling
- * @param [in] scfOutputH output height of scaling
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolutionAippScfInfo(ccConvolutionDescriptor_t convDesc, bool scfSwitch, int32_t scfInputW,
- int32_t scfInputH, int32_t scfOutputW, int32_t scfOutputH);
-
- /**
- * @ingroup dnn
- * @brief set dynamic aipp parameter address and enable flag info
- * @param [in|out] convDesc descriptor of conv operator
- * @param [in] dyncParaAddr aipp parameter address
- * @param [in] dyncAippFlag flag to show whether to use dynamic aipp
- * @param [in] rotationFlag defaults to false (NOTE(review): presumably enables rotation -- semantics are
- * not visible in this header; confirm)
- * @return ccStatus_t
- */
- ccStatus_t ccSetConvolutionAippDyncParaAddr(ccConvolutionDescriptor_t convDesc, const void *dyncParaAddr,
- bool dyncAippFlag, bool rotationFlag = false);
-
- /**
- * @ingroup dnn
- * @brief check dynamic aipp parameter
- * @param [in] dynamicParamAddr aipp parameter address
- * @param [in] dataLength parameter length
- * @param [in] convolutionDimW convDimW
- * @param [in] convolutionDimH convDimH
- * @return ccStatus_t
- */
- ccStatus_t ccCheckDynamicAippParam(const void *dynamicParamAddr, uint32_t dataLength, int64_t convolutionDimW,
- int64_t convolutionDimH);
-
- /**
- * @ingroup dnn
- * @brief trans mean and var
- * @param [in|out] mean mean buffer; per the original note, mean' = bnScale/sqrt(var)
- * @param [in|out] var variance buffer; per the original note, var' = -bnScale * mean / sqrt(var) + bnBias
- * @param [in] bnScaleBiasMeanVarDesc tensor descriptor (NOTE(review): presumably shared by bnScale, bnBias,
- * mean and var -- confirm)
- * @param [in] alpha NOTE(review): usage not visible in this header -- confirm
- * @param [in] beta NOTE(review): usage not visible in this header -- confirm
- * @param [in|out] bnScale bnScale term of the formulas above (declared non-const; direction to be confirmed)
- * @param [in|out] bnBias bnBias term of the formulas above (declared non-const; direction to be confirmed)
- * @param [in] epsilon NOTE(review): does not appear in the formulas above; presumably added to var before
- * the sqrt -- confirm
- * @return ccStatus_t
- */
-
- ccStatus_t ccTransBatchnormMeanAndVar(void *mean, void *var, const ccTensorDescriptor_t bnScaleBiasMeanVarDesc,
- const void *alpha, const void *beta, void *bnScale, void *bnBias, double epsilon);
-
- /**
- * @ingroup dnn
- * @brief init deconvolution adj or targetShape info.
- * @param [in|out] convDesc conv descriptor.
- * @param [in] adjSize size of adj (NOTE(review): presumably the element count -- confirm).
- * @param [in] adj adjust values for the H and W output.
- * @param [in] targetShapeSize size of targetShape (NOTE(review): presumably the element count -- confirm).
- * @param [in] targetShape values of output shape, if this pointer was set, ignore adj.
- * @return ccStatus_t
- */
- ccStatus_t ccSetDeconvolutionOutShapeInfo(ccConvolutionDescriptor_t convDesc, uint32_t adjSize, const uint32_t *adj,
- uint32_t targetShapeSize, const uint32_t *targetShape);
-
- /**
- * @ingroup dnn
- * @brief gather elements according to the indices.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the tensor from which to gather elements.
- * @param [in] x data point of the tensor from which to gather elements.
- * @param [in] indicesDesc description of the tensor of indices.
- * @param [in] indices data point of the tensor of indices.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccGatherNdForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t indicesDesc, const void *indices, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of gather_nd (n/c/h/w overload).
- * @param [in] xDesc description of the tensor from which to gather elements.
- * @param [in] indicesDesc description of the tensor of indices.
- * @param [out] n dim-size of n-dim.
- * @param [out] c dim-size of c-dim.
- * @param [out] h dim-size of h-dim.
- * @param [out] w dim-size of w-dim.
- * @param [out] realDimCnt real dim.
- * @return ccStatus_t
- */
- ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, int32_t *n,
- int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
- /**
- * @ingroup dnn
- * @brief get output shape of gather_nd (dim array overload).
- * @param [in] xDesc description of the tensor from which to gather elements.
- * @param [in] indicesDesc description of the tensor of indices.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc,
- int32_t *dimCnt, int32_t *dim, int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief tile tensor by multiples.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the tensor which to be tiled.
- * @param [in] x data point of the tensor which to be tiled.
- * @param [in] multiples tile coefficient of each dim.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccTileForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccIntArray_t *multiples, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of tile (dim array overload).
- * @param [in] xDesc description of the tensor which to be tiled.
- * @param [in] multiples multiples of each dim.
- * @param [in|out] dimCnt [point to the output dimCnt]
- * @param [in|out] dim [arrays to save dims]
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *multiples, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get output shape of tile (n/c/h/w overload).
- * @param [in] xDesc description of the tensor which to be tiled.
- * @param [in] multiples multiples of each dim.
- * @param [out] n dim-size of n-dim.
- * @param [out] c dim-size of c-dim.
- * @param [out] h dim-size of h-dim.
- * @param [out] w dim-size of w-dim.
- * @param [out] realDimCnt real dim.
- * @return ccStatus_t
- */
- ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc,
- // const ccIntArrayDescriptor_t multiples,
- const ccIntArray_t *multiples, int32_t *n, int32_t *c, int32_t *h, int32_t *w,
- int32_t *realDimCnt);
- /**
- * @ingroup dnn
- * @brief get output shape of realdiv (dim array overload).
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief realdiv between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the dividend tensor.
- * @param [in] x data point of the dividend tensor.
- * @param [in] yDesc description of the divisor tensor.
- * @param [in] y data point of the divisor tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccRealdivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of realdiv (n/c/h/w overload).
- * @param [in] xDesc description of the dividend tensor.
- * @param [in] yDesc description of the divisor tensor.
- * @param [out] n dim-size of n-dim.
- * @param [out] c dim-size of c-dim.
- * @param [out] h dim-size of h-dim.
- * @param [out] w dim-size of w-dim.
- * @param [out] realDimCnt real dim.
- * @return ccStatus_t
- */
- ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *n,
- int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
-
- /**
- * @ingroup dnn
- * @brief floordiv between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccFloordivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of floordiv.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetFloordivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief greater between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccGreaterForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of greater.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetGreaterOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief less between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccLessForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of less.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetLessOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get output shape of LogicalOr.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetLogicalOrOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get output shape of LogicalXor.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetLogicalXorOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief logical not forward:
- * data type only support bool
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccLogicalNotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief equal between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
-
- ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief dump data during inference, only for eng ver.
- * @param [in] handle cce handle
- * @param [in] buffer data buffer to dump
- * @param [in] bufLen length of buffer (NOTE(review): presumably in bytes -- confirm)
- * @param [in] taskIndex task index (NOTE(review): meaning not visible in this header -- confirm)
- * @return ccStatus_t
- */
- ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t bufLen, const uint32_t taskIndex);
-
- /**
- * @ingroup dnn
- * @brief logical and between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccLogicalAndForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief logical or between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
- /**
- * @ingroup dnn
- * @brief logical xor between two tensors (x ^ y = (x | y) & ~(x & y)).
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccLogicalXorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of equal.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief get output shape of logical and.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetLogicalAndOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief floormod between two tensors.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccFloormodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of floormod.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetFloormodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief compare between two tensors.
- * @param [in] handle cce handle
- * @param [in] compareType type of comparison to perform
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] x data point of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [in] y data point of the right operator tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
- ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compareType, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
- const void *y, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of compare.
- * @param [in] xDesc description of the left operator tensor.
- * @param [in] yDesc description of the right operator tensor.
- * @param [out] dimCnt dim nums.
- * @param [out] dim dim size.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of FillParam
- * @param [in|out] fillParamDesc pointer to the descriptor of fill param to be created
- * @return ccStatus_t
- */
- ccStatus_t ccCreateFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of FillParam
- * @param [in] fillParamDesc pointer to the descriptor of fill param to be destroyed
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc);
-
- /**
- * @ingroup dnn
- * @brief get output shape of broadcast operations.
- * @param [in] inputNum input number of the operation tensors.
- * @param [in] xDesc description of the input operation tensors list.
- * @param [out] dimCnt dim-size of output tensor.
- * @param [out] dim dim of output tensor.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetMultiNdBroadcastOpOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get output shape of maxmultitensor.
- * @param [in] inputNum the num of input operator tensors.
- * @param [in] xDesc description of the input operator tensors list.
- * @param [out] dimCnt dim count of output tensor.
- * @param [out] dim array of output tensor.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetMaxMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get output shape of minmultitensor.
- * @param [in] inputNum the num of input operator tensors.
- * @param [in] xDesc[] description of the input operator tensors list.
- * @param [out] dimCnt dim count of output tensor.
- * @param [out] dim array of output tensor.
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetMinMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief MaxMultitensor forward:
- * data type only support float float16 and int32
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] inputNum input tensor number
- * @param [in] alpha common scale factor
- * @param [in] xDesc[] descriptor of input tensors list
- * @param [in] x[] input data in device memory list
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccMaxMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha,
- const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief MinMultitensor forward:
- * data type only support float float16 and int32
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] inputNum input tensor number
- * @param [in] alpha common scale factor
- * @param [in] xDesc[] descriptor of input data list
- * @param [in] x[] input data in device memory list
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccMinMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha,
- const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of StridedSlice
- * @param [in|out] stridedSliceDesc point to descriptor of StridedSlice param
- * @return ccStatus_t
- */
- ccStatus_t ccCreateStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of StridedSlice
- * @param [in] stridedSliceDesc point to descriptor of StridedSlice param
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc);
-
- /**
- * @ingroup dnn
- * @brief init stridedSlice descriptor_t.
- * @param [out] stridedSliceDesc struct of stridedslice param
- * @param [in] dimCnt dimension of the input tensor
- * @param [in] begin slice begin(include)
- * @param [in] end slice end index(not include)
- * @param [in] strides slice stride
- * @return ccStatus_t
- */
- ccStatus_t ccSetStridedSliceDescriptor(ccStridedSliceDescriptor_t stridedSliceDesc, int32_t dimCnt, int32_t begin[],
- int32_t end[], int32_t strides[]);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of StridedSlice attrs
- * @param [in|out] attrDesc point to descriptor of StridedSlice attr
- * @return ccStatus_t
- */
- ccStatus_t ccCreateStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of StridedSlice attrs
- * @param [in] attrDesc point to descriptor of StridedSlice attr
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
-
- /**
- * @ingroup dnn
- * @brief init stridedSlice mask attrs descriptor.
- * @param [out] attrDesc struct of stridedslice mask attrs
- * @param [in] beginMask begin mask
- * @param [in] endMask end mask
- * @param [in] ellipsisMask ellipsis mask
- * @param [in] newAxisMask new axis mask
- * @param [in] shrinkAxisMask shrink axis mask
- * @return ccStatus_t
- */
- ccStatus_t ccSetStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t attrDesc, int32_t beginMask,
- int32_t endMask, int32_t ellipsisMask, int32_t newAxisMask,
- int32_t shrinkAxisMask);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of a strided slice of a tensor.
- * @param [in] xDesc descriptor of input data
- * @param [in] stridedSliceDesc specifies the begin, end, strides of slice
- * @param [in] attrDesc reserve for optional attributes.
- * @param [out] n point to n size
- * @param [out] c point to c size
- * @param [out] h point to h size
- * @param [out] w point to w size
- * @param [out] realDimCnt point to real dim count
- * @return ccStatus_t
- */
- ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc,
- const ccStridedSliceDescriptor_t stridedSliceDesc,
- const ccStridedSliceAttrsDescriptor_t attrDesc, int32_t *n, int32_t *c,
- int32_t *h, int32_t *w, int32_t *realDimCnt);
-
- /**
- * @ingroup dnn
- * @brief Extracts a strided slice of a tensor.
- * @param [in] handle cce handle
- * @param [in] stridedSliceDesc specifies the begin, end, strides of slice
- * @param [in] attrDesc reserve for optional attributes.
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] yDesc descriptor of output data
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccStridedSliceForward(ccHandle_t handle, const ccStridedSliceDescriptor_t stridedSliceDesc,
- const ccStridedSliceAttrsDescriptor_t attrDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief get output description of slice tensor.
- * @param [in] xDesc descriptor of input data
- * @param [in] begin begin position of tensor
- * @param [in] size size to slice
- * @param [out] n point to n size
- * @param [out] c point to c size
- * @param [out] h point to h size
- * @param [out] w point to w size
- * @param [out] realDimCnt realdim count
- * @return ccStatus_t
- */
- ccStatus_t ccGetSliceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *begin, const ccIntArray_t *size,
- int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
-
- /**
- * @ingroup dnn
- * @brief slice of a tensor.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] begin begin position of tensor
- * @param [in] size size to slice
- * @param [in] beta common scale factor
- * @param [in] yDesc descriptor of output data
- * @param [in|out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccSliceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccIntArray_t *begin, const ccIntArray_t *size, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief gather forward computation
- * @param [in] handle cce handle
- * @param [in] alpha reserved
- * @param [in] paramsDesc descriptor of params tensor
- * @param [in] params input data in device memory
- * @param [in] indicesDesc descriptor of indices tensor
- * @param [in] indices indices data in device memory
- * @param [in] axis gather axis (int32_t index, not a tensor descriptor)
- * @param [in] beta reserved
- * @param [in] outputDesc descriptor of output tensor
- * @param [out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccGatherForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc,
- const void *params, const ccTensorDescriptor_t indicesDesc, const void *indices,
- const int32_t axis, const void *beta, ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief gather output dim computation, for NC1HWC0
- * @param [in] paramsDesc descriptor of params tensor
- * @param [in] indicesDesc descriptor of indices tensor
- * @param [in] axis gather axis (int32_t index, not a tensor descriptor)
- * @param [out] n dim of n
- * @param [out] c dim of c
- * @param [out] h dim of h
- * @param [out] w dim of w
- * @param [out] realDimCnt real dim count
- * @return ccStatus_t
- */
- ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc,
- int32_t axis, int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
-
- /**
- * @ingroup dnn
- * @brief gather output dim computation
- * @param [in] paramsDesc descriptor of params tensor
- * @param [in] indicesDesc descriptor of indices tensor
- * @param [in] axis gather axis (int32_t index, not a tensor descriptor)
- * @param [out] dimCnt dimcnt of output
- * @param [out] dim dim of output
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc,
- int32_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief exp forward computation
- * @param [in] handle cce handle
- * @param [in] expDesc descriptor of expParam
- * @param [in] expParam a ternary array
- * @param [in] alpha reserved parameter
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta reserved parameter
- * @param [in] yDesc descriptor of output tensor
- * @param [out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccExpForward(ccHandle_t handle, const ccExpDescriptor_t expDesc, const void *expParam, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief expm1 forward:
- * data type only support float float16 and double
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccExpm1Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief log1p forward:
- * data type only support float float16 and double
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccLog1pForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief init descriptor for parameter of exp function
- * @param [in|out] expDesc descriptor of expParam
- * @param [in] dataType data type in device
- * @param [in] paramCnt number of parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetExpDescriptor(ccExpDescriptor_t expDesc, ccDataType_t dataType, uint32_t paramCnt);
-
- /**
- * @ingroup dnn
- * @brief log forward computation
- * @param [in] handle cce handle
- * @param [in] logDesc descriptor of logParam
- * @param [in] logParam a ternary array
- * @param [in] alpha reserved parameter
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta reserved parameter
- * @param [in] yDesc descriptor of output tensor
- * @param [out] y output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccLogForward(ccHandle_t handle, const ccLogDescriptor_t logDesc, const void *logParam, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief init descriptor for parameter of log function
- * @param [in|out] logDesc descriptor of tensor
- * @param [in] dataType data type in device
- * @param [in] paramCnt number of parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetLogDescriptor(ccLogDescriptor_t logDesc, ccDataType_t dataType, uint32_t paramCnt);
-
- /**
- * @ingroup dnn
- * @brief pow forward computation
- * @param [in] handle cce handle
- * @param [in] powDesc descriptor of powParam
- * @param [in] powParam a ternary array
- * @param [in] alpha reserved parameter
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] yDesc descriptor of input tensor
- * @param [in] y input data in device memory
- * @param [in] beta reserved parameter
- * @param [in] zDesc descriptor of output tensor
- * @param [out] z output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t powDesc, const void *powParam, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
- const void *y, const void *beta, const ccTensorDescriptor_t zDesc, void *z);
-
- /**
- * @ingroup dnn
- * @brief init descriptor for parameter of pow function
- * @param [in|out] powDesc descriptor of powParam
- * @param [in] dataType data type in device
- * @param [in] paramCnt number of parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t powDesc, ccDataType_t dataType, uint32_t paramCnt);
-
- /**
- * @ingroup dnn
- * @brief non max suppression forward.
- * @param [in] handle cce handle
- * @param [in] nonmaxParaDesc descriptor of para
- * @param [in] nonmaxPara input para in host memory
- * @param [in] maxoutputsize input para in host memory
- * @param [in] alpha common scale factor
- * @param [in] boxesDesc descriptor of input data boxes
- * @param [in] boxes input data boxes in device memory
- * @param [in] scoresDesc descriptor of input data scores
- * @param [in] scores input data scores in device memory
- * @param [in] workSpaceSizeInBytes workspace size
- * @param [in] workSpace input workspace in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccNonMaxSuppressionForward(ccHandle_t handle, const ccNonMaxSuppressionDescriptor_t nonmaxParaDesc,
- const void *nonmaxPara, const int *maxoutputsize, const void *alpha,
- const ccTensorDescriptor_t boxesDesc, const void *boxes,
- const ccTensorDescriptor_t scoresDesc, const void *scores,
- const uint32_t workSpaceSizeInBytes, void *workSpace, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
- /**
- * @ingroup dnn
- * @brief init descriptor for parameter of NonMaxSuppression function
- * @param [in|out] nonMaxSuppressionDesc descriptor of NonMaxSuppression parameter
- * @param [in] dataType data type in device
- * @param [in] paramCnt number of parameters
- * @return ccStatus_t
- */
- ccStatus_t ccSetNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t nonMaxSuppressionDesc,
- ccDataType_t dataType, uint32_t paramCnt);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of resizeBilinear op.
- * @param [in] xDesc descriptor of input data
- * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
- * @param [out] dimCnt dimcnt of output
- * @param [out] dim[] dim of output
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetResizeBilinearOutputDim(const ccTensorDescriptor_t xDesc,
- const ccResizeBilinearDescriptor_t resizeBilinearDesc, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of interp op.
- * @param [in] xDesc descriptor of input data
- * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
- * @param [out] dimCnt dimcnt of output
- * @param [out] dim[] dim of output
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetInterpOutputDim(const ccTensorDescriptor_t xDesc, const ccResizeBilinearDescriptor_t resizeBilinearDesc,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief resize bilinear forward for t network.
- * @param [in] handle cce handle
- * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccResizeBilinearForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc,
- const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief resize bilinear forward for c network.
- * @param [in] handle cce handle
- * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccInterpForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of ResizeBilinear
- * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr
- * @return ccStatus_t
- */
- ccStatus_t ccCreateResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of ResizeBilinear
- * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
-
- /**
- * @ingroup dnn
- * @brief set descriptor of resizeBilinear.
- * @param [in|out] resizeBilinearDesc descriptor of resize_bilinear operator
- * @param [in] resizeOutputDimMode way to decide output dimensions
- * @param [in] alignCorners whether the centers of input and output are aligned
- * @param [in] zoom_factor zoom factor
- * @param [in] shrink_factor shrink factor
- * @param [in] height height of output
- * @param [in] width width of output
- * @param [in] pad_begin padding at begin of input
- * @param [in] pad_end padding at end of input
- * @return ccStatus_t
- */
- ccStatus_t ccSetResizeBilinearDescriptor(ccResizeBilinearDescriptor_t resizeBilinearDesc,
- ccResizeOutputDimMode_t resizeOutputDimMode, bool alignCorners,
- int32_t zoom_factor, int32_t shrink_factor, int32_t height, int32_t width,
- int32_t pad_begin, int32_t pad_end);
-
- /**
- * @ingroup dnn
- * @brief fill forward computation
- * @param [in] handle cce handle
- * @param [in] fillParamDesc descriptor of fill parameter
- * @param [in] alpha reserved
- * @param [in] givenDesc descriptor of given tensor
- * @param [in] givenData given data in device memory
- * @param [in] workspace space for fill algorithm
- * @param [in] workSpaceSizeInBytes space size in byte
- * @param [in] beta reserved
- * @param [in] outputDesc descriptor of output tensor
- * @param [out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccFillForward(ccHandle_t handle, const ccFillParamDescriptor_t fillParamDesc, const void *alpha,
- const ccTensorDescriptor_t givenDesc, const void *givenData, const void *workspace,
- const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
-
- /**
- * @ingroup dnn
- *[ccGetFillWorkspaceSize]
- *@param fillType [fill type]
- *@param xDesc [input tensor descriptor]
- *@param sizeInBytes [output size]
- *@return ccStatus_t [status]
- */
- ccStatus_t ccGetFillWorkspaceSize(const ccFillOpType_t fillType, const ccTensorDescriptor_t xDesc,
- uint32_t *sizeInBytes);
-
- /**
- *[ccCast]
- *@param handle [cce handler]
- *@param alpha [alpha]
- *@param xDesc [tensor Description of tensor x]
- *@param x [input tensor x]
- *@param beta [beta]
- *@param yDesc [tensor Description of tensor y]
- *@param y [output tensor y]
- *@return ccStatus_t [status]
- */
- ccStatus_t ccCast(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief round forward:
- * data type only support float float16 and int32
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccRoundForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief rint forward:
- * data type only support float float16
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccRintForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief sqrt forward:
- * data type only support float float16
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccSqrtForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- *[ccTransGroupConvFilterInt8]
- *@param filterSrcInfo [cce filtersrc descriptor]
- *@param filterSrc [filterSrc address]
- *@param filterDstInfo [cce filterdst descriptor]
- *@param filterDst [filterdst address]
- *@param group [group]
- *@param ySizeInBytes [fraczfilter size]
- *@param outputDataType [datatype]
- *@return ccStatus_t [status]
- */
- ccStatus_t ccTransGroupConvFilterInt8(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
- ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group,
- uint32_t ySizeInBytes, ccDataType_t outputDataType);
-
- /**
- *[ccGetConcatOutputDim]
- *@param xDesc[] [input tensor descriptor]
- *@param axis [concat axis]
- *@param inputNum [input tensor numbers]
- *@param dimCnt [output dim count]
- *@param dim[] [output dim]
- *@param dimLen [length of dim]
- *@return ccStatus_t [status]
- */
- ccStatus_t ccGetConcatOutputDim(const ccTensorDescriptor_t xDesc[], int32_t axis, int32_t inputNum, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of reduce.
- * @param [in] xDesc descriptor of input tensor
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetReduceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *axis, bool keepDims,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief reduce sum forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceSumForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief reduce max forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceMaxForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief reduce min forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceMinForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief reduce mean forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceMeanForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief reduce prod forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceProdForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief reduce all forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceAllForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- *@brief print times stats
- *@return ccStatus_t [status]
- */
- ccStatus_t ccPrintTimeStat();
-
- /**
- * @ingroup dnn
- * @brief reduce abs sum forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceAbsSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief reduce square sum forward computation
- * @param [in] handle cce handle
- * @param [in] axis The dimensions to reduce
- * @param [in] keepDims If true, retains reduced dimensions with length 1.
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReduceSquareSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of crop and resize
- * @param [in] imageDesc descriptor of images
- * @param [in] boxesDesc descriptor of boxes
- * @param [in] boxidxDesc descriptor of boxidx
- * @param [in] resizeHeight resize height
- * @param [in] resizeWidth resize width
- * @param [out] dimCnt dimcnt of output
- * @param [out] dim dim of output
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetCropAndResizeOutputDim(const ccTensorDescriptor_t imageDesc, const ccTensorDescriptor_t boxesDesc,
- const ccTensorDescriptor_t boxidxDesc, const int32_t resizeHeight,
- const int32_t resizeWidth, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief crop and resize forward.
- * @param [in] handle cce handle
- * @param [in] method enum of resize method
- * @param [in] extrapolationValue Value used for extrapolation, when applicable
- * @param [in] alpha common scale factor
- * @param [in] imageDesc descriptor of images
- * @param [in] image input data in device memory
- * @param [in] boxesDesc descriptor of boxes
- * @param [in] boxes input data in device memory
- * @param [in] boxidxDesc descriptor of boxidx
- * @param [in] boxidx input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccCropAndResizeForward(ccHandle_t handle, const ccResizeMethod_t method, const float extrapolationValue,
- const void *alpha, const ccTensorDescriptor_t imageDesc, const void *image,
- const ccTensorDescriptor_t boxesDesc, const void *boxes,
- const ccTensorDescriptor_t boxidxDesc, const void *boxidx, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief select forward computation
- * @param [in] handle cce handle
- * @param [in] alpha reserved
- * @param [in] condDesc descriptor of cond tensor
- * @param [in] cond cond data in device memory
- * @param [in] xDesc descriptor of x tensor
- * @param [in] x x data in device memory
- * @param [in] yDesc descriptor of y tensor
- * @param [in] y y data in device memory
- * @param [in] beta reserved
- * @param [in] outDesc descriptor of output tensor
- * @param [out] out output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccSelect(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t condDesc, const void *cond,
- const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, const void *y,
- const void *beta, const ccTensorDescriptor_t outDesc, void *out);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of where
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetWhereOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief where forward computation
- * @param [in] handle cce handle
- * @param [in] alpha reserved
- * @param [in] xDesc descriptor of x tensor
- * @param [in] x x data in device memory
- * @param [in] beta NOTE(review): was undocumented; presumably reserved like alpha - confirm
- * @param [in] yDesc descriptor of y tensor
- * @param [out] y y data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccWhere(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief reverse forward.
- * @param [in] handle cce handle
- * @param [in] axis dim that need reverse
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReverseForward(ccHandle_t handle, const ccIntArray_t *axis, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief floor forward:
- * data type only support float float16
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccFloorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief ceil forward:
- * data type only support float float16
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccCeilForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of truncate mod
- * @param [in] xDesc descriptor of input tensor
- * @param [in] yDesc descriptor of input tensor
- * @param [out] dimCnt [dim count of the output tensor]
- * @param [out] dim[] [shape of the output tensor]
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetTruncatemodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief truncate mod forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] yDesc descriptor of input tensor
- * @param [in] y input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccTruncatemodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
- /**
- * @ingroup dnn
- * @brief Spatial Pyramid Pooling
- * @param [in] handle cce handle
- * @param [in] alpha reserved
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] workspace temp workspace
- * @param [in] workspaceSizeInBytes temp workspace size
- * @param [in] pyramidHeight pyramid height
- * @param [in] poolingMode pooling mode
- * @param [in] beta reserved
- * @param [in] outputDesc descriptor of output tensor
- * @param [out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccSPPForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- void *workspace, const uint32_t workspaceSizeInBytes, const uint32_t pyramidHeight,
- const ccPoolingMode_t poolingMode, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
- /**
- * @ingroup dnn
- * @brief Get Spatial Pyramid Pooling output dim
- * @param [in] xDesc descriptor of input tensor
- * @param [in] pyramidHeight pyramid height
- * @param [out] dimCnt output tensor dim cnt
- * @param [out] dim output tensor dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetSPPOutputDim(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, int32_t *dimCnt,
- int32_t dim[], const int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief Get Spatial Pyramid Pooling workspace size
- * @param [in] xDesc descriptor of input tensor
- * @param [in] pyramidHeight pyramid height
- * @param [out] workspaceSizeInBytes workspace size
- * @return ccStatus_t
- */
- ccStatus_t ccGetSPPWorkspaceSize(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight,
- uint32_t *workspaceSizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief BNLL forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccBNLLForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief bias forward.
- * @param [in] handle cce handle
- * @param [in] axis axis
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data x
- * @param [in] x input data x in device memory
- * @param [in] biasDesc descriptor of input data bias
- * @param [in] bias input data bias in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccBiasForward(ccHandle_t handle, const int axis, const void *alpha, const ccTensorDescriptor_t xDesc,
- const void *x, const ccTensorDescriptor_t biasDesc, const void *bias, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief threshold forward computation
- * @param [in] handle cce handle
- * @param [in] threshold threshold
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccThresholdForward(ccHandle_t handle, const void *threshold, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief shufflechannel forward.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] group number of groups
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- // TODO AICPU: please add shufflechannel custom params and comment
- ccStatus_t ccShuffleChannelForward(ccHandle_t handle, const void *alpha, uint32_t group,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief mvn forward.
- * @param [in] handle cce handle
- * @param [in] acrossChannel across channel. true: across, false: not
- * @param [in] normalizeVariance normalizeVariance. true: normalizeVariance, false: not
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] workSpace workspace buffer (NOTE(review): size presumably obtained from ccGetMVNWorkspaceSize - confirm)
- * @param [in] workSpaceSizeInBytes size of workSpace in bytes
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccMVNForward(ccHandle_t handle, bool acrossChannel, bool normalizeVariance, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, void *workSpace, uint32_t workSpaceSizeInBytes,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get the workspace size of mvn
- * @param [in] xDesc descriptor of input data
- * @param [in] acrossChannel across channel. true: across, false: not
- * @param [in|out] sizeInBytes Workspace size need for whole computation
- * @return ccStatus_t
- */
- ccStatus_t ccGetMVNWorkspaceSize(const ccTensorDescriptor_t xDesc, bool acrossChannel, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief heatmap2coord forward output is hotspot value and corresponding coordinates
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] coordh calibration high
- * @param [in] coordw calibration wide
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccHeatmap2coordForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- int32_t coordh, int32_t coordw, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
- /**
- * @ingroup dnn
- * @brief get the output dimension info of heatmap2coord
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetHeatmap2coordOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief swish forward.
- * @param [in] handle cce handle
- * @param [in] scale param of swish function, y = x / (1 + sigmoid(scale * x))
- * NOTE(review): the formula above does not match the commonly published swish,
- * y = x * sigmoid(scale * x); confirm against the implementation which one is computed.
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- 
- ccStatus_t ccSwishForward(ccHandle_t handle, const float scale, const void *alpha, const ccTensorDescriptor_t xDesc,
- const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- ccStatus_t ccTeForward(ccHandle_t handle, const void *stubFunc, uint32_t coreDim, const void *args, uint32_t argsSize,
- const rtL2Ctrl_t *l2ctrl, int32_t inputNum, const ccTensorDescriptor_t xDesc[], const void *x[],
- int32_t outputNum, const ccTensorDescriptor_t yDesc[], void *y[], bool isAiCore);
-
- #ifndef DAVINCI_LITE
- ccStatus_t ccAiCpuCustomizeForward(ccHandle_t handle, aicpu_run_func stubFunc, opTensor_t *xOpDesc[], void *x[],
- int32_t inputNum, opTensor_t *yOpDesc[], void *y[], void *op_attr_handle,
- int32_t outputNum, const ccTensorDescriptor_t xDesc[],
- const ccTensorDescriptor_t yDesc[], const void *op_attr_str, uint32_t op_attr_size);
- #endif
- /**
- * @ingroup dnn
- * @brief embedding lookup forward.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data x
- * @param [in] x input data x in device memory
- * @param [in] idxDesc descriptor of input data idx
- * @param [in] idx input data idx in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
- const void *x, const ccTensorDescriptor_t idxDesc, const void *idx,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief embedding lookup forward (overload taking an array of inputNum input tensors plus a workspace).
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] inputNum inputNum
- * @param [in] xDesc[] descriptor array of input data x
- * @param [in] x[] input data x array in device memory
- * @param [in] workSpace workSpace addr
- * @param [in] workSpaceSizeInBytes workSpace size
- * @param [in] idxDesc descriptor of input data idx
- * @param [in] idx input data idx in device memory
- * @param [in] partitionStrategy partitionStrategy
- * @param [in] maxNorm addr of maxNorm
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const int32_t inputNum,
- const ccTensorDescriptor_t xDesc[], const void *x[], void *workSpace,
- const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t idxDesc,
- const void *idx, ccPartitionStrategy_t partitionStrategy, const void *maxNorm,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- *[ccGetEmbeddingLookupOutputDim]
- *@param inputNum [input tensor numbers]
- *@param xDesc[] [input tensor descriptor]
- *@param idxDesc [idx tensor descriptor]
- *@param dimCnt [output dim count]
- *@param dim[] [output dim]
- *@param dimLen [length of dim]
- *@return ccStatus_t [status]
- */
- ccStatus_t ccGetEmbeddingLookupOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[],
- const ccTensorDescriptor_t idxDesc, int32_t *dimCnt, int32_t dim[],
- int32_t dimLen);
-
- /**
- * @ingroup dnn
- *[ccGetEmbeddingLookupWorkspaceSize]
- *@param inputNum [input tensor numbers]
- *@param idxDesc [input tensor descriptor]
- *@param isMaxNormExist [isMaxNormExist]
- *@param sizeInBytes [output size]
- *@return ccStatus_t [status]
- */
- ccStatus_t ccGetEmbeddingLookupWorkspaceSize(const int32_t inputNum, const ccTensorDescriptor_t idxDesc,
- const bool isMaxNormExist, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief check if it is the first layer of resnet50 and semecefc
- * @param [in] tensorDesc descriptor of input tensor.
- * @param [in] convDesc conv descriptor.
- * @param [in] filterDesc descriptor of weight tensor.
- * @return ccStatus_t
- */
- ccStatus_t c04DescParamCheck(const ccTensorDescriptor_t tensorDesc, const ccConvolutionDescriptor_t convDesc,
- const ccFilterDescriptor_t filterDesc);
-
- #ifndef DAVINCI_LITE
- /**
- * @ingroup dnn
- * @brief convolution backward filter computation
- * @param [in] handle cce handle
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] alpha scaling factors
- * @param [in] beta scaling factors
- * @param [in] xDesc x descriptor of input tensor
- * @param [in] x x data in device memory
- * @param [in] dyDesc descriptor of dy
- * @param [in] dy dy data in device memory
- * @param [in] dwDesc descriptor of dw
- * @param [out] dw dw data in device memory
- * @param [in] algo algorithm of convolution backward (ccConvolutionBwdAlgo_t)
- * @param [in] workSpace temp space, maybe NULL if no need temp space
- * @param [in] workSpaceSizeInBytes sizeof workspace
- * @return ccStatus_t
- */
- ccStatus_t ccConvolutionBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, void *alpha,
- void *beta, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t dyDesc, const void *dy,
- const ccFilterDescriptor_t dwDesc, void *dw, ccConvolutionBwdAlgo_t algo,
- void *workSpace, uint32_t workSpaceSizeInBytes);
- #endif
-
- /**
- * @ingroup dnn
- * @brief get the temp space size of convolution backward filter computation, maybe no need temp space
- * @param [in] handle cce handle
- * @param [in] dyDesc descriptor of input tensor dy
- * @param [in] convDesc descriptor of convolution operator
- * @param [in] xDesc descriptor of input tensor
- * @param [in] dwDesc descriptor of filter
- * @param [in] algo algorithm of convolution backward (ccConvolutionBwdAlgo_t)
- * @param [in|out] sizeInBytes temp space size need for specified algorithm
- * @return ccStatus_t
- */
- ccStatus_t ccGetConvolutionBackwardFilterWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t dyDesc,
- const ccConvolutionDescriptor_t convDesc,
- const ccTensorDescriptor_t xDesc,
- const ccFilterDescriptor_t dwDesc, ccConvolutionBwdAlgo_t algo,
- uint32_t *sizeInBytes);
-
- #ifndef DAVINCI_LITE
- ccStatus_t ccBatchNormalizationBackward(ccHandle_t handle, ccBatchNormMode_t mode, const void *alphaDataDiff,
- const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff,
- const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t dyDesc, const void *dy,
- const ccTensorDescriptor_t dxDesc, void *dx,
- const ccTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale,
- void *resultBnScaleDiff, void *resultBnBiasDiff, const void *workSpace,
- const uint32_t workSpaceSizeInBytes, double epsilon, const void *SaveMean,
- const void *SaveInvVariance);
- #endif
-
- ccStatus_t ccGetBatchNormalizationBackwardWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
- ccTensorDescriptor_t xDesc, ccTensorDescriptor_t dyDesc,
- ccTensorDescriptor_t dxDesc,
- ccTensorDescriptor_t bnScaleBiasDesc, uint32_t *sizeInBytes);
-
- #ifndef DAVINCI_LITE
- ccStatus_t ccBatchNormalizationForwardTraining(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha,
- const void *beta, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, void *y,
- const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
- const void *bnBias, double exponentialAverageFactor,
- void *resultRunningMean, void *resultRunningVariance, void *workSpace,
- uint32_t workSpaceSizeInBytes, double epsilon, void *resultSaveMean,
- void *resultSaveInvVariance, const bool isTraining);
- #endif
-
- ccStatus_t ccGetBatchNormalizationForwardTrainingWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
- ccTensorDescriptor_t xDesc, ccTensorDescriptor_t yDesc,
- const ccTensorDescriptor_t bnScaleBiasMeanVarDesc,
- uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief generate a random normal tensor from the given mean and scale.
- * @param [in] handle Stream handle.
- * @param [in] alpha reserved.
- * @param [in] meanDesc Descriptor of the mean tensor.
- * @param [in] mean Data pointer of mean.
- * @param [in] scaleDesc Descriptor of the scale tensor.
- * @param [in] scale Data pointer of scale.
- * @param [in] seed1 random seed used to generate random number
- * @param [in] seed2 random seed used to generate random number
- * @param [in] beta reserved.
- * @param [in] outputDesc Descriptor of the generated tensor.
- * @param [out] output Data pointer of output.
- * @return ccStatus_t
- */
- ccStatus_t ccRandomNormalForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t meanDesc,
- const void *mean, const ccTensorDescriptor_t scaleDesc, const void *scale,
- const int64_t seed1, const int64_t seed2, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief generate random uniform tensor.
- * @param [in] handle Stream handle.
- * @param [in] alpha reserved.
- * @param [in] minvalDesc Descriptor of the minval tensor.
- * @param [in] minval Data pointer of minval.
- * @param [in] maxvalDesc Descriptor of the maxval tensor.
- * @param [in] maxval Data pointer of maxval.
- * @param [in] seed1 random seed used to generate random number
- * @param [in] seed2 random seed used to generate random number
- * @param [in] beta reserved.
- * @param [in] outputDesc Descriptor of the generated tensor.
- * @param [out] output Data pointer of output.
- * @return ccStatus_t
- */
- ccStatus_t ccRandomUniformForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t minvalDesc,
- const void *minval, const ccTensorDescriptor_t maxvalDesc, const void *maxval,
- const int64_t seed1, const int64_t seed2, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief generate BatchMatMul tensor.
- * @param [in] handle Stream handle.
- * @param [in] alpha reserved.
- * @param [in] xDesc tensorA Desc.
- * @param [in] x Data pointer of tensorA.
- * @param [in] yDesc tensorB Desc.
- * @param [in] y Data pointer of tensorB.
- * @param [in] beta reserved.
- * @param [in] adj_x tensorA transpose flag
- * @param [in] adj_y tensorB transpose flag
- * @param [in] outDesc Description of the tensor output.
- * @param [out] out Data pointer of output.
- * @return ccStatus_t
- */
- ccStatus_t ccBatchMatMulForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta, const bool adj_x,
- const bool adj_y, const ccTensorDescriptor_t outDesc, void *out);
-
- ccStatus_t ccGetBatchMatMulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, bool adj_x,
- bool adj_y, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief generator conv int8 all offset factor
- * @param [in] para the struct for scale and offset of input, filter and output
- * @param [in|out] offsetW offset of filter
- * @param [in|out] offsetPad offset of input
- * @param [in|out] scaledQrq scale computing result of input , filter and output
- * @param [in|out] nextoffsetq offset of output
- * @return ccStatus_t
- */
- ccStatus_t ccGenQuantAllOffsetFactor(const ccQuantAllOffsetPara_t *para, uint8_t &offsetW, uint8_t &offsetPad,
- uint16_t &scaledQrq, uint16_t &nextoffsetq);
-
- /**
- * @ingroup dnn
- * @brief NOTE(review): purpose is not documented; the name suggests it configures
- * group-convolution handling on convDesc based on the filter descriptor - confirm.
- * @param [in] tensorDesc filter descriptor
- * @param [in] convDesc convolution descriptor (NOTE(review): possibly modified by the call - confirm)
- * @return ccStatus_t
- */
- ccStatus_t ccSetGroupConvScene(const ccFilterDescriptor_t tensorDesc, ccConvolutionDescriptor_t convDesc);
- /**
- * @ingroup dnn
- * @brief get conv int8 all offset fracZ size
- * @param [in] filterSrcDesc NOTE(review): was undocumented; presumably descriptor of the filter before fracZ transform - confirm
- * @param [in] filterDesc descriptor of filter tensor
- * @param [in|out] size conv int8 all offset fracZ size
- * @param [in] groupNum group conv num
- * @return ccStatus_t
- */
- ccStatus_t ccGetInt8AllOffsetFilterFracZSizeInBytes(const ccFilterDescriptor_t filterSrcDesc,
- const ccFilterDescriptor_t filterDesc, uint32_t &size,
- uint32_t groupNum);
-
- /**
- * @ingroup dnn
- * @brief transform filter in conv int8 all offset scene
- * @param [in] filterSrcInfo descriptor of filter tensor before fracZ transform
- * @param [in] filterSrc filter addr before fracZ transform
- * @param [in] filterDstInfo descriptor of filter tensor after fracZ transform
- * @param [in] filterDst filter addr after fracZ transform
- * @param [in] quantPara the struct for scale and offset of input, filter and output
- * @param [in] ySizeInBytes filter size after fracZ transform
- * @param [in] outputDataType output data type (passed by value, so it cannot be an output)
- * @param [in] groupNum group conv num
- * @return ccStatus_t
- */
- ccStatus_t ccTransFilterInt8AllOffset(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
- ccFilterDescriptor_t filterDstInfo, void *filterDst,
- const ccQuantAllOffsetPara_t *quantPara, uint32_t ySizeInBytes,
- ccDataType_t outputDataType, uint32_t groupNum);
-
- /**
- * @ingroup dnn
- * @brief transform bias in conv int8 all offset scene
- * @param [in] filterDesc descriptor of filter tensor
- * @param [in] biasDesc descriptor of bias tensor
- * @param [in] quantPara the struct for scale and offset of input, filter and output
- * @param [in] w filter addr
- * @param [in] bias bias addr
- * @return ccStatus_t
- */
- ccStatus_t ccTransInt8AllOffsetBias(const ccFilterDescriptor_t filterDesc, const ccTensorDescriptor_t biasDesc,
- const ccQuantAllOffsetPara_t *quantPara, const void *w, const void *bias);
-
- /**
- * @ingroup dnn
- * @brief dequantize forward computation
- * @param [in] handle handle id
- * @param [in] alpha alpha addr
- * @param [in] xDesc the input Desc descriptor
- * @param [in] x x data addr
- * @param [in] beta beta data addr
- * @param [in] yDesc the output Desc descriptor
- * @param [out] y y data addr (output described by yDesc)
- * @return ccStatus_t
- */
- ccStatus_t ccDequantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
- const void *x, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
- /**
- * @ingroup dnn
- * @brief quantize forward computation
- * @param [in] handle handle id
- * @param [in] alpha alpha addr
- * @param [in] xDesc the input Desc descriptor
- * @param [in] x x data addr
- * @param [in] beta beta data addr
- * @param [in] yDesc the output Desc descriptor
- * @param [out] y y data addr (output described by yDesc)
- * @return ccStatus_t
- */
- ccStatus_t ccQuantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t yDesc, void *y);
-
- #ifndef DAVINCI_LITE
- ccStatus_t ccActivationBackward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha,
- const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t xDesc,
- const void *x, const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
- #endif
-
- ccStatus_t ccL2LossForward(ccHandle_t handle, const ccL2LossDescriptor_t l2lossDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of top k v2
- * @param [in] xDesc descriptor of input tensor x
- * @param [in] kDesc descriptor of input tensor k
- * @param [in] k data of k (NOTE(review): presumably the number of top elements to select - confirm)
- * @param [in] axis NOTE(review): presumably the dimension along which top k is taken - confirm
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetTopKV2OutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, const void *k,
- const int64_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief top k v2 forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor x
- * @param [in] x input data x in device memory
- * @param [in] kDesc descriptor of input tensor k
- * @param [in] k input data k (NOTE(review): presumably the number of top elements to select - confirm)
- * @param [in] beta bias factors
- * @param [in] sorted NOTE(review): presumably whether the returned values are sorted - confirm
- * @param [in] axis NOTE(review): presumably the dimension along which top k is taken - confirm
- * @param [in] workSpace workspace buffer
- * @param [in] workSpaceSizeInBytes size of workSpace in bytes
- * @param [in] outputValuesDesc descriptor of the output values tensor
- * @param [in|out] outputValues output values data
- * @param [in] outputIndicesDesc descriptor of the output indices tensor
- * @param [in|out] outputIndices output indices data
- * @return ccStatus_t
- */
- ccStatus_t ccTopKV2Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t kDesc, const void *k, const void *beta, const bool sorted,
- const int64_t axis, void *workSpace, const uint32_t workSpaceSizeInBytes,
- const ccTensorDescriptor_t outputValuesDesc, void *outputValues,
- const ccTensorDescriptor_t outputIndicesDesc, void *outputIndices);
-
- /**
- * @ingroup dnn
- * @brief get the workspace size of top k v2
- * @param [in] xDesc descriptor of input tensor x
- * @param [in] kDesc descriptor of input tensor k
- * @param [in] indiceDesc descriptor of the indices tensor (NOTE(review): presumably the output indices - confirm)
- * @param [in] k data of k (NOTE(review): presumably the number of top elements to select - confirm)
- * @param [in] axis NOTE(review): presumably the dimension along which top k is taken - confirm
- * @param [in|out] sizeInBytes point to workspace size
- * @return ccStatus_t
- */
- ccStatus_t ccGetTopKV2ForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc,
- const ccTensorDescriptor_t indiceDesc, const void *k, const int64_t axis,
- uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief Get unsorted segment reduction output dim
- * @param [in] xDesc descriptor of input tensor
- * @param [in] segmentIdsDesc descriptor of input segmentIds tensor
- * @param [in] segmentsNum output slice num
- * @param [out] dimCnt output tensor dim cnt
- * @param [out] dim output tensor dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetUnsortedSegmentReductionOutputDim(const ccTensorDescriptor_t xDesc,
- const ccTensorDescriptor_t segmentIdsDesc, int32_t segmentsNum,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief unsorted segment sum forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] segmentIdsDesc descriptor of input segmentIds tensor
- * @param [in] segmentIds input segmentIds data in device memory
- * @param [in] segmentsNum output slice num
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccUnsortedSegmentSumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
- const void *x, const ccTensorDescriptor_t segmentIdsDesc, const void *segmentIds,
- const int32_t segmentsNum, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief reverse sequence forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] inputDesc descriptor of input tensor
- * @param [in] input input data in device memory
- * @param [in] seqLengthsDesc descriptor of seqLengths tensor
- * @param [in] seqLengths seqLengths data in device memory
- * @param [in] seqAxis NOTE(review): presumably the axis that is reversed per sequence - confirm
- * @param [in] batchAxis NOTE(review): presumably the axis indexed by seqLengths - confirm
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccReverseSequenceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t inputDesc,
- const void *input, const ccTensorDescriptor_t seqLengthsDesc,
- const void *seqLengths, int64_t seqAxis, int64_t batchAxis, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief equal forward computation between two tensors.
- * NOTE(review): this comment previously described realdiv; presumably an element-wise
- * equality comparison of x and y -- confirm against the implementation.
- * @param [in] handle cce handle
- * @param [in] alpha reserved.
- * @param [in] xDesc description of the left operand tensor.
- * @param [in] x data point of the left operand tensor.
- * @param [in] yDesc description of the right operand tensor.
- * @param [in] y data point of the right operand tensor.
- * @param [in] beta reserved.
- * @param [in] outputDesc description of the output tensor.
- * @param [out] output data point of the output tensor.
- * @return ccStatus_t
- */
-
- ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get output shape of equal.
- * @param [in] xDesc description of the left operand tensor.
- * @param [in] yDesc description of the right operand tensor.
- * @param [out] dimCnt output tensor dim cnt
- * @param [out] dim output tensor dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief invert permutation forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccInvertPermutationForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
- const void *x, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
-
- /**
- * @ingroup dnn
- * @brief get the workspace size of non max suppression
- * @param [in] handle cce handle
- * @param [in] scoresDesc descriptor of input scores tensor
- * @param [in] boxesDesc descriptor of input boxes tensor
- * @param [in|out] sizeInBytes point to workspace size
- * @return ccStatus_t
- */
- ccStatus_t ccGetNonMaxSuppressionWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t scoresDesc,
- const ccTensorDescriptor_t boxesDesc, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief get the output dim of non max suppression
- * @param [in] scoresDesc descriptor of input scores tensor
- * @param [in] maxOutPutSize the max size of output
- * @param [in|out] dimCnt point to the count of dim
- * @param [in|out] dim the array of output dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetNonMaxSuppressionOutputDim(const ccTensorDescriptor_t scoresDesc, const int32_t maxOutPutSize,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief multinomial forward.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] numSamples number of independent samples to draw for each row slice
- * @param [in] seed1 seed used to create a random seed for the distribution
- * @param [in] seed2 seed used to create a random seed for the distribution
- * @param [in] workSpace work space for internal access
- * @param [in] workSpaceSizeInBytes work space size in bytes
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccMultinomialForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- int32_t numSamples, int64_t seed1, int64_t seed2, void *workSpace,
- uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
- /**
- * @ingroup dnn
- * @brief get output dim of generated one-hot tensor.
- * @param [in] indicesDesc Indices description of one-hot position.
- * @param [in] depth presumably the size of the one-hot dimension (TODO confirm).
- * @param [in] axis presumably the output axis where the one-hot dimension is placed (TODO confirm).
- * @param [out] dimCnt output tensor dim cnt
- * @param [out] dim output tensor dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetOneHotOutputDim(const ccTensorDescriptor_t indicesDesc, int32_t depth, int32_t axis, int32_t *dimCnt,
- int32_t *dim, int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief generate a one-hot tensor using the given on/off values.
- * @param [in] handle Stream handle.
- * @param [in] alpha reserved.
- * @param [in] indicesDesc Indices description of one-hot position.
- * @param [in] indices Data pointer of indices.
- * @param [in] onDesc On value description.
- * @param [in] on Data pointer of on value.
- * @param [in] offDesc Off value description.
- * @param [in] off Data pointer of off value.
- * @param [in] depth presumably the size of the one-hot dimension (TODO confirm).
- * @param [in] axis presumably the output axis where the one-hot dimension is placed (TODO confirm).
- * @param [in] beta reserved.
- * @param [in] outputDesc Description of the generated one-hot tensor.
- * @param [out] output Data pointer of output.
- * @return ccStatus_t
- */
- ccStatus_t ccOneHotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t indicesDesc,
- const void *indices, const ccTensorDescriptor_t onDesc, const void *on,
- const ccTensorDescriptor_t offDesc, const void *off, const int32_t depth, const int32_t axis,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
- /**
- * @ingroup dnn
- * @brief get the workspace size of multinomial
- * @param [in] xDesc descriptor of input tensor
- * @param [out] sizeInBytes workspace size in bytes
- * @return ccStatus_t
- */
- ccStatus_t ccGetMultinomialWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
- /**
- * @ingroup dnn
- * @brief get the output dimension info of multinomial
- * @param [in] xDesc descriptor of input tensor
- * @param [in] numSample number of independent samples to draw for each row slice
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetMultinomialOutputDim(const ccTensorDescriptor_t xDesc, int32_t numSample, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief get the output dimension info of BiasAddBackward
- * @param [in] dyDesc descriptor of input tensor
- * @param [in|out] n outputTensor [N]CHW
- * @param [in|out] c outputTensor N[C]HW
- * @param [in|out] h outputTensor NC[H]W
- * @param [in|out] w outputTensor NCH[W]
- * @return ccStatus_t
- */
- ccStatus_t ccGetBiasAddBackwardOutputDim(const ccTensorDescriptor_t dyDesc, int32_t *n, int32_t *c, int32_t *h,
- int32_t *w);
-
- /**
- * @ingroup dnn
- * @brief biasadd backward.
- * Only declared when DAVINCI_LITE is not defined.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] dyDesc descriptor of input data
- * @param [in] dy input data in device memory
- * @param [in] beta common scale factor
- * @param [in] dbDesc descriptor of output data
- * @param [in|out] db output data in device memory
- * @return ccStatus_t
- */
- #ifndef DAVINCI_LITE
- ccStatus_t ccBiasAddBackward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t dyDesc, const void *dy,
- const void *beta, const ccTensorDescriptor_t dbDesc, void *db);
-
- ccStatus_t ccMaxPoolWithArgmaxForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t yDesc, void *y, const ccTensorDescriptor_t argMaskDesc,
- void *argMask);
- #endif
-
- ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
-
- ccStatus_t ccDestroyPoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
-
- ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t poolingMaskDesc, ccTensorFormat_t format,
- ccDataType_t dataType, int32_t n, int32_t c, int32_t h, int32_t w,
- int32_t windowH, int32_t windowW);
-
- ccStatus_t ccGetPoolingMaskTensorSizeInBytes(ccTensorDescriptor_t poolingMaskDesc, uint32_t *size);
-
- /**
- * @ingroup dnn
- * @brief get the mask output dimension info of maxpooling training forward
- * @param [in] poolingDesc descriptor of pooling operator
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] n point to batch size
- * @param [in|out] c point to channels
- * @param [in|out] h point to height of feature map
- * @param [in|out] w point to width of feature map
- * @param [in|out] windowH point to height of window
- * @param [in|out] windowW point to width of window
- * @return ccStatus_t
- */
- ccStatus_t ccGetPoolingMaskDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc, int32_t *n,
- int32_t *c, int32_t *h, int32_t *w, int32_t *windowH, int32_t *windowW);
-
- #ifndef DAVINCI_LITE
- ccStatus_t ccSoftmaxCrossEntropyLoss(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
- ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
- const ccTensorDescriptor_t logitsDesc, const void *logits,
- const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
- const void *beta, const ccTensorDescriptor_t lossDesc, void *loss);
-
- ccStatus_t ccSoftmaxCrossEntropyDx(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
- ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
- const ccTensorDescriptor_t logitsDesc, const void *logits,
- const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
- const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
-
- ccStatus_t ccAvgPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
- const ccTensorDescriptor_t dyDesc, const void *dy, const void *beta,
- const ccTensorDescriptor_t dxDesc, const void *dx);
-
- ccStatus_t ccTrainingAssignOp(ccHandle_t handle, const ccAssignOpMode_t assignOpDesc, const void *alpha,
- const void *beta, const ccTensorDescriptor_t aDesc, void *a,
- const ccTensorDescriptor_t bDesc, const void *b);
-
- /**
- * @ingroup dnn
- * @brief momentum optimizer for variable update
- * @param [in] handle cce handle
- * @param [in] inputDesc descriptor of input tensor: gradient,accumulation,variable
- * @param [in] gradient gradient input
- * @param [in|out] accumulation accumulation input and updated output
- * @param [in|out] variable variable input and updated output
- * @param [in] algo indicate whether need FP16 output
- * @param [in] momentum scalar to control accumulation
- * @param [in] learningRate scalar
- * @param [in] lossScaleReciprocal scalar
- * @param [in] workSpace additional memory address
- * @param [in] workSpaceSizeInBytes additional memory size
- * @param [in] variableUpdatedFP16Desc descriptor of FP16 output tensor: variableUpdatedFP16
- * @param [out] variableUpdatedFP16 variableUpdatedFP16
- * @return ccStatus_t
- */
- ccStatus_t ccApplyMomentum(ccHandle_t handle, const ccTensorDescriptor_t inputDesc, const void *gradient,
- void *accumulation, void *variable, const ccMomentumAlgo_t algo, const void *momentum,
- const void *learningRate, const void *lossScaleReciprocal, void *workSpace,
- const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t variableUpdatedFP16Desc,
- void *variableUpdatedFP16);
-
- ccStatus_t ccSsdClassifyLossTrain(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t labelDesc,
- const void *label, const ccTensorDescriptor_t greaterConstDesc,
- const void *greaterConst, const ccTensorDescriptor_t subConstDesc,
- const void *subConst, const ccTensorDescriptor_t sparseDesc, const void *sparse,
- const void *beta, const ccTensorDescriptor_t castoutDesc, const void *castout,
- const ccTensorDescriptor_t muloutDesc, const void *mulout);
-
- #endif
-
- /**
- * @ingroup dnn
- * @brief get the workspace size of applymomentum
- * @param [in] inputDesc descriptor of input tensor
- * @param [in|out] sizeInBytes point to workspace size
- * @return ccStatus_t
- */
- ccStatus_t ccGetApplyMomentumWorkspaceSize(const ccTensorDescriptor_t inputDesc, uint32_t *sizeInBytes);
- #ifndef DAVINCI_LITE
- ccStatus_t ccHwck2FracZ(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
- const ccFilterDescriptor_t yDesc, void *y);
-
- ccStatus_t ccFracZ2Hwck(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
- const ccFilterDescriptor_t yDesc, void *y);
- ccStatus_t ccAddNForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
- const void *x[], const void *beta, void *workSpace, uint32_t workSpaceSizeInBytes,
- const ccTensorDescriptor_t yDesc, void *y);
- #endif
- ccStatus_t ccGetAddNForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
- const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
- ccStatus_t ccGetAddNForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
- ccStatus_t ccAddTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
- uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
- ccStatus_t ccGetAddTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
- const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
- uint32_t *sizeInBytes);
- ccStatus_t ccGetAddTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
- ccStatus_t ccMulTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
- uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
- ccStatus_t ccGetMulTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
- const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
- uint32_t *sizeInBytes);
- ccStatus_t ccGetMulTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief get the workspace size of random shuffle
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] sizeInBytes point to workspace size
- * @return ccStatus_t
- */
- ccStatus_t ccGetRandomShuffleWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief random shuffle forward computation
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] workspace temporary space
- * @param [in] workspaceSizeInBytes temporary space size
- * @param [in] seed1 random seed used to generate random number
- * @param [in] seed2 random seed used to generate random number
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccRandomShuffleForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- void *workspace, const uint32_t workspaceSizeInBytes, const int64_t seed1,
- const int64_t seed2, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
- /**
- * @ingroup dnn
- * @brief sin forward:
- * data type only support float float16 double
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] input input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccSinForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief cos forward:
- * data type only support float float16 double
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] input input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccCosForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief tan forward:
- * data type only support float float16 double
- * data format only support ND
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] input input data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of unstack
- * @param [in] xDesc descriptor of input tensor
- * @param [in] axis the axis to unstack along
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetUnstackOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, int32_t *dimCnt, int32_t dim[],
- int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief unstack forward.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data
- * @param [in] x input data in device memory
- * @param [in] num the length of the dimension axis
- * @param [in] axis the axis to unstack along
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output array of output data pointers in device memory
- * (presumably num entries, one per unstacked slice -- TODO confirm)
- * @return ccStatus_t
- */
-
- ccStatus_t ccUnstackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- int32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output[]);
-
- ccStatus_t ccResizeNearestNeighborCpuForward(ccHandle_t handle, const ccResizeNearestNeighborDescriptor_t resizeDesc,
- const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
- /**
- * @ingroup dnn
- * @brief get the output dimension info of resize nearest neighbor
- * @param [in] resizeDesc descriptor of resize
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetResizeNearestNeighborOutputDim(const ccResizeNearestNeighborDescriptor_t resizeDesc,
- const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t dim[],
- int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of ResizeNearestNeighbor
- * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
- * @return ccStatus_t
- */
- ccStatus_t ccCreateResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of ResizeNearestNeighbor
- * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
-
- /**
- * @ingroup dnn
- * @brief set descriptor of ResizeNearestNeighbor.
- * @param [in|out] resizeDesc descriptor of resize nearest neighbor operator
- * @param [in] alignCorners whether the centers of input and output are aligned
- * @param [in] height height of output
- * @param [in] width width of output
- * @return ccStatus_t
- */
- ccStatus_t ccSetResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t resizeDesc, bool alignCorners,
- int32_t height, int32_t width);
-
- /**
- * @ingroup dnn
- * [ccGetPadV2OutputDim]
- * @brief get the output dimension info of pad
- * @param [in] xDesc descriptor of input tensor x
- * @param [in] padDesc descriptor of input paddings
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t xDesc, const ccPadV2Descriptor_t padDesc, int32_t *dimCnt,
- int32_t dim[], int32_t dimLen);
-
- ccStatus_t ccPadV2CpuForward(ccHandle_t handle, const ccPadV2Descriptor_t padDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of parameters for padv2 function
- * @param [in|out] padDesc point to descriptor of parameters for padv2 function
- * @return ccStatus_t
- */
- ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *padDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of parameters for padv2 function
- * @param [in|out] padDesc point to descriptor of parameters for padv2 function
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *padDesc);
-
- /**
- * @ingroup dnn
- * @brief init descriptor for parameter of padv2 function
- * @param [in|out] padDesc descriptor of pad
- * @param [in] padShapeCnt padshape count
- * @param [in] padShapeLow padshape low
- * @param [in] padShapeHigh padshape high
- * @param [in] padMode pad mode
- * @param [in] padValue pad value ptr
- * @param [in] padValueType pad value data type
- * @return ccStatus_t
- */
- ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t padDesc, const int32_t padShapeCnt, const int32_t padShapeLow[],
- const int32_t padShapeHigh[], const ccPadMode_t padMode, const void *padValue,
- const ccDataType_t padValueType);
- /**
- * @ingroup dnn
- * @brief create descriptor of batchToSpace
- * @param [in|out] batchToSpaceDesc point to descriptor of batchToSpace
- * @return ccStatus_t
- */
- ccStatus_t ccCreateBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
-
- /**
- * @ingroup dnn
- * @brief set batchToSpaceDesc
- * @param [in|out] paramsDesc descriptor of batchToSpace
- * @param [in] blockShape blockShape of batchToSpace
- * @param [in] crops crops of batchToSpace
- * @param [in] blockShapeLength blockShapeLength of batchToSpace
- * @return ccStatus_t
- */
- ccStatus_t ccSetBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t paramsDesc, const int32_t *blockShape,
- const int32_t *crops, const int32_t blockShapeLength);
-
- /**
- * @ingroup dnn
- * @brief get batchToSpaceDesc
- * NOTE(review): direction tags below are inferred from the signature (const descriptor,
- * non-const pointers) -- confirm against the implementation.
- * @param [in] paramsDesc descriptor of batchToSpace
- * @param [out] blockShape blockShape of batchToSpace
- * @param [out] crops crops of batchToSpace
- * @param [out] blockShapeLength blockShapeLength of batchToSpace
- * @return ccStatus_t
- */
- ccStatus_t ccGetBatchToSpaceDescriptor(const ccBatchToSpaceDescriptor_t paramsDesc, int32_t *blockShape, int32_t *crops,
- int32_t *blockShapeLength);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of batchToSpace
- * @param [in|out] batchToSpaceDesc point to descriptor of batchToSpace
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of batch to space
- * @param [in] xDesc descriptor of input tensor
- * @param [in] batchToSpaceDesc descriptor of batchToSpace
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
-
- ccStatus_t ccGetBatchToSpaceOutputDim(const ccTensorDescriptor_t xDesc,
- const ccBatchToSpaceDescriptor_t batchToSpaceDesc, int32_t *dimCnt, int32_t dim[],
- int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief batch to space forward computation
- * @param [in] handle cce handle
- * @param [in] paramsDesc descriptor of input params
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-
- ccStatus_t ccBatchToSpaceForward(ccHandle_t handle, const ccBatchToSpaceDescriptor_t paramsDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of spaceToBatch
- * @param [in|out] spaceToBatchDesc point to descriptor of spaceToBatch
- * @return ccStatus_t
- */
- ccStatus_t ccCreateSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
-
- /**
- * @ingroup dnn
- * @brief set spaceToBatchDesc
- * @param [in|out] paramsDesc descriptor of spaceToBatch
- * @param [in] blockShape blockShape of spaceToBatch
- * @param [in] paddings paddings of spaceToBatch
- * @param [in] blockShapeLength blockShapeLength of spaceToBatch
- * @return ccStatus_t
- */
- ccStatus_t ccSetSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t paramsDesc, const int32_t *blockShape,
- const int32_t *paddings, const int32_t blockShapeLength);
-
- /**
- * @ingroup dnn
- * @brief get spaceToBatchDesc
- * NOTE(review): direction tags below are inferred from the signature (const descriptor,
- * non-const pointers) -- confirm against the implementation.
- * @param [in] paramsDesc descriptor of spaceToBatch
- * @param [out] blockShape blockShape of spaceToBatch
- * @param [out] paddings paddings of spaceToBatch
- * @param [out] blockShapeLength blockShapeLength of spaceToBatch
- * @return ccStatus_t
- */
- ccStatus_t ccGetSpaceToBatchDescriptor(const ccSpaceToBatchDescriptor_t paramsDesc, int32_t *blockShape,
- int32_t *paddings, int32_t *blockShapeLength);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of spaceToBatch
- * @param [in|out] spaceToBatchDesc point to descriptor of spaceToBatch
- * @return ccStatus_t
- */
- ccStatus_t ccDestroySpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of space to batch
- * @param [in] xDesc descriptor of input tensor
- * @param [in] spaceToBatchDesc descriptor of spaceToBatch
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
-
- ccStatus_t ccGetSpaceToBatchOutputDim(const ccTensorDescriptor_t xDesc,
- const ccSpaceToBatchDescriptor_t spaceToBatchDesc, int32_t *dimCnt, int32_t dim[],
- int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief space to batch forward computation
- * @param [in] handle cce handle
- * @param [in] paramsDesc descriptor of input params
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
-
- ccStatus_t ccSpaceToBatchForward(ccHandle_t handle, const ccSpaceToBatchDescriptor_t paramsDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- ccStatus_t ccTransFilterDesc2TensorDesc(ccFilterDescriptor_t wDesc, ccTensorDescriptor_t tensorDesc);
-
- /**
- * @ingroup dnn
- * @brief get the output dimension info of extractImagePatches
- * @param [in] xDesc descriptor of input tensor x
- * @param [in] ksizes ksizes array
- * @param [in] strides strides array
- * @param [in] rates rates array
- * @param [in] padding padding type
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim arrays to save dims
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetExtractImagePatchesOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *ksizes,
- const ccIntArray_t *strides, const ccIntArray_t *rates,
- const ccExtractImagePatchesPadType_t padding, int32_t *dimCnt,
- int32_t dim[], const int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief cum forward.
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data, dimCnt:1~8
- * @param [in] x input data in device memory
- * @param [in] axisDesc descriptor of axis data, dimCnt:0
- * @param [in] axis which axis to cum calc, device memory
- * @param [in] beta common scale factor
- * @param [in] opType calc type, eg. sum, prod....
- * @param [in] exclusive cum flag, true or false
- * @param [in] reverse cum flag, true or false
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccCumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t axisDesc, const void *axis, const void *beta, const CumOpType opType,
- const bool exclusive, const bool reverse, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief ExtractImagePatches forward.
- * @param [in] handle cce handle
- * @param [in] ksizes ksizes array
- * @param [in] strides strides array
- * @param [in] rates rates array
- * @param [in] padding padding type
- * @param [in] alpha common scale factor
- * @param [in] xDesc descriptor of input data x
- * @param [in] x input data x in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccExtractImagePatchesForward(ccHandle_t handle, const ccIntArray_t *ksizes, const ccIntArray_t *strides,
- const ccIntArray_t *rates, const ccExtractImagePatchesPadType_t padding,
- const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief get argmax output dim info
- * @param [in] argDesc argmaxmin descriptor
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] dimCnt output dim count
- * @param [in|out] dim output dim
- * @param [in] dimLen length of dim
- * @return ccStatus_t
- */
- ccStatus_t ccGetArgMaxOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
- int32_t *dimCnt, int32_t dim[], int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief argmax forward computation
- * @param [in] handle cce handle
- * @param [in] argDesc argmaxmin descriptor
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] workSpace workspace pointer
- * @param [in] workSpaceSizeInBytes size of workSpace in bytes
- * @param [in] beta bias factors
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccArgMaxForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
- const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
- void *output);
-
- /**
- * @ingroup dnn
- * @brief get the workspace size of argmax
- * @param [in] argDesc descriptor of tagCcArgmaxmin
- * @param [in] xDesc descriptor of input tensor
- * @param [in|out] sizeInBytes workspace size in bytes
- * @return ccStatus_t
- */
- ccStatus_t ccGetArgMaxWorkspaceSize(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
- uint32_t *sizeInBytes);
-
- /**
- * @ingroup dnn
- * @brief create descriptor of Argmaxmin
- * @param [in|out] argDesc point to descriptor of Argmaxmin attr
- * @return ccStatus_t
- */
- ccStatus_t ccCreateArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor of Argmaxmin
- * @param [in|out] argDesc point to descriptor of Argmaxmin attr
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
-
- /**
- * @ingroup dnn
- * @brief set descriptor of Argmaxmin
- * @param [in|out] argDesc descriptor of tagCcArgmaxmin
- * @param [in] axisType axis type (accepted values are not described in this header; TODO confirm)
- * @param [in] outMaxVal whether to return the maximum value
- * @param [in] topK number that returns the maximum index or maximum value
- * @param [in] axis Describes which axis of the input Tensor to reduce across
- * @param [in] keepDims whether to keep reduced dim
- * @param [in] reduceSize the num of elements to be reduced to get topK elements; defaults to -1, which means the
- * total num of elements in axis dimension
- * @param [in] reduceDStride the stride for reduce operation; defaults to 1, which means the layout of target data
- * is continuous
- * @return ccStatus_t
- */
- ccStatus_t ccSetArgmaxminDescriptor(ccArgmaxminDescriptor_t argDesc, int32_t axisType, bool outMaxVal, int64_t topK,
- int64_t axis, bool keepDims, int64_t reduceSize = -1, int64_t reduceDStride = 1);
-
- ccStatus_t ccArgMinForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
- const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output); /**<
- * @brief argmin forward computation
- * NOTE(review): this declaration carries no parameter documentation. The parameters presumably have the same
- * meaning as the like-named parameters of ccArgMaxForward (handle, argDesc, alpha, xDesc, x, beta, outputDesc,
- * output); unlike ccArgMaxForward, no workSpace / workSpaceSizeInBytes arguments are taken. Confirm against the
- * implementation.
- * @return ccStatus_t
- */
-
- ccStatus_t ccGetArgMinOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
- int32_t *dimCnt, int32_t dim[], const int32_t dimLen); /**<
- * @brief get argmin output dim info
- * NOTE(review): this declaration carries no parameter documentation. The parameters presumably mirror those of
- * ccGetArgMaxOutputDim (argDesc, xDesc, dimCnt, dim, dimLen). Confirm against the implementation.
- * @return ccStatus_t
- */
- /**
- * @ingroup dnn
- * @brief lsh projection forward computation
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] hashDesc descriptor of input tensor hash
- * @param [in] hash input data hash in device memory
- * @param [in] weightDesc descriptor of input tensor weight
- * @param [in] weight input data weight in device memory
- * @param [in] inputDesc descriptor of input tensor input
- * @param [in] input input data (lookup) in device memory
- * @param [in] type 1: SPARSE, 2: DENSE
- * @param [in] beta bias factors
- * @param [in] workSpace workSpace data in device memory
- * @param [in] workSpaceSizeInBytes workSpace length in bytes
- * @param [in] outputDesc descriptor of output tensor
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccLshProjectionForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t hashDesc,
- const void *hash, const ccTensorDescriptor_t weightDesc, const void *weight,
- const ccTensorDescriptor_t inputDesc, const void *input, const LSHProjectionType type,
- const void *beta, void *workSpace, const uint32_t workSpaceSizeInBytes,
- const ccTensorDescriptor_t outputDesc, void *output);
- /**
- * @ingroup dnn
- * @brief get the workspace size of lsh projection forward
- * @param [in] inputDesc descriptor of input tensor input
- * @param [in] hashDataType data type of hash
- * @param [in|out] sizeInBytes workspace size in bytes
- * @return ccStatus_t
- */
- ccStatus_t ccGetLshProjectionForwardWorkspaceSize(const ccTensorDescriptor_t inputDesc, const ccDataType_t hashDataType,
- uint32_t *sizeInBytes);
- /**
- * @ingroup dnn
- * @brief get the output dimension info of LshProjection
- * @param [in] hashDesc descriptor of hash
- * @param [in] type type of mode
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim array to save dims
- * @param [in] dimLen length of the dim array
- * @return ccStatus_t
- */
- ccStatus_t ccGetLshProjectionOutputDim(const ccTensorDescriptor_t hashDesc, const LSHProjectionType type,
- int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
- /**
- * @ingroup dnn
- * @brief get the weight dimension info of LshProjection
- * @param [in] inputDesc descriptor of input
- * @param [in|out] dimCnt point to the weight dimCnt
- * @param [in|out] dim array to save dims
- * @param [in] dimLen length of the dim array
- * @return ccStatus_t
- */
- ccStatus_t ccGetLshProjectionWeightDim(const ccTensorDescriptor_t inputDesc, int32_t *dimCnt, int32_t dim[],
- const int32_t dimLen);
-
- /**
- * @ingroup dnn
- * @brief upsample forward computation
- * @param [in] handle cce handle
- * @param [in] upsamplePara descriptor of upsample parameters (input para in host memory)
- * @param [in] alpha common scale factor
- * @param [in] bottomDesc descriptor of input data bottom
- * @param [in] bottom input data bottom in device memory
- * @param [in] bottomMaskDesc descriptor of input data bottomMask
- * @param [in] bottomMask input data bottomMask in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output data
- * @param [in|out] output output data in device memory
- * @return ccStatus_t
- */
- ccStatus_t ccUpsampleForward(ccHandle_t handle, const ccUpsampleParaDescriptor_t upsamplePara, const void *alpha,
- const ccTensorDescriptor_t bottomDesc, const void *bottom,
- const ccTensorDescriptor_t bottomMaskDesc, const void *bottomMask, const void *beta,
- const ccTensorDescriptor_t outputDesc, void *output);
-
- /**
- * @ingroup dnn
- * @brief create descriptor for parameter of upsample function
- * @param [in|out] upsampleDesc point to descriptor of upsample para
- * @return ccStatus_t
- */
- ccStatus_t ccCreateUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
-
- /**
- * @ingroup dnn
- * @brief destroy descriptor for parameter of upsample function
- * @param [in|out] upsampleDesc point to descriptor of upsample para
- * @return ccStatus_t
- */
- ccStatus_t ccDestroyUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
-
- /**
- * @ingroup dnn
- * @brief set descriptor for parameter of upsample function
- * @param [in|out] upsampleDesc descriptor of upsample para
- * @param [in] scale the scale of height and width
- * @param [in] scaleHeight the scale of height
- * @param [in] scaleWidth the scale of width
- * @param [in] upsampleHeight the height of output
- * @param [in] upsampleWidth the width of output
- * @param [in] padOutHeight pad value height
- * @param [in] padOutWidth pad value width
- * @return ccStatus_t
- */
- ccStatus_t ccSetUpsampleDescriptor(ccUpsampleParaDescriptor_t upsampleDesc, const int32_t scale,
- const int32_t scaleHeight, const int32_t scaleWidth, const int32_t upsampleHeight,
- const int32_t upsampleWidth, const bool padOutHeight, const bool padOutWidth);
- /**
- * @ingroup dnn
- * @brief get the output dimension info of upsample
- * @param [in] upsamplePara descriptor of upsample para
- * @param [in] bottomDesc descriptor of input bottom tensor
- * @param [in|out] dimCnt point to the output dimCnt
- * @param [in|out] dim array to save dims
- * @param [in] dimLen length of the dim array
- * @return ccStatus_t
- */
- ccStatus_t ccGetUpsampleOutputDim(const ccUpsampleParaDescriptor_t upsamplePara, const ccTensorDescriptor_t bottomDesc,
- int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
-
- #ifndef DAVINCI_LITE  // the matmul declarations below are left out when DAVINCI_LITE is defined
- ccStatus_t ccMatmul(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t wDesc, const void *w, const ccTensorDescriptor_t biasDesc,
- const void *bias, const ccFullConnectFwdAlgo_t algo, void *workSpace,
- const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y,
- const bool transposeA, const bool transposeB); /**<
- * @brief matmul computation
- * NOTE(review): this declaration carries no parameter documentation. Presumably x and w are the two operands,
- * bias is an optional bias term, y is the result, and transposeA / transposeB request that x / w be
- * transposed. Confirm against the implementation.
- * @return ccStatus_t
- */
- ccStatus_t ccGetMatmulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, int32_t *n,
- int32_t *c, int32_t *h, int32_t *w, bool transposeA, bool transposeB); /**<
- * @brief get the output dimension info of matmul
- * NOTE(review): undocumented. The result is written through the n, c, h and w pointers; presumably these are
- * the four output dimensions. Confirm against the implementation.
- * @return ccStatus_t
- */
- ccStatus_t ccGetMatmulWorkspaceSize(ccHandle_t handle, const ccFullConnectFwdAlgo_t algo,
- const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
- const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes, bool transposeA,
- bool transposeB); /**<
- * @brief get the workspace size of matmul
- * NOTE(review): undocumented. The size is written through sizeInBytes; presumably this is the size to pass to
- * ccMatmul as workSpaceSizeInBytes. Confirm against the implementation.
- * @return ccStatus_t
- */
- #endif  // DAVINCI_LITE
-
- /**
- * @ingroup dnn
- * @brief gather_v2 function
- * @param [in] handle cce handle
- * @param [in] alpha common scale factor
- * @param [in] paramsDesc descriptor of params
- * @param [in] params params data in device memory
- * @param [in] indicesDesc descriptor of indices
- * @param [in] indices indices data in device memory
- * @param [in] axisDesc descriptor of axis
- * @param [in] axis axis data in device memory
- * @param [in] beta common scale factor
- * @param [in] outputDesc descriptor of output
- * @param [in|out] output output data in device memory
- * NOTE(review): output is declared as const void * although it is documented as [in|out] - confirm whether
- * the const qualifier is intended.
- * @return ccStatus_t
- */
- ccStatus_t ccGatherV2(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, const void *params,
- const ccTensorDescriptor_t indicesDesc, const void *indices, const ccTensorDescriptor_t axisDesc,
- const void *axis, const void *beta, const ccTensorDescriptor_t outputDesc, const void *output);
-
- /**
- * @ingroup dnn
- * @brief memory_clear function
- * @param [in] handle cce handle
- * @param [in] addrSpaceSizeInBytes addr space size in bytes
- * @param [in|out] addr device memory
- * NOTE(review): addr is declared as const void * although it is documented as [in|out] - confirm whether the
- * const qualifier is intended.
- * @return ccStatus_t
- */
- ccStatus_t ccMemoryClear(ccHandle_t handle, const uint64_t addrSpaceSizeInBytes, const void *addr);
-
- /**
- * @ingroup dnn
- * @brief check whether the input is overflow
- * @param [in] handle cce handle
- * @param [in] alpha scaling factors
- * @param [in] xDesc descriptor of input tensor
- * @param [in] x input data in device memory
- * @param [in] yDesc descriptor of output tensor
- * @param [in|out] y output data in device memory
- * @param [in] beta scaling factors
- * NOTE(review): y is declared as const void * although it is documented as [in|out] - confirm whether the
- * const qualifier is intended.
- * @return ccStatus_t
- */
- ccStatus_t ccIsFinite(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
- const ccTensorDescriptor_t yDesc, const void *y, const void *beta);
- }; // namespace cce
-
- #endif // DNN_OP_H__
|