Browse Source

add cce back

tags/v1.8.0^2
yanghaoran 2 years ago
parent
commit
f85064db07
20 changed files with 8562 additions and 0 deletions
  1. +63
    -0
      third_party/fwkacllib/inc/cce/aicpu_engine.h
  2. +56
    -0
      third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
  3. +31
    -0
      third_party/fwkacllib/inc/cce/blas_struct.h
  4. +101
    -0
      third_party/fwkacllib/inc/cce/cce.h
  5. +152
    -0
      third_party/fwkacllib/inc/cce/cce_def.hpp
  6. +82
    -0
      third_party/fwkacllib/inc/cce/common/attr_list.hpp
  7. +95
    -0
      third_party/fwkacllib/inc/cce/common/catch.hpp
  8. +36
    -0
      third_party/fwkacllib/inc/cce/compiler_stub.h
  9. +60
    -0
      third_party/fwkacllib/inc/cce/customize.h
  10. +23
    -0
      third_party/fwkacllib/inc/cce/dnn.h
  11. +676
    -0
      third_party/fwkacllib/inc/cce/dnn_base.h
  12. +994
    -0
      third_party/fwkacllib/inc/cce/dnn_base_def.hpp
  13. +4838
    -0
      third_party/fwkacllib/inc/cce/dnn_op.h
  14. +23
    -0
      third_party/fwkacllib/inc/cce/dnn_struct.hpp
  15. +894
    -0
      third_party/fwkacllib/inc/cce/dnn_struct_base.hpp
  16. +155
    -0
      third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
  17. +56
    -0
      third_party/fwkacllib/inc/cce/l2fusion_struct.hpp
  18. +65
    -0
      third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h
  19. +54
    -0
      third_party/fwkacllib/inc/cce/taskdown_api.h
  20. +108
    -0
      third_party/fwkacllib/inc/cce/taskdown_common.hpp

+ 63
- 0
third_party/fwkacllib/inc/cce/aicpu_engine.h View File

@@ -0,0 +1,63 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_ENGINE_H__
#define AICPU_ENGINE_H__

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Status codes returned by the aicpu engine interfaces declared in this header. */
typedef enum {
  AE_STATUS_SUCCESS                = 0,
  AE_STATUS_BAD_PARAM              = 1,
  AE_STATUS_OPEN_SO_FAILED         = 2,
  AE_STATUS_GET_KERNEL_NAME_FAILED = 3,
  AE_STATUS_INNER_ERROR            = 4,
  AE_STATUS_KERNEL_API_INNER_ERROR = 5,
  AE_STATUS_END_OF_SEQUENCE        = 6,
  AE_STATUS_DUMP_FAILED            = 7,
  AE_STATUS_TASK_WAIT              = 101,
  AE_STATUS_RESERVED /* implicit value: AE_STATUS_TASK_WAIT + 1 */
} aeStatus_t;

/**
* @ingroup aicpu engine
* @brief aeCallInterface:
* an interface to call a function in an op kernel lib
* @param [in] addr void *, should be STR_KERNEL * format
* @return aeStatus_t
*/
aeStatus_t aeCallInterface(void *addr);

/**
* @ingroup aicpu engine
* @brief aeBatchLoadKernelSo:
* an interface to load kernel so
* @param [in] loadSoNum number of so to load
* @param [in] soPaths array of paths of the so to load
* @param [in] soNames array of names of the so to load
* NOTE(review): soPaths and soNames presumably each hold loadSoNum entries - confirm.
* @return aeStatus_t
*/
aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]);

#ifdef __cplusplus
}
#endif

#endif // AICPU_ENGINE_H__

+ 56
- 0
third_party/fwkacllib/inc/cce/aicpu_engine_struct.h View File

@@ -0,0 +1,56 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_ENGINE_STRUCT_H__
#define AICPU_ENGINE_STRUCT_H__

#include "fwk_adpt_struct.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * The different frameworks we adapted for.
 * The enumerators are spaced by 10; FMK_KERNEL_TYPE_RESERVED takes the
 * value that follows FMK_KERNEL_TYPE_PT.
 */
typedef enum {
  FMK_KERNEL_TYPE_TF = 0,
  FMK_KERNEL_TYPE_CF = 10,
  FMK_KERNEL_TYPE_PT = 20,
  FMK_KERNEL_TYPE_RESERVED
} FwkkernelType_t;

/*
Framework op kernel descriptor: a kernel type tag followed by the
framework-specific operate parameter.
Packed to 1-byte alignment, so no padding is inserted between members.
NOTE(review): the member type is spelled with C++ scope resolution
(::aicpu::FWKAdapter::...), so this struct only compiles as C++ even though
it sits inside an extern "C" guard.
*/
#pragma pack(push, 1)
typedef struct {
uint32_t fwkKernelType; // FwkkernelType_t
union {
::aicpu::FWKAdapter::FWKOperateParam fwk_kernel;
} fwkKernelBase;
} STR_FWK_OP_KERNEL;
#pragma pack(pop)

/*
Session information record holding a session id, a kernel id and a flag.
Packed to 1-byte alignment, so there is no tail padding after sessFlag.
NOTE(review): the meaning of sessFlag is not evident from this header - confirm with the owner.
*/
#pragma pack(push, 1)
struct SessionInfo {
uint64_t sessionId;
uint64_t kernelId;
bool sessFlag;
};
#pragma pack(pop)

#ifdef __cplusplus
}
#endif
#endif // AICPU_ENGINE_STRUCT_H__

+ 31
- 0
third_party/fwkacllib/inc/cce/blas_struct.h View File

@@ -0,0 +1,31 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CC_BLAS_STRUCT_API__
#define CC_BLAS_STRUCT_API__

#include <stdint.h>

/* Fill mode selector (lower / upper). */
typedef enum {
  CCBLAS_FILL_MODE_LOWER = 0,
  CCBLAS_FILL_MODE_UPPER = 1
} ccblasFillMode_t;

/* Operation selector. NOTE(review): N / T presumably mean non-transposed / transposed - confirm. */
typedef enum {
  CCBLAS_OP_N = 0,
  CCBLAS_OP_T = 1,
} ccblasOperation_t;

/* Diagonal type selector (non-unit / unit). */
typedef enum {
  CCBLAS_DIAG_NON_UNIT = 0,
  CCBLAS_DIAG_UNIT = 1
} ccblasDiagType_t;

#endif // CC_BLAS_STRUCT_API__

+ 101
- 0
third_party/fwkacllib/inc/cce/cce.h View File

@@ -0,0 +1,101 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CCE_H__
#define CCE_H__

#include <stdint.h>
#include "cce_def.hpp"

namespace cce {

/**
* @ingroup cce
* @brief create cc handler
* @param [in|out] handle pointer to the cc handler to create
* @return ccStatus_t
*/
ccStatus_t ccCreate(ccHandle_t *handle);

/**
* @ingroup cce
* @brief destroy cc handler
* @param [in] handle pointer to the cc handler to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroy(ccHandle_t *handle);

/**
* @ingroup cce
* @brief bind stream with specified cc handler
* @param [in] handle cc handler
* @param [in] streamId stream
* @return ccStatus_t
*/
ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t streamId);

/**
* @ingroup cce
* @brief get the stream from cc handler
* @param [in] handle cc handler
* @param [in|out] streamId pointer that receives the stream
* @return ccStatus_t
*/
ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *streamId);

/**
* @ingroup cce
* @brief transform data according to the given data type transform mode
* @param [in] dataTypeTransMode mode of data type transform
* @param [in] inputData pointer to input data
* @param [in] inputDataSize input data size
* @param [in|out] outputData pointer to output data
* @param [in] outputDataSize output data size
* @return ccStatus_t
*/
ccStatus_t ccTransDataType(ccDataTypeTransMode_t dataTypeTransMode, const void *inputData, uint32_t inputDataSize,
void *outputData, const uint32_t outputDataSize);
/**
* @ingroup cce
* @brief cce system init function
*/
void cceSysInit();

/**
* @ingroup cce
* @brief cce log start-up function
*/
void cceLogStartup();

/**
* @ingroup cce
* @brief cce log shut-down function
*/
void cceLogShutdown();

/**
* @ingroup cce
* @brief set the profiling on or off
* @param [in] const char* target: The engine gets it from ENV. Don't need care about it.
* @param [in] const char* job_ctx: identifies profiling job
* @param [in] uint32_t flag: value: 0, on ; 1, off.
* @return ccStatus_t value: 0, success; 1, fail.
*/
ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag);

}; // namespace cce

#endif // CCE_H__

+ 152
- 0
third_party/fwkacllib/inc/cce/cce_def.hpp View File

@@ -0,0 +1,152 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CCE_DEF_H__
#define CCE_DEF_H__

#include "runtime/rt.h"

namespace cce {

/**
 * @ingroup cce
 * @brief memory configure for fusion; every field is zero after default construction
 */
typedef struct TagCceFusionMemCfg {
  uint64_t memAddr;        /**< memAddr */
  uint32_t memSize;        /**< memSize */
  uint32_t addrChangeFlag; /**< op data addr change flag. value:0,valid;1,not valid */
  uint32_t poolFlag;       /**< mempool flag : value:0,is valid; value: 1, not valid */

  /** Zero-initialise all fields. */
  TagCceFusionMemCfg() : memAddr(0), memSize(0), addrChangeFlag(0), poolFlag(0) {}
} CceFusionMemCfg_t;
/**
* @ingroup cce
* @brief return value
* NOTE(review): CC_STATUS_NOT_SUPPORTED and CC_STATUS_INVALID_VALUE are both
* defined as 7, so callers cannot tell them apart; as a consequence
* CC_STATUS_RESERVED evaluates to 8. Confirm whether the alias is intentional
* before changing it, since the numeric values are part of the library ABI.
*/
typedef enum tagCcStatus {
CC_STATUS_SUCCESS = 0, /**< succ */
CC_STATUS_NOT_INITIALIZED = 1, /**< not init */
CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */
CC_STATUS_BAD_PARAM = 3, /**< para check failed */
CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */
CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */
CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */
CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */
CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas (same value as CC_STATUS_NOT_SUPPORTED) */
CC_STATUS_RESERVED /**< just for check */
} ccStatus_t;

/**
 * @ingroup cce
 * @brief original data type; enumerators are numbered consecutively from 0
 */
typedef enum tagCcDataType {
  CC_DATA_FLOAT = 0,            /**< float type */
  CC_DATA_HALF,                 /**< fp16 type */
  CC_DATA_INT8,                 /**< int8 type */
  CC_DATA_INT32,                /**< int32 type */
  CC_DATA_UINT8,                /**< uint8 type */
  CC_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */
  CC_DATA_INT16,                /**< int16 type */
  CC_DATA_UINT16,               /**< uint16 type */
  CC_DATA_UINT32,               /**< uint32 type */
  CC_DATA_INT64,                /**< int64 type */
  CC_DATA_UINT64,               /**< uint64 type */
  CC_DATA_DOUBLE,               /**< double type */
  CC_DATA_BOOL,                 /**< bool type */
  CC_DATA_DUAL,                 /**< dual output type */
  CC_DATA_DUAL_SUB_INT8,        /**< dual output int8 type */
  CC_DATA_DUAL_SUB_UINT8,       /**< dual output uint8 type */
  CC_DATA_COMPLEX64,
  CC_DATA_COMPLEX128,
  CC_DATA_QINT8,
  CC_DATA_QINT16,
  CC_DATA_QINT32,
  CC_DATA_QUINT8,
  CC_DATA_QUINT16,
  CC_DATA_RESERVED
} ccDataType_t;

/**
* @ingroup cce
* @brief save context of cce library
*/
typedef struct tagCcContext {
rtStream_t streamId; /**< stream held by this context */
uint32_t opIndex; /**< NOTE(review): presumably an index/counter of ops - confirm */
} ccContext_t;

/** cc handler type: a pointer to the context structure above */
typedef struct tagCcContext *ccHandle_t;

/**
* @ingroup cce
* @brief mode of data type transform
*/
typedef enum tagCcDataTypeTransMode {
CC_DATATYPE_TRANS_FLOAT_NO_TRANS = 0, /**< origin data is float, no trans */
CC_DATATYPE_TRANS_FP16_NO_TRANS, /**< origin data is fp16, no trans */
CC_DATATYPE_TRANS_INT8_NO_TRANS, /**< origin data is int8, no trans */
CC_DATATYPE_TRANS_FLOAT_TO_FP16, /**< data type float trans to fp16 */
CC_DATATYPE_TRANS_FP16_TO_FLOAT, /**< data type fp16 trans to float */
CC_DATATYPE_TRANS_FLOAT_TO_INT8, /**< data type float trans to int8 */
CC_DATATYPE_TRANS_INT8_TO_FLOAT, /**< data type int8 trans to float */
CC_DATATYPE_TRANS_UINT8_TO_FLOAT, /**< data type uint8 trans to float */
CC_DATATYPE_TRANS_UINT8_NO_TRANS, /**< origin data is uint8, no trans */
CC_DATATYPE_TRANS_INT32_NO_TRANS, /**< origin data is int32, no trans */
CC_DATATYPE_TRANS_UINT16_NO_TRANS, /**< origin data is uint16, no trans */
CC_DATATYPE_TRANS_UINT16_TO_FLOAT, /**< data type uint16 trans to float */
CC_DATATYPE_TRANS_MODE_RESERVED
} ccDataTypeTransMode_t;

/**
* @ingroup cce
* @brief bundles a cc handler, a stream and the memory / weights-memory base addresses and sizes
* NOTE(review): weightsMemBaseHost is presumably the host-side copy of the weights memory - confirm.
*/
typedef struct tagContextInfo {
ccHandle_t handle;
rtStream_t stream;
uint8_t *memBase;
uint64_t totalMemSize;
uint8_t *weightsMemBase;
uint64_t weightsMemSize;
uint8_t *weightsMemBaseHost;
} ContextInfo;

/**
 * @ingroup cce
 * @brief cce function parameter type; MAX_NUM is the last enumerator and
 *        equals the number of real entries before it
 */
typedef enum tagCcFuncType {
  CC_FUSION_L2,
  GLOBAL_MEMORY_CLEAR,
  MAX_NUM,
} ccFuncParamType_t;

/**
* @ingroup cce
* @brief cce set function point state
* @param [in] type function parameter type whose state is set
* @param [in] isOpen state to set
* @return ccStatus_t
*/
ccStatus_t ccSetFuncState(ccFuncParamType_t type, bool isOpen);

/**
* @ingroup cce
* @brief cce get function point state
* @param [in] type function parameter type whose state is queried
* @return bool current state (NOTE(review): presumably the isOpen value last set - confirm)
*/
bool ccGetFuncState(ccFuncParamType_t type);

} // namespace cce
#endif // CCE_DEF_H__

+ 82
- 0
third_party/fwkacllib/inc/cce/common/attr_list.hpp View File

@@ -0,0 +1,82 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ATTR_LIST_HPP__
#define ATTR_LIST_HPP__

#include "catch.hpp"

/**
* @ingroup util
* @brief attribute list status / error values
* NOTE(review): presumably these are the values returned by AttrList::Add and
* AttrList::Get - confirm against the implementation.
*/
#define ATTR_SUCCESS (0)
#define ATTR_ERROR_NULL_POINT (1)
#define ATTR_ERROR_ALREADY_EXIST (2)
#define ATTR_ERROR_NOT_EXIST (3)
#define ATTR_ERROR_BUFFER_NOT_ENOUGH (4)
#define ATTR_ERROR_BAD_PARAM (5)
#define ATTR_ERROR_ALLOC_FAIL (6)
#define ATTR_ERROR_FREE_FAIL (7)
#define ATTR_ERROR_RESERVED (8)

// Implementation type, defined outside this header.
struct AttrListPrivate;
/**
* @ingroup util
* @brief attribute list
* Copy construction is deleted while copy assignment is declared.
* NOTE(review): the single-argument constructor is not explicit, so a
* uint32_t converts implicitly to AttrList - confirm this is intended.
*/
class AttrList {
public:
AttrList();
AttrList(uint32_t initLen);
~AttrList();
AttrList(const AttrList &rhs) = delete;
AttrList &operator=(const AttrList &rhs);

public:
/**
* @ingroup util
* @brief add paras
* @param [in] attrId attribute id
* @param [in] attrLen length of attribute
* @param [in] attrValue pointer to attribute
* @return uint32_t status value (NOTE(review): presumably one of the ATTR_* values - confirm)
*/
uint32_t Add(uint32_t attrId, uint32_t attrLen, const void *attrValue);

/**
* @ingroup util
* @brief read paras
* @param [in] attrId attribute id
* @param [out] attrLen reference that receives the length of attribute
* @param [out] attr_value reference to a pointer that receives the attribute
* @return uint32_t status value (NOTE(review): presumably one of the ATTR_* values - confirm)
*/
uint32_t Get(uint32_t attrId, uint32_t &attrLen, const void *&attr_value) const;

/**
* @ingroup util
* @brief get the length of attribute list
* @return length of attribute
*/
uint32_t Length() const;

private:
AttrListPrivate *impl_;
uint32_t initLen_;
uint32_t Init();
};
#endif // ATTR_LIST_HPP__

+ 95
- 0
third_party/fwkacllib/inc/cce/common/catch.hpp View File

@@ -0,0 +1,95 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CATCH_HPP_
#define CATCH_HPP_

#include <stdint.h>
#include <iostream>

/*
 * Goto-based error handling helpers.
 *
 * Usage shape inside one function body:
 *   PROC
 *     EXEC(...) / JUDGE(...) / THROW(...)   // jump to the error label on failure
 *   END_PROC
 *     CATCH_ERROR(code) ... END_CATCH_ERROR // inspect ERROR_CODE()
 *
 * PROC declares two locals (error code and error line number); THROW stores
 * the code, records __LINE__ and jumps to the label defined by END_PROC.
 * The macros expand to brace-delimited blocks and are written to be used
 * without a trailing semicolon.
 */

/* Accessors for the locals declared by PROC. */
#define ERROR_CODE() __catch_error_code
#define ERROR_LINE_NO() __catch_error_line_no
/* Records the current source line as the error line (expands with a trailing ';'). */
#define ERROR_PROC() __catch_error_line_no = __LINE__;

/* Opens a protected region: declares the error locals and opens a scope. */
#define PROC                                   \
  uint32_t __catch_error_code = 0x7FFFFFCC;    \
  uint32_t __catch_error_line_no = 0xFFFFFFFF; \
  {
/* Closes the scope opened by PROC and defines the label THROW jumps to. */
#define END_PROC \
  }              \
  __tabErrorCode:
/* Stores errcode, records the line and jumps to the END_PROC label. */
#define THROW(errcode)              \
  {                                 \
    __catch_error_code = (errcode); \
    ERROR_PROC();                   \
    goto __tabErrorCode;            \
  }
/* Evaluates func; a non-zero result becomes the error code and is thrown. */
#define EXEC(func)                                                    \
  {                                                                   \
    if (0 != (__catch_error_code = (func))) THROW(__catch_error_code) \
  }
/* Evaluates func; on a non-zero result throws error_code (func's result is not stored). */
#define EXEC_EX1(func, error_code)     \
  {                                    \
    if (0 != (func)) THROW(error_code) \
  }
/* Evaluates func; if the result differs from succRet throws error_code. */
#define EXEC_EX(func, succRet, error_code)                          \
  {                                                                 \
    if (succRet != (__catch_error_code = (func))) THROW(error_code) \
  }
/*
 * Evaluates func; if the result differs from succRet throws that result.
 * Fix: the opening brace after the if-condition was missing while its closing
 * brace was present, so every expansion had unbalanced braces and failed to
 * compile. The shape now mirrors ASSERT_JUDGE.
 */
#define ASSERT_EXEC(func, succRet)                                         \
  {                                                                        \
    if (succRet != (__catch_error_code = (func))) { /*GO_ASSERT_FALSE();*/ \
      THROW(__catch_error_code)                                            \
    }                                                                      \
  }
/* Evaluates func; if the result differs from succRet throws errcode. */
#define NEW_ERROR_EXEC(errcode, func, succRet) \
  {                                            \
    if (succRet != (func)) {                   \
      THROW(errcode)                           \
    }                                          \
  }
/* Throws errcode when expr is false. */
#define JUDGE(errcode, expr) \
  {                          \
    if (!(expr)) {           \
      THROW(errcode)         \
    }                        \
  }
/* Same check as JUDGE; the assertion hook is commented out. */
#define ASSERT_JUDGE(errcode, expr)         \
  {                                         \
    if (!(expr)) { /*GO_ASSERT_FALSE();*/   \
      THROW(errcode)                        \
    }                                       \
  }
/* Throws errcode when expr is true. */
#define JUDGE_FALSE(errcode, expr) \
  {                                \
    if (expr) {                    \
      THROW(errcode)               \
    }                              \
  }
/* Inside a loop: continues with the next iteration when expr is true. */
#define JUDGE_CONTINUE(expr) \
  {                          \
    if (expr) {              \
      continue;              \
    }                        \
  }
/* Opens a handler block that runs when the stored error code equals errcode. */
#define CATCH_ERROR(errcode) if (__catch_error_code == (errcode)) { /* ERROR_LOG(); */
/* Opens an unconditional handler block. */
#define CATCH_ALL_ERROR {
/* Closes a block opened by CATCH_ERROR or CATCH_ALL_ERROR. */
#define END_CATCH_ERROR }
/* Defines the label targeted by GOTO_FINAL(). */
#define FINAL __tabFinal:
/* Terminates the FINAL section with an empty statement. */
#define END_FINAL /*GO_ASSERT_FALSE()*/ ;
/* Jumps to the label defined by FINAL. */
#define GOTO_FINAL() goto __tabFinal;
#endif // CATCH_HPP_

+ 36
- 0
third_party/fwkacllib/inc/cce/compiler_stub.h View File

@@ -0,0 +1,36 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef COMPILER_STUB_H__
#define COMPILER_STUB_H__

namespace cce {

/**
* @ingroup cce
* @brief compiler stub init func
* @return bool (NOTE(review): presumably true on success - confirm)
*/
bool compilerStubInit();

/**
* @ingroup cce
* @brief compiler stub free func
* @return bool (NOTE(review): presumably true on success - confirm)
*/
bool compilerStubFree();

}; // namespace cce

#endif // COMPILER_STUB_H__

+ 60
- 0
third_party/fwkacllib/inc/cce/customize.h View File

@@ -0,0 +1,60 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CC_CUSTOMIZE_API__
#define CC_CUSTOMIZE_API__

#include <stdint.h>

/* Capacity of the dim[] array in opTensor_t. */
#define CC_DEVICE_DIM_MAX 8
/* Tensor format of a customized op tensor. */
typedef enum tagOpTensorFormat {
  OP_TENSOR_FORMAT_NC1HWC0 = 0,
  OP_TENSOR_FORMAT_ND,
  OP_TENSOR_FORMAT_RESERVED,
} opTensorFormat_t;


/* Data type of a customized op tensor; enumerators are numbered consecutively from 0. */
typedef enum tagOpDataType {
  OP_DATA_FLOAT = 0,            /**< float type */
  OP_DATA_HALF,                 /**< fp16 type */
  OP_DATA_INT8,                 /**< int8 type */
  OP_DATA_INT32,                /**< int32 type */
  OP_DATA_UINT8,                /**< uint8 type */
  OP_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */
  OP_DATA_RESERVED
} opDataType_t;

/* Tensor description for customized ops: format, data type and dimension info. */
typedef struct tagOpTensor
{
// real dim info
opTensorFormat_t format;
opDataType_t data_type;
int32_t dim_cnt; // NOTE(review): presumably the number of valid entries in dim[] - confirm
int32_t mm; // NOTE(review): meaning not evident from this header - confirm with the owner
int32_t dim[CC_DEVICE_DIM_MAX];
} opTensor_t;

/* Alias of opTensor_t. */
typedef opTensor_t tagCcAICPUTensor;
/* Stream handle as an untyped pointer. */
typedef void * rtStream_t;
/*
 * Function pointer type for an aicpu run function.
 * NOTE(review): the two (opTensor_t **, void **, int32_t) triples presumably
 * describe inputs and outputs (descriptors, data pointers, count), followed by
 * an opaque argument and the stream - confirm with the implementation.
 */
typedef void (*aicpu_run_func)(opTensor_t **, void **, int32_t,
opTensor_t **, void **, int32_t, void *, rtStream_t);


#endif // CC_CUSTOMIZE_API__


+ 23
- 0
third_party/fwkacllib/inc/cce/dnn.h View File

@@ -0,0 +1,23 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_H__
#define DNN_H__

#include "cce/dnn_base.h"
#include "cce/dnn_op.h"

#endif // DNN_H__

+ 676
- 0
third_party/fwkacllib/inc/cce/dnn_base.h View File

@@ -0,0 +1,676 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_BASE_H__
#define DNN_BASE_H__

#include "cce/blas_struct.h"
#include "cce/customize.h"
#include "cce/dnn_base_def.hpp"

namespace cce {
/**
* @ingroup dnn
* @brief Minimum epsilon allowed to be used in the Batch Normalization formula
*/
#define CC_BN_MIN_EPSILON (1e-7)

/* Fallback definition of NULL, used only when no earlier header has defined it. */
#ifndef NULL
#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void *)0)
#endif
#endif

/**
* @ingroup dnn
* @brief max number of dimensions
*/
#define CC_DIM_MAX (8)

/**
* @ingroup dnn
* @brief descriptor of L2 loss (pointer to a structure that is not defined in this header)
*/
typedef struct cCTagL2LossDescriptor * ccL2LossDescriptor_t;

/**
 * @ingroup dnn
 * @brief mode of concatfive2four
 */
typedef enum tagTransForLossMode {
  CC_TRANS_FOR_BOX = 0,
  CC_TRANS_FOR_SCORE,
} ccTransForLossMode_t;

/**
* @ingroup dnn
* @brief descriptor of concatfive2four (pointer to a structure that is not defined in this header)
*/
typedef struct tagCcConcatFive2Four_t *ccConcatFive2FourDescriptor_t;

}; /* end cce */

namespace cce {

/**
* @ingroup dnn
* @brief create descriptor of tensor
* @param [in|out] tensorDesc pointer to descriptor of tensor
* @return ccStatus_t
*/
ccStatus_t ccCreateTensorDescriptor(ccTensorDescriptor_t *tensorDesc);

/**
* @ingroup dnn
* @brief destroy descriptor of tensor
* @param [in] tensorDesc pointer to the descriptor of tensor to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyTensorDescriptor(ccTensorDescriptor_t *tensorDesc);

/**
* @ingroup dnn
* @brief initialize a tensor descriptor as a 4d tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] format format of tensor
* @param [in] dataType data type in device
* @param [in] n batch size
* @param [in] c channels
* @param [in] h height of feature map
* @param [in] w width of feature map
* @return ccStatus_t
*/
ccStatus_t ccSetTensor4dDescriptor(ccTensorDescriptor_t tensorDesc,
ccTensorFormat_t format,
ccDataType_t dataType,
int32_t n,
int32_t c,
int32_t h,
int32_t w);

/**
* @ingroup dnn
* @brief read 4d tensor
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] dataType pointer to data type in device
* @param [in|out] n pointer to batch size
* @param [in|out] c pointer to channels
* @param [in|out] h pointer to height of feature map
* @param [in|out] w pointer to width of feature map
* @param [in|out] nStride pointer to stride of n
* @param [in|out] cStride pointer to stride of c
* @param [in|out] hStride pointer to stride of h
* @param [in|out] wStride pointer to stride of w
* @return ccStatus_t
*/
ccStatus_t ccGetTensor4dDescriptor(const ccTensorDescriptor_t tensorDesc,
ccDataType_t *dataType,
int32_t *n,
int32_t *c,
int32_t *h,
int32_t *w,
int32_t *nStride,
int32_t *cStride,
int32_t *hStride,
int32_t *wStride);

/**
* @ingroup dnn
* @brief print 4d tensor (only in debug log mode)
* @param [in] tensorDesc descriptor of tensor
* @return ccStatus_t
*/
ccStatus_t ccPrintTensor4dDescriptor(const ccTensorDescriptor_t tensorDesc);

/**
* @ingroup dnn
* @brief print Nd tensor (only in debug log mode)
* @param [in] tensorDesc descriptor of tensor
* @return ccStatus_t
*/
ccStatus_t ccPrintTensorNdDescriptor(const ccTensorDescriptor_t tensorDesc);

/**
* @ingroup dnn
* @brief initialize a tensor descriptor as an Nd tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] dataType data type in device
* @param [in] dimCnt Dimension of the tensor
* @param [in] dimA Array of dimension dimCnt that contains the size of the tensor for every dimension. Size along unused dimensions should be set to 1.
* @return ccStatus_t
*/
ccStatus_t ccSetTensorNdDescriptor(ccTensorDescriptor_t tensorDesc,
ccDataType_t dataType,
int32_t dimCnt,
int32_t dimA[]);

/**
* @ingroup dnn
* @brief read Nd tensor
* @param [in] tensorDesc descriptor of tensor
* @param [in] dimCntReq number of dimensions requested; dimA and strideA are sized by it
* @param [in|out] dataType pointer to data type in device
* @param [in|out] dimCnt pointer to the dimension count of the tensor
* @param [in|out] dimA Array of dimension of at least dimCntReq that will be filled with the dimensions from the provided tensor descriptor.
* @param [in|out] strideA Array of dimension dimCntReq that contains the stride of the tensor for every dimension
* @return ccStatus_t
*/
ccStatus_t ccGetTensorNdDescriptor(const ccTensorDescriptor_t tensorDesc,
int32_t dimCntReq,
ccDataType_t *dataType,
int32_t *dimCnt,
int32_t dimA[],
int32_t strideA[]);

/**
* @ingroup dnn
* @brief transform tensor between 4d(NCHW) and 5d(NC1HWC0)
* @param [in] xDesc descriptor of input tensor
* @param [in] x pointer to input data in host memory
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y pointer to output data in host memory
* @param [in] ySizeInBytes size of outputData
* @return ccStatus_t
*/
ccStatus_t ccTransTensor(const ccTensorDescriptor_t xDesc,
const void *x,
const ccTensorDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes);

/**
* @ingroup dnn
* @brief get the format and dimcnt of Tensor
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] format pointer to format
* @return ccStatus_t
*/
ccStatus_t ccGetTensorFormat(const ccTensorDescriptor_t tensorDesc,
ccTensorFormat_t *format);

/**
* @ingroup dnn
* @brief set the format and dimcnt of Tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] format format to set
* @return ccStatus_t
*/
ccStatus_t ccSetTensorFormat(ccTensorDescriptor_t tensorDesc,
ccTensorFormat_t format);


/**
* @ingroup dnn
* @brief get the RealDimCnt of Tensor
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] realDimCnt pointer to RealDimCnt
* @return ccStatus_t
*/
ccStatus_t ccGetTensorRealDimCnt(const ccTensorDescriptor_t tensorDesc,
int32_t *realDimCnt);

/**
* @ingroup dnn
* @brief set the RealDimCnt of Tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] realDimCnt RealDimCnt to set
* @return ccStatus_t
*/
ccStatus_t ccSetTensorRealDimCnt(ccTensorDescriptor_t tensorDesc,
int32_t realDimCnt);


/**
* @ingroup dnn
* @brief get data size of 4d tensor
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] size pointer to data size
* @return ccStatus_t
*/
ccStatus_t ccGetTensorSizeInBytes(const ccTensorDescriptor_t tensorDesc, uint32_t *size);

/**
* @ingroup dnn
* @brief get data size of 4d tensor which is aligned to 32B
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] size pointer to data size
* @return ccStatus_t
*/
ccStatus_t ccGetTensorMemorySizeInBytes(const ccTensorDescriptor_t tensorDesc, uint32_t *size);


/**
* @ingroup dnn
* @brief set data size of tensor
* @param [in|out] xDesc descriptor of tensor
* @param [in] size data size (NOTE(review): presumably in bytes - confirm)
* @return ccStatus_t
*/
ccStatus_t ccSetTensorDataSize(ccTensorDescriptor_t xDesc, uint32_t size);

/**
* @ingroup dnn
* @brief get data size of 4d filter
* @param [in] filterDesc descriptor of filter
* @param [in|out] size pointer to data size
* @return ccStatus_t
*/
ccStatus_t ccGetFilterSizeInBytes(const ccFilterDescriptor_t filterDesc, uint32_t *size);


/**
* @ingroup dnn
* @brief read 4d filter
* @param [in] filterDesc descriptor of filter
* @param [in|out] format pointer to format of filter
* @param [in|out] dataType pointer to data type in device
* @param [in|out] k pointer to number of output feature maps
* @param [in|out] c pointer to number of input feature maps
* @param [in|out] h pointer to height of filter
* @param [in|out] w pointer to width of filter
* @return ccStatus_t
*/
ccStatus_t ccGetFilter4dDescriptor(const ccFilterDescriptor_t filterDesc,
ccTensorFormat_t *format,
ccDataType_t *dataType,
int32_t *k,
int32_t *c,
int32_t *h,
int32_t *w);

/**
* @ingroup dnn
* @brief transform filter data (NOTE(review): per the function name, from FracZ format to NCHW - confirm)
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes size of outputData
* @return ccStatus_t
*/
ccStatus_t ccTransFilterFracZToNCHW(const ccFilterDescriptor_t wDesc,
const void *w,
ccFilterDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes);

/**
* @ingroup dnn
* @brief transform weight to fractal format, and transform data type together
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes size of outputData
* @return ccStatus_t
*/
ccStatus_t ccTransFilter(const ccFilterDescriptor_t wDesc,
const void *w,
const ccFilterDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes);

/**
* @ingroup dnn
* @brief transform weight to fractal format, and transform data type together
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes size of outputData
* @param [in] outputDataType data type of the output
* @return ccStatus_t
*/
ccStatus_t ccTransFilterInt8(const ccFilterDescriptor_t wDesc,
const void *w,
ccFilterDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes,
ccDataType_t outputDataType);

/**
* @ingroup dnn
* @brief create descriptor of filter
* @param [in|out] filterDesc pointer to descriptor of filter
* @return ccStatus_t
*/
ccStatus_t ccCreateFilterDescriptor(ccFilterDescriptor_t *filterDesc);

/**
* @ingroup dnn
* @brief destroy descriptor of filter
* @param [in] filterDesc pointer to the descriptor of filter to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyFilterDescriptor(ccFilterDescriptor_t *filterDesc);

/**
* @ingroup dnn
* @brief init conv descriptor to 2d conv
* @param [in|out] convDesc descriptor of convolution operator
* @param [in] mode mode of convolution
* @param [in] padMode mode of padding
* @param [in] padHHead zero padding in height head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same value
* @param [in] padHTail zero padding in height tail, need set when padMode is CC_PADDING_DIRECTASSIGN.
* @param [in] padWHead zero padding in width head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same value
* @param [in] padWTail zero padding in width tail, need set when padMode is CC_PADDING_DIRECTASSIGN
* @param [in] group group value (NOTE(review): presumably the number of convolution groups - confirm)
* @param [in] strideH stride in height
* @param [in] strideW stride in width
* @param [in] dilationH dilation in height
* @param [in] dilationW dilation in width
* @return ccStatus_t
*/
ccStatus_t ccSetConvolution2dDescriptor(ccConvolutionDescriptor_t convDesc,
ccConvolutionMode_t mode,
ccPaddingMode_t padMode,
int32_t padHHead,
int32_t padHTail,
int32_t padWHead,
int32_t padWTail,
int32_t group,
int32_t strideH,
int32_t strideW,
int32_t dilationH,
int32_t dilationW);

/**
* @ingroup dnn
* @brief read 2d conv
* @param [in] convDesc descriptor of convolution operator
* @param [in|out] mode pointer to mode of convolution
* @param [in|out] padMode pointer to mode of padding
* @param [in|out] padHHead pointer to zero padding in height head
* @param [in|out] padHTail pointer to zero padding in height tail
* @param [in|out] padWHead pointer to zero padding in width head
* @param [in|out] padWTail pointer to zero padding in width tail
* @param [in|out] group pointer to group value
* @param [in|out] strideH pointer to stride in height
* @param [in|out] strideW pointer to stride in width
* @param [in|out] dilationH pointer to dilation in height
* @param [in|out] dilationW pointer to dilation in width
* @return ccStatus_t
*/
ccStatus_t ccGetConvolution2dDescriptor(const ccConvolutionDescriptor_t convDesc,
ccConvolutionMode_t *mode,
ccPaddingMode_t *padMode,
int32_t *padHHead,
int32_t *padHTail,
int32_t *padWHead,
int32_t *padWTail,
int32_t *group,
int32_t *strideH,
int32_t *strideW,
int32_t *dilationH,
int32_t *dilationW);

/**
* @ingroup dnn
* @brief get the output dimension info of 2d convolution
* @param [in] convDesc descriptor of convolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] n pointer to batch size
* @param [in|out] c pointer to channels
* @param [in|out] h pointer to height of feature map
* @param [in|out] w pointer to width of feature map
* @return ccStatus_t
*/
ccStatus_t ccGetConvolution2dForwardOutputDim(const ccConvolutionDescriptor_t convDesc,
const ccTensorDescriptor_t xDesc,
const ccFilterDescriptor_t wDesc,
int32_t *n,
int32_t *c,
int32_t *h,
int32_t *w);

/**
* @ingroup dnn
* @brief create descriptor of convolution operator
* @param [in|out] convDesc point to descriptor of convolution operator
* @return ccStatus_t
*/
ccStatus_t ccCreateConvolutionDescriptor(ccConvolutionDescriptor_t *convDesc);

/**
* @ingroup dnn
* @brief destroy descriptor of convolution operator
* @param [in] convDesc point to the descriptor of convolution operator to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyConvolutionDescriptor(ccConvolutionDescriptor_t *convDesc);

/**
* @ingroup dnn
* @brief check specific stride condition flag
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in] biasDesc descriptor of bias tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] transMark output condition flag (passed by reference)
* @return ccStatus_t
*/
ccStatus_t ccDeconvSpStrideCondCheck(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
const ccTensorDescriptor_t biasDesc,
const ccFilterDescriptor_t wDesc,
uint32_t &transMark);

/**
* @ingroup dnn
* @brief special deconv stride trans
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in] biasDesc descriptor of bias tensor (marked unused via __attribute__((__unused__)))
* @param [in] wDesc descriptor of filter
* @param [in|out] deconvStPtr descriptor of trans Deconvolution operator (NOTE(review): description inferred from the
*                 parameter type; the original text said "descriptor of filter" - confirm)
* @param [in|out] xStPtr descriptor of trans input tensor
* @param [in|out] yStPtr descriptor of trans output tensor
* @param [in|out] wStPtr descriptor of trans filter tensor
* @param [in] transMark condition flag (passed by value)
* @return ccStatus_t
*/
ccStatus_t ccDeconvSpStrideDescTrans(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
const ccTensorDescriptor_t biasDesc __attribute__((__unused__)),
const ccFilterDescriptor_t wDesc,
ccConvolutionDescriptor_t deconvStPtr,
ccTensorDescriptor_t xStPtr,
ccTensorDescriptor_t yStPtr,
ccFilterDescriptor_t wStPtr,
uint32_t transMark);

/**
* @ingroup dnn
* @brief check deconv goto aicore flag
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] isGotoAicore point to the out flag
* @param [in] transMark condition flag
* @return ccStatus_t
*/
ccStatus_t ccDeconvCheckGotoAiCore(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
const ccFilterDescriptor_t wDesc,
uint32_t *isGotoAicore,
uint32_t transMark);

/**
* @ingroup dnn
* @brief get the output dimension info of 2d Deconvolution
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] n point to batch size of the output
* @param [in|out] c point to channels of the output
* @param [in|out] h point to height of the output feature map
* @param [in|out] w point to width of the output feature map
* @return ccStatus_t
*/
ccStatus_t ccGetDeconvolution2dForwardOutputDim(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccFilterDescriptor_t wDesc,
int32_t *n,
int32_t *c,
int32_t *h,
int32_t *w);

/**
* @ingroup dnn
* @brief create descriptor of PAD
* @param [in|out] padDesc point to the descriptor of PAD to be created
* @return ccStatus_t
*/
ccStatus_t ccCreatePadDescriptor(ccPadDescriptor_t *padDesc);

/**
* @ingroup dnn
* @brief destroy descriptor of PAD
* @param [in] padDesc point to the descriptor of PAD to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyPadDescriptor(ccPadDescriptor_t *padDesc);

/**
* @ingroup dnn
* @brief set PADDesc
* @param [in|out] padDesc descriptor of PAD
* @param [in] padMode mode of PAD
* @param [in] padValue pad value of PAD
* @param [in] htop height top pad of PAD
* @param [in] hbottom height bottom pad of PAD
* @param [in] wleft width left pad of PAD
* @param [in] wright width right pad of PAD
* @return ccStatus_t
*/
ccStatus_t ccSetPadDescriptor(ccPadDescriptor_t padDesc,
ccPadMode_t padMode,
float padValue,
int32_t htop,
int32_t hbottom,
int32_t wleft,
int32_t wright);

/**
* @ingroup dnn
* @brief read 2d pooling
* @param [in] poolingDesc descriptor of pooling operator
* @param [in|out] mode point to mode of pooling
* @param [in|out] padMode point to mode of padding
* @param [in|out] maxpoolingNanOpt point to Nan propagation mode
* @param [in|out] windowH point to height of pooling window
* @param [in|out] windowW point to width of pooling window
* @param [in|out] padHHead point to zero padding in height head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same value.
* @param [in|out] padHTail point to zero padding in height tail, need set when padMode is CC_PADDING_DIRECTASSIGN.
* @param [in|out] padWHead point to zero padding in width head, if padMode is not CC_PADDING_DIRECTASSIGN head and tail is same value.
* @param [in|out] padWTail point to zero padding in width tail, need set when padMode is CC_PADDING_DIRECTASSIGN.
* @param [in|out] strideH point to stride in height
* @param [in|out] strideW point to stride in width
* @param [in|out] dataMode point to data mode (NOTE(review): no description in the original header; confirm meaning)
* @param [in|out] ceilMode 0:Ceil 1:Floor
* @param [in|out] algo point to the pooling forward algo (NOTE(review): not documented originally; description inferred
*                 from the parameter type ccPooingFwdAlgo_t - confirm)
* @return ccStatus_t
*/
ccStatus_t ccGetPooling2dDescriptor(const ccPoolingDescriptor_t poolingDesc,
ccPoolingMode_t *mode,
ccPaddingMode_t *padMode,
ccNanPropagation_t *maxpoolingNanOpt,
int32_t *windowH,
int32_t *windowW,
int32_t *padHHead,
int32_t *padHTail,
int32_t *padWHead,
int32_t *padWTail,
int32_t *strideH,
int32_t *strideW,
int32_t *dataMode,
int32_t *ceilMode,
ccPooingFwdAlgo_t *algo);

/**
* @ingroup dnn
* @brief get output dim info of compare (NOTE(review): summary inferred from the function name - confirm)
* @param [in] xDesc tensor descriptor x
* @param [in] yDesc tensor descriptor y
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetCompare5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
int32_t* dimCnt,
int32_t* dim,
int32_t dimLen);

/**
* @ingroup dnn
* @brief get output dim info of maximum (NOTE(review): summary inferred from the function name - confirm)
* @param [in] xDesc tensor descriptor x
* @param [in] yDesc tensor descriptor y
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMaximum5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
int32_t* dimCnt,
int32_t* dim,
int32_t dimLen);

/**
* @ingroup dnn
* @brief get output dim info of minimum (NOTE(review): summary inferred from the function name - confirm)
* @param [in] xDesc tensor descriptor x
* @param [in] yDesc tensor descriptor y
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMinimum5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
int32_t* dimCnt,
int32_t* dim,
int32_t dimLen);

/**
* @ingroup dnn
* @brief get output dim info of reduce (NOTE(review): summary inferred from the function name - confirm)
* @param [in] xDesc descriptor of input tensor
* @param [in] axis point to the int array of axes (presumably the axes to reduce - confirm)
* @param [in] keepDims keep-dims flag (NOTE(review): exact effect on the output dims is not visible here)
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetReduce5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccIntArray_t* axis,
bool keepDims,
int32_t *dimCnt,
int32_t dim[],
int32_t dimLen);

/**
* @brief get output description of slice tensor.
* @param [in] xDesc descriptor of input data
* @param [in] begin begin position of tensor
* @param [in] size size to slice
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetSliceOutputDim(
const ccTensorDescriptor_t xDesc,
const ccIntArray_t* begin,
const ccIntArray_t* size,
int32_t *dimCnt,
int32_t dim[],
int32_t dimLen);

/**
* @ingroup dnn
* @brief get strided slice output dim info.
* @param [in] xDesc descriptor of input tensor
* @param [in] stridedSliceDesc specifies the begin, end, strides of slice
* @param [in] attrDesc reserve for optional attributes.
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc,
const ccStridedSliceDescriptor_t stridedSliceDesc,
const ccStridedSliceAttrsDescriptor_t attrDesc,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);

/**
* @ingroup dnn
* @brief get workspace size for softmax computation
* @param [in] handle cce handle
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in|out] sizeInBytes point to the workspace size in bytes
* @return ccStatus_t
*/
ccStatus_t ccGetSoftmaxForwardWorkspaceSize(ccHandle_t handle,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
uint32_t *sizeInBytes);

/**
* @ingroup dnn
* @brief set quantize algorithm type and quantize scale type (vector or scalar)
*        Two overloads are declared: one without and one with the relu flag.
* @param [in] quantizeInfo descriptor of quantize parameters
* @param [in] quantAlgo enum type for quantize algorithm type
* @param [in] scaleType enum type for quantize scale type
* @param [in] reluFlag flag for relu (four-argument overload only)
* @return ccStatus_t
*/
ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantizeInfo, ccQuantizeAlgo_t quantAlgo, ccScaleType_t scaleType);
ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantizeInfo, ccQuantizeAlgo_t quantAlgo, ccScaleType_t scaleType, bool reluFlag);

}; /* end cce */

#endif // DNN_BASE_H__

+ 994
- 0
third_party/fwkacllib/inc/cce/dnn_base_def.hpp View File

@@ -0,0 +1,994 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_BASE_HPP__
#define DNN_BASE_HPP__

#include "cce/cce_def.hpp"

namespace cce {

/**
* @ingroup dnn
* @brief tiling para
*/
typedef struct tagCcWeightCompressInfo {
uint32_t blockRow; /**< block row */
uint32_t blockCol; /**< block col */
uint32_t fractalK; /**< fractal K */
uint32_t fractalN; /**< fractal N */
uint32_t lastFractalK; /**< K of last fractal */
uint32_t lastFractalN; /**< N of last fractal */
uint32_t cubeSize; /**< cube's length */
uint32_t loadDir; /**< data load direction, 0: col load, 1: row load */
} ccWeightCompressInfo_t;

/**
* @ingroup dnn
* @brief compress table info, packed as three bit-fields of 14 + 1 + 1 bits
*/
typedef struct tagCcWeightCompressTab {
uint16_t dataLen : 14; /**< data length in 128 Byte */
uint16_t storeFlag : 1; /**< 0: compressed addr = original addr, 1: compressed addr = original addr + 256 Byte */
uint16_t dataType : 1; /**< 0: original data, 1: compressed data */
} ccWeightCompressTab_t;

/**
* @brief conv/dnn quantize algorithm type
*/
typedef enum {
  QUANT_ALGO_NON_OFFSET = 0,  /**< non offset */
  QUANT_ALGO_HALF_OFFSET = 1, /**< half offset */
  QUANT_ALGO_ALL_OFFSET = 2,  /**< all offset */
  QUANT_ALGO_BUTT = 3
} ccQuantizeAlgo_t;
/**
* @brief conv/dnn quantize scale type: vector mode or scalar mode
*/
typedef enum {
  SCALE_VEC = 0,    /**< vector mode */
  SCALE_SCALAR = 1, /**< scalar mode */
  SCALE_TYPE_BUTT = 2
} ccConvolutionScaleType_t, ccScaleType_t;

/**
* @brief conv/dnn quantize scale value mode: sqrt mode or non sqrt mode
*/
typedef enum {
  SCALE_NORMAL = 0, /**< non sqrt mode */
  SCALE_SQRT = 1,   /**< sqrt mode */
  SCALE_VALUE_MODE_BUTT = 2
} ccConvolutionScaleValueMode_t, ccScaleValueMode_t;

/**
* @brief scale and offset parameters (presumably used with QUANT_ALGO_ALL_OFFSET - confirm against callers)
*/
typedef struct {
float scaleW;
float scaleD;
float scaleDNext;
uint8_t offsetW;
uint8_t offsetD;
uint8_t offsetDNext;
} ccQuantAllOffsetPara_t;

/**
* @brief one scale/offset pair of vector quantize parameters
*/
typedef struct tagCcVecQuantizePara {
float scale;
uint16_t offset;
uint16_t rrv; // 32byte align (NOTE(review): padding member; with a 4-byte float the struct is 8 bytes, so "32bit" may be meant - confirm)
} ccVecQuantizePara_t;

/**
* @ingroup dnn
* @brief tensor layout formats
*/
typedef enum tagCcTensorFormat {
  CC_TENSOR_NCHW = 0,      /**< NCHW */
  CC_TENSOR_NHWC = 1,      /**< NHWC */
  CC_TENSOR_ND = 2,        /**< Nd Tensor */
  CC_TENSOR_NC1HWC0 = 3,   /**< NC1HWC0 */
  CC_TENSOR_FRACTAL_Z = 4, /**< FRACTAL_Z */
  CC_TENSOR_NC1C0HWPAD = 5,
  CC_TENSOR_NHWC1C0 = 6,
  CC_TENSOR_FSR_NCHW = 7,
  CC_TENSOR_FRACTAL_DECONV = 8,
  CC_TENSOR_C1HWNC0 = 9,
  CC_TENSOR_FRACTAL_DECONV_TRANSPOSE = 10,
  CC_TENSOR_FRACTAL_DECONV_SP_STRIDE_TRANS = 11,
  CC_TENSOR_NC1HWC0_C04 = 12,   /**< NC1HWC0, C0 = 4 */
  CC_TENSOR_FRACTAL_Z_C04 = 13, /**< FRACTAL_Z, C0 = 4 */
  CC_TENSOR_CHWN = 14,
  CC_TENSOR_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15,
  CC_TENSOR_HWCN = 16,
  CC_TENSOR_NC1KHKWHWC0 = 17, /**< KH,KW kernel h & kernel w, maxpooling max output format */
  /* values 18 and 19 are not assigned */
  CC_TENSOR_HASHTABLE_LOOKUP_LOOKUPS = 20,
  CC_TENSOR_HASHTABLE_LOOKUP_KEYS = 21,
  CC_TENSOR_HASHTABLE_LOOKUP_VALUE = 22,
  CC_TENSOR_HASHTABLE_LOOKUP_OUTPUT = 23,
  CC_TENSOR_HASHTABLE_LOOKUP_HITS = 24,
  CC_TENSOR_C1HWNCoC0 = 25, /**< C1,H,W,N,Co,C0 6D diagonal format */
  CC_TENSOR_RESERVED = 26
} ccTensorFormat_t;

/**
* @ingroup dnn
* @brief type of compare
*/
typedef enum tagCcCompareType {
  CC_COMPARE_TYPE_LESS = 0,
  CC_COMPARE_TYPE_LESS_EQUAL = 1,
  CC_COMPARE_TYPE_NOT_EQUAL = 2,
  CC_COMPARE_TYPE_EQUAL = 3,
  CC_COMPARE_TYPE_GREATER = 4,
  CC_COMPARE_TYPE_GREATER_EQUAL = 5,
  CC_COMPARE_TYPE_RESERVED = 6
} ccCompareType_t;

/**
* @ingroup dnn
* @brief Nan propagation option
*/
typedef enum tagCcNanPropagation {
  CC_NAN_NOT_PROPAGATE = 0, /**< Nan numbers are not propagated */
  CC_NAN_PROPAGATE = 1,     /**< Nan numbers are propagated */
  CC_NAN_PROPAGATE_RESERVED = 2
} ccNanPropagation_t;

/**
* @ingroup dnn
* @brief forward convolution algorithms (ccCorrelationFwdAlgo_t is a macro alias of this type)
*/
typedef enum tagCcConvolutionFwdAlgo {
  CC_CONVOLUTION_FWD_ALGO_GEMM = 0,     /**< matrix gemm algo */
  CC_CONVOLUTION_FWD_ALGO_WINOGRAD = 1, /**< Winograd Transform algo */
  CC_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32 = 2,
  CC_CONVOLUTION_FWD_ALGO_RESERVED = 3
} ccConvolutionFwdAlgo_t;

#define ccCorrelationFwdAlgo_t ccConvolutionFwdAlgo_t

/**
* @ingroup dnn
* @brief backward convolution algorithms (ccCorrelationBwdAlgo_t is a macro alias of this type)
*/
typedef enum tagCcConvolutionBwdAlgo {
  CC_CONVOLUTION_BWD_ALGO_GEMM = 0,     /**< matrix gemm algo */
  CC_CONVOLUTION_BWD_ALGO_WINOGRAD = 1, /**< Winograd Transform algo */
  CC_CONVOLUTION_BWD_ALGO_GEMM_CO2IMG = 2,
  CC_CONVOLUTION_BWD_FILTER_GEM_ALGO = 3,
  CC_CONVOLUTION_BWD_ALGO_RESERVED = 4
} ccConvolutionBwdAlgo_t;

#define ccCorrelationBwdAlgo_t ccConvolutionBwdAlgo_t

/**
* @ingroup dnn
* @brief forward FullConnect algorithms
*/
typedef enum tagCcFullConnectFwdAlgo {
  CC_FULLCONNECT_FWD_ALGO_HALF = 0,
  CC_FULLCONNECT_FWD_ALGO_FLOAT32 = 1
} ccFullConnectFwdAlgo_t;

/**
* @ingroup dnn
* @brief convolution modes (ccCorrelationMode_t is a macro alias of this type)
*/
typedef enum tagCcConvolutionMode {
  CC_CONV_CONVOLUTION = 0,       /**< math convolution */
  CC_CONV_CROSS_CORRELATION = 1, /**< cross-correlation convolution */
  CC_CONV_DECONVOLUTION = 2,     /**< deconvolution, also named transposed convolution */
  CC_CONV_MODE_DEPTHWISE = 3,    /**< depthwise convolution */
  CC_CONV_MODE_RESERVED = 4
} ccConvolutionMode_t;

#define ccCorrelationMode_t ccConvolutionMode_t

/**
* @ingroup dnn
* @brief pooling modes
*/
typedef enum tagCcPoolingMode {
  CC_POOLING_MAX = 0,      /**< max pooling */
  CC_POOLING_AVG = 1,      /**< average pooling */
  CC_POOLING_L2 = 2,       /**< L2 pooling */
  CC_POOLING_AVG_FP32 = 3, /**< average pooling for training */
  CC_POOLING_RESERVED = 4
} ccPoolingMode_t;

/**
* @ingroup dnn
* @brief L0C accumulate algo of AvgPooling
*/
typedef enum tagCcPooingFwdAlgo {
  CC_POOLING_FWD_ALGO_HALF = 0,   /**< accumulate in L0c with FP16 */
  CC_POOLING_FWD_ALGO_FLOAT32 = 1 /**< accumulate in L0c with FP32 */
} ccPooingFwdAlgo_t;

/**
* @ingroup dnn
* @brief momentum update modes
*/
typedef enum tagMomentumAlgo {
  CC_MOMENTUM_UPDATE_FP32 = 0,      /**< FP32 out */
  CC_MOMENTUM_UPDATE_FP32_FP16 = 1, /**< FP32 and FP16 out */
  CC_MOMENTUM_UPDATE_FP32_NESTEROV = 2,
  CC_MOMENTUM_UPDATE_FP32_FP16_NESTEROV = 3,
  CC_MOMENTUM_RESERVED = 4
} ccMomentumAlgo_t;

/**
* @ingroup dnn
* @brief partition strategy modes
* attention: if this enum needs to be modified, dPartitionStrategy_t must be modified as well
*/
typedef enum tagCcPartitionStrategy {
  CC_PARTITION_STRATEGY_MOD = 0, /**< mod */
  CC_PARTITION_STRATEGY_DIV = 1, /**< div */
  CC_PARTITION_STRATEGY_RESERVED = 2
} ccPartitionStrategy_t;

/**
* @ingroup dnn
* @brief assignOp modes
*/
typedef enum tagCcAssignOpMode {
  CC_ASSIGN_ADD = 0, /**< assign add */
  CC_ASSIGN_SUB = 1, /**< assign sub */
  CC_ASSIGN_RESERVED = 2
} ccAssignOpMode_t;

/**
* @ingroup dnn
* @brief arcSinCos modes
*/
typedef enum tagCcArcSinCosMode {
  CC_ARCUS_SIN = 0, /**< asin */
  CC_ARCUS_COS = 1, /**< acos */
  CC_ARCUS_RESERVED = 2
} ccArcSinCosMode_t;

/**
* @ingroup dnn
* @brief padding modes
*/
typedef enum tagCcPaddingMode {
  CC_PADDING_CEIL = 0,
  CC_PADDING_DIRECTASSIGN = 1,
  CC_PADDING_VALID = 2,
  CC_PADDING_SAME = 3,      /**< Padding values of 0 are always used */
  CC_PADDING_CEIL_NEW = 4,  /**< new ceil, use for backward compatibility */
  CC_PADDING_VALID_NEW = 5, /**< new valid, use for backward compatibility */
  CC_PADDING_SAME_NEW = 6,  /**< new same, use for backward compatibility */
  CC_PADDING_RESERVED = 7
} ccPaddingMode_t;

/**
* @ingroup dnn
* @brief activation modes
*/
typedef enum tagCcActivationMode {
  CC_ACTIVATION_SIGMOID = 0,      /**< sigmoid */
  CC_ACTIVATION_RELU = 1,         /**< ReLU */
  CC_ACTIVATION_TANH = 2,         /**< tanh */
  CC_ACTIVATION_CLIPPED_RELU = 3, /**< clipped ReLU */
  CC_ACTIVATION_ELU = 4,          /**< ELU */
  CC_ACTIVATION_LEAKY_RELU = 5,
  CC_ACTIVATION_ABS = 6,             /**< Abs */
  CC_ACTIVATION_RELU1 = 7,           /**< relu1 */
  CC_ACTIVATION_SOFTSIGN = 8,        /**< softsign */
  CC_ACTIVATION_SOFTPLUS = 9,        /**< softplus */
  CC_ACTIVATION_HARDSIGMOID = 10,    /**< hardsigmoid */
  CC_ACTIVATION_THRESHOLD_RELU = 11, /**< threshold */
  CC_ACTIVATION_SELU = 12,           /**< selu */
  CC_ACTIVATION_LINEAR = 13,         /**< linear */
  CC_ACTIVATION_RELU6 = 14,          /**< relu6 */
  CC_ACTIVATION_RESERVED = 15
} ccActivationMode_t;

/**
* @ingroup dnn
* @brief logical op modes
*/
typedef enum tagCcLogicalOpMode {
  CC_LOGICAL_OP_NOT = 0, /**< logical not */
  CC_LOGICAL_OP_AND = 1, /**< logical and */
  CC_LOGICAL_OP_OR = 2,  /**< logical or */
  CC_LOGICAL_OP_XOR = 3, /**< logical xor */
  CC_LOGICAL_OP_RESERVED = 4
} ccLogicalOpMode_t;

/**
* @ingroup dnn
* @brief batchnorm modes
*/
typedef enum tagCcBatchNormMode {
  CC_BATCHNORM_PER_ACTIVATION = 0, /**< bnScale, bnBias tensor dims are 1xCxHxW */
  CC_BATCHNORM_SPATIAL = 1,        /**< bnScale, bnBias tensor dims are 1xCx1x1 */
  CC_BATCHNORM_RESERVED = 2
} ccBatchNormMode_t;

/**
* @ingroup dnn
* @brief instancenorm modes
*/
typedef enum tagCcInstanceNormMode {
  CC_INSTANCENORM_PER_ACTIVATION = 0, /**< inScale, inBias tensor dims are NxCxHxW */
  CC_INSTANCENORM_SPATIAL = 1,        /**< inScale, inBias tensor dims are NxCx1x1 */
  CC_INSTANCENORM_RESERVED = 2
} ccInstanceNormMode_t;
/**
* @ingroup dnn
* @brief layernorm modes
*/
typedef enum tagCcLayerNormMode {
  CC_LAYERNORM_PER_ACTIVATION = 0, /**< lnScale, lnBias tensor dims are 1xCxHxW */
  CC_LAYERNORM_SPATIAL = 1,        /**< lnScale, lnBias tensor dims are Nx1x1x1 */
  CC_LAYERNORM_RESERVED = 2
} ccLayerNormMode_t;

/**
* @ingroup dnn
* @brief softmax algorithms
*/
typedef enum tagCcSoftmaxAlgo {
  CC_SOFTMAX_FAST = 0,          /**< straightforward implementation */
  CC_SOFTMAX_ACCURATE = 1,      /**< subtract max from every point to avoid overflow */
  CC_SOFTMAX_LOG = 2,           /**< perform the Log softmax operation to avoid overflow */
  CC_SOFTMAX_ACCURATE_FP32 = 3, /**< accurate mode for fp32 */
  CC_SOFTMAX_RESERVED = 4
} ccSoftmaxAlgo_t;

/**
* @ingroup dnn
* @brief softmax modes (which axes the softmax is computed over)
*/
typedef enum tagCcSoftmaxMode {
  CC_SOFTMAX_MODE_INSTANCE = 0, /**< compute the softmax over all C, H, W for each N */
  CC_SOFTMAX_MODE_CHANNEL = 1,  /**< compute the softmax over all C for each H, W, N */
  CC_SOFTMAX_MODE_HEIGHT = 2,   /**< compute the softmax over all H for each N, C, W */
  CC_SOFTMAX_MODE_WIDTH = 3,    /**< compute the softmax over all W for each N, C, H */
  CC_SOFTMAX_MODE_CLASS = 4,    /**< special mode: compute the softmax over all class for each N, H, W */
  CC_SOFTMAX_MODE_RESERVED = 5
} ccSoftmaxMode_t;

/**
* @ingroup dnn
* @brief cross entropy modes
*/
typedef enum tagCcCrossEntropyMode {
  CC_CROSS_ENTROPY_SPARSE_WITHOUT_REDUCTION = 0, /**< compute the sparse cross entropy without fused reduce mean */
  CC_CROSS_ENTROPY_SPARSE_WITH_REDUCTION = 1,    /**< compute the sparse cross entropy with fused reduce mean */
  CC_CROSS_ENTROPY_WITHOUT_REDUCTION = 2,        /**< compute the cross entropy without fused reduce mean */
  CC_CROSS_ENTROPY_WITH_REDUCTION = 3,           /**< compute the cross entropy with fused reduce mean */
  CC_CROSS_ENTROPY_RESERVED = 4
} ccCrossEntropyMode_t;

/**
* @ingroup dnn
* @brief concat modes
*/
typedef enum tagCcConcatMode {
  CC_CONCAT_BY_BATCH = 0,   /**< concat by batch */
  CC_CONCAT_BY_FEATURE = 1, /**< concat by feature */
  CC_CONCAT_BY_HEIGHT = 2,  /**< concat by height */
  CC_CONCAT_BY_WIDTH = 3,   /**< concat by width */
  CC_CONCAT_BY_FLATTEN = 4,
  CC_CONCAT_RESERVED = 5
} ccConcatMode_t;

/**
* @ingroup dnn
* @brief eltwise modes
*/
typedef enum tagCcEltwiseMode {
  CC_ELTWISE_PROD = 0, /**< prod */
  CC_ELTWISE_SUM = 1,  /**< sum */
  CC_ELTWISE_MAX = 2,  /**< max */
  CC_ELTWISE_RESERVED = 3
} ccEltwiseMode_t;

/**
* @ingroup dnn
* @brief depthwise filter types
*/
typedef enum tagCcDepthwiseFilterType {
  CC_Depthwise_FILTER_DEPTHWISE = 0, /**< depthwise filter */
  CC_Depthwise_FILTER_POINTWISE = 1, /**< pointwise filter */
  CC_Depthwise_FILTER_RESERVED = 2
} ccDepthwiseFilterType_t;

/**
* @ingroup dnn
* @brief sampler types
*/
typedef enum tagCcSamplerType {
  CC_SAMPLER_BILINEAR = 0, /**< bilinear sampler algo */
  CC_SAMPLER_RESERVED = 1
} ccSamplerType_t;

/**
* @ingroup dnn
* @brief NMS types
*/
typedef enum tagCcNmsType {
  CC_NMS_IOU = 0, /**< nms operation type, only IOU for now */
  CC_NMS_RESERVED = 1
} ccNmsType_t;

/**
* @ingroup dnn
* @brief Box Code types in detection nets (numbering starts at 1)
*/
typedef enum tagCcBoxCodeType {
  CC_BOX_CORNER = 1,
  CC_BOX_CENTER_SIZE = 2,
  CC_BOX_CORNER_SIZE = 3,
  CC_BOX_RESERVED = 4
} ccBoxCodeType_t;

/**
* @ingroup dnn
* @brief split modes
*/
typedef enum tagSplitMode {
  CC_SPLIT_MODE_SLICE = 0,     /**< split data of one dim */
  CC_SPLIT_MODE_DUPLICATE = 1, /**< copy data of one dim */
  CC_SPLIT_MODE_RESERVED = 2
} ccSplitMode_t;

/**
* @ingroup dnn
* @brief LRN modes
*/
typedef enum tagCcLRNMode {
  CC_LRN_CROSS_CHANNELS = 0,  /**< CROSS_CHANNELS */
  CC_LRN_WITHIN_CHANNELS = 1, /**< WITHIN_CHANNELS */
  CC_LRN_RESERVED = 2
} ccLRNMode_t;

/**
* @ingroup dnn
* @brief AIPP input formats (numbering starts at 1); the trailing note on each
*        entry is the mini/lite/tiny tag carried over from the original comments
*/
typedef enum tagCcAippInputFormat {
  CC_AIPP_INPUT_YUV420SP_U8 = 1,    /**< YUV420SP; mini,lite,tiny */
  CC_AIPP_INPUT_XRGB8888_U8 = 2,    /**< XRGB8888; mini,lite,tiny */
  CC_AIPP_INPUT_NC1HWC0DI_FP16 = 3, /**< NC1HWC0DI_FP16; mini */
  CC_AIPP_INPUT_NC1HWC0DI_S8 = 4,   /**< NC1HWC0DI_S8; mini */
  CC_AIPP_INPUT_RGB888_U8 = 5,      /**< RGB888; mini,tiny */
  CC_AIPP_INPUT_ARGB8888_U8 = 6,    /**< ARGB8888; lite */
  CC_AIPP_INPUT_YUYV_U8 = 7,        /**< YUYV; lite */
  CC_AIPP_INPUT_YUV422SP_U8 = 8,    /**< YUV422SP; lite */
  CC_AIPP_INPUT_AYUV444_U8 = 9,     /**< AYUV444; lite */
  CC_AIPP_INPUT_YUV400_U8 = 10,     /**< YUV400; mini,lite,tiny */
  CC_AIPP_INPUT_RESERVED = 11
} ccAippInputFormat_t;

/**
* @ingroup dnn
* @brief AIPP padding modes (numbering starts at 1)
*/
typedef enum tagCcAippPaddingMode {
  CC_AIPP_PAD_DEFAULT_VALUE = 1, /**< CONFIG_VALUE */
  CC_AIPP_PAD_LINE_COPY = 2,     /**< ROW_COL_COPY */
  CC_AIPP_PAD_BLOCK_COPY = 3,    /**< BLOCK_COPY */
  CC_AIPP_PAD_MIRROR_COPY = 4,   /**< MIRROR_COPY */
  CC_AIPP_PAD_RESERVED = 5
} ccAippPaddingMode_t;

/**
* @ingroup dnn
* @brief cmp types
*/
typedef enum tagCcccCMPType {
  CC_CMP_EQ = 0,
  CC_CMP_NE = 1,
  CC_CMP_LT = 2,
  CC_CMP_GT = 3,
  CC_CMP_GE = 4,
  CC_CMP_LE = 5,
  CC_CMP_TYPE_RESERVED = 6
} ccCMPType_t;

/**
* @ingroup dnn
* @brief result types: logical and / logical or
*/
typedef enum tagCcResultType {
  CC_Result_AND = 0, /**< logical and */
  CC_Result_OR = 1,  /**< logical or */
  CC_Result_RESERVED = 2
} ccResultType_t;

/**
* @ingroup dnn
* @brief resize methods of the crop_and_resize operator
*/
typedef enum tagCcResizeMethod {
  CC_RESIZE_METHOD_BILINEAR = 0, /**< BILINEAR */
  CC_RESIZE_METHOD_NEAREST = 1,  /**< NEAREST */
  CC_RESIZE_METHOD_RESERVED = 2
} ccResizeMethod_t;

/**
* @ingroup dnn
* @brief how the new size of the images is calculated
*/
typedef enum tagCcResizeOutputDimMode {
  RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0,   /**< Output dimension specified by zoom factor */
  RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR = 1, /**< specified by shrink factor */
  RESIZE_OUTPUT_DIM_EXPLICIT = 2,         /**< specified explicitly */
  RESIZE_OUTPUT_DIM_RESERVED = 3
} ccResizeOutputDimMode_t;

/**
* @ingroup dnn
* @brief YOLO versions (numbering starts at 1)
*/
typedef enum tagCcYoloVersion {
  CC_YOLO_V2 = 1, /**< YOLOv2 */
  CC_YOLO_V3 = 2, /**< YOLOv3 */
  CC_YOLO_RESERVED = 3
} ccYoloVersion_t;

/**
* @ingroup dnn
* @brief attention algorithms
*        bahdanau-attention, for detail:https://pravn.wordpress.com/2017/11/14/bahdanau-attention/
*/
typedef enum tagCcAttentionAlgo {
  CC_ATTENTION_ALGO_BAHDANAU = 0,
  CC_ATTENTION_ALGO_NORMAL_BAHDANAU = 1,
  CC_ATTENTION_ALGO_LUONG = 2,
  CC_ATTENTION_ALGO_SCALED_LUONG = 3,
  CC_ATTENTION_ALGO_RESERVED = 4
} AttentionAlgo_t;
/**
* @ingroup dnn
* @brief data layout descriptions
*/
typedef enum ccEmAttnDecoderDataLayout {
  CC_ATTN_5D_TX1BX = 0, /**< [max_time,Xt1,1,batch_size,Xt0] */
  CC_ATTN_5D_BTX1X = 1, /**< [batch_size*max_time,Xt1,1,1,Xt0] */
  CC_ATTN_DL_RESERVED = 2
} ccEmAttnDecoderDataLayout_t;

/**
* @ingroup dnn
* @brief Reduce operations
*/
typedef enum {
  CC_REDUCE_OP_SUM = 0,        /**< sum */
  CC_REDUCE_OP_MEAN = 1,       /**< mean */
  CC_REDUCE_OP_PROD = 2,       /**< product */
  CC_REDUCE_OP_ALL = 3,        /**< logical and */
  CC_REDUCE_OP_ABS_SUM = 4,    /**< absolute sum */
  CC_REDUCE_OP_SQUARE_SUM = 5, /**< square sum */
  CC_REDUCE_OP_MAX = 6,        /**< max */
  CC_REDUCE_OP_MIN = 7,        /**< min */
  CC_REDUCE_OP_LOGSUMEXP = 8,  /**< logsumexp */
  CC_REDUCE_OP_INVALID = 9
} ccReduceOpType_t;

/**
* @ingroup dnn
* @brief LSH projection types
*/
typedef enum {
  LSH_PROJECTION_TYPE_UNKNOWN = 0,
  LSH_PROJECTION_TYPE_SPARSE = 1,
  LSH_PROJECTION_TYPE_DENSE = 2
} LSHProjectionType;

/**
* @ingroup dnn
* @brief activation para: the clipped-RELU parameter struct and the union that
*        overlays the per-activation parameters
*/
typedef struct tagCcActivationRelu {
double reluCoef; /* reluCoef for clipped RELU */
ccNanPropagation_t reluNanOpt; /* NanOpt for clipped RELU */
} ccActivationRelu_t;
/* Union: the members below share storage, so only one of them is meaningful at a time. */
typedef union tagCcActivationPara {
ccActivationRelu_t actionRelu; /* relu Coef and NanOpt for clipped RELU */
double eluAlpha; /* eluAlpha for ELU */
float leakyReluNegativeSlope; /* presumably the negative slope for leaky RELU - confirm */
} ccActivationPara_u;

/**
* @ingroup dnn
* @brief square modes
*/
typedef enum tagCcSquareMode {
  CC_SQUARE_2 = 0 /**< square */
} ccSquareMode_t;

/**
* @ingroup dnn
* @brief append operation types
*/
typedef enum tagCcOpType {
  CC_OP_TYPE_NO_RELU = 0,
  CC_OP_TYPE_RELU = 1,
  CC_OP_TYPE_RELU6 = 2,
  CC_OP_TYPE_INVALID = 3
} ccOpType_t;

/**
* @ingroup dnn
* @brief fill operator types
*/
typedef enum tagCcFillOpType {
  CC_CONSTANT = 0,
  CC_RANGE = 1,
  CC_LENGTH_RANGE = 2,
  CC_GIVEN_TENSOR = 3,
  CC_DIAGONAL = 4,
  CC_UNIFORM = 5,
  CC_UNIFORM_INT = 6,
  CC_UNIQUE_UNIFORM = 7,
  CC_GAUSSIAN = 8,
  CC_XAVIER = 9,
  CC_MSRA = 10,
  CC_FILL_OP_TYPE_RESERVED = 11
} ccFillOpType_t;

/**
* @ingroup dnn
* @brief reduction modes of the loss function
*/
typedef enum tagCcLossReduction {
  CC_LOSS_REDUCTION_NONE = 0,
  CC_LOSS_REDUCTION_SUM = 1,
  CC_LOSS_REDUCTION_RESERVED = 2
} ccLossReduction_t;

/**
* @ingroup dnn
* @brief max size of ccIntArray
*/
#define CC_INT_ARRAY_MAX_SIZE (8)

/**
* @ingroup dnn
* @brief struct define of an int array holding at most CC_INT_ARRAY_MAX_SIZE (8) values.
*/
typedef struct tagIntArray {
uint32_t size; /* presumably the number of valid entries in value - confirm against callers */
int32_t value[CC_INT_ARRAY_MAX_SIZE];
} ccIntArray_t;

/**
* @ingroup dnn
* @brief pad modes
*/
typedef enum tagCcPadMode {
  CC_PAD_CONSTANT = 0,  /**< CONSTANT */
  CC_PAD_REFLECT = 1,   /**< REFLECT */
  CC_PAD_SYMMETRIC = 2, /**< SYMMETRIC */
  CC_PAD_EDGE = 3,      /**< EDGE */
  CC_PAD_MODE_RESERVED = 4
} ccPadMode_t;

/**
* @ingroup dnn
* @brief pad types of extractImagePatches (numbering starts at 1)
*/
typedef enum {
  CC_EXTRACT_IMAGE_PATCHES_PAD_VALID = 1,
  CC_EXTRACT_IMAGE_PATCHES_PAD_SAME = 2,
  CC_EXTRACT_IMAGE_PATCHES_PAD_RESERVED = 3
} ccExtractImagePatchesPadType_t;

/**
* @ingroup dnn
* @brief image dimensions of aipp input
*/
#define CC_AIPP_IMG_DIM (2)

/**
* @ingroup dnn
* @brief image channel number of aipp input
*/
#define CC_AIPP_IMG_CHN_NUM (4)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion matrix
*/
#define CC_AIPP_CSC_MATRIX_DIM (9)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion bias
*/
#define CC_AIPP_CSC_BIAS_DIM (3)

/**
* @ingroup dnn
* @brief struct define of AIPP operator
*        Array lengths come from the CC_AIPP_* macros: CC_AIPP_IMG_DIM,
*        CC_AIPP_IMG_CHN_NUM, CC_AIPP_CSC_MATRIX_DIM and CC_AIPP_CSC_BIAS_DIM.
*/

typedef struct tagCcAipp {
ccAippInputFormat_t inputFormat;
ccDataType_t outputFormat;
int32_t srcImageSize[CC_AIPP_IMG_DIM];
int32_t loadStartPos[CC_AIPP_IMG_DIM];
int32_t loadSize[CC_AIPP_IMG_DIM];
int32_t scfInputSize[CC_AIPP_IMG_DIM];
int32_t scfOutputSize[CC_AIPP_IMG_DIM];
int32_t cscMatrix[CC_AIPP_CSC_MATRIX_DIM];
int32_t cscOutputBias[CC_AIPP_CSC_BIAS_DIM];
int32_t cscInputBias[CC_AIPP_CSC_BIAS_DIM];
int32_t dtcPixelMean[CC_AIPP_IMG_CHN_NUM];
float dtcPixelMin[CC_AIPP_IMG_CHN_NUM];
float dtcPixelVarReci[CC_AIPP_IMG_CHN_NUM];
ccAippPaddingMode_t paddingMode;
int32_t paddingSize[CC_AIPP_IMG_DIM * 2]; // up,down,left,right
float cpaddingVaule; // NOTE(review): name looks like a misspelling of "cpaddingValue"; left unchanged so existing users still compile
bool cscSwitch; // 0:off,1:on
bool scfSwitch; // 0:off,1:on
bool rbuvSwapSwitch;
bool axSwapSwitch;
bool singleLineMode;
bool cscConfigFlag;
bool dtcConfigFlag;
bool padConfigFlag;
bool commConfigFlag;
bool aippEn;
bool dyncAippFlag;
const void *dyncParaAddr;
bool rotationFlag;
} ccConvolutionAipp_t;


/**
* @ingroup dnn
* @brief one set of quantize parameters: a scale value mode plus pointers to the
*        scale/offset buffers (NOTE(review): buffer lengths and ownership are not visible here)
*/
typedef struct tagCcQuantizePara {
ccConvolutionScaleValueMode_t scaleValueMode;
uint16_t *scale;
uint16_t *offsetq;
int32_t *offsetw;
uint8_t *allOffsetw;
uint8_t *offsetPad;
} CcQuantizePara_t;

/**
* @ingroup dnn
* @brief quantize settings: the algorithm, the scale type and three parameter sets
*        (scaleQ, scaleRq, scaleDq), plus relu/relu6/concat options
*/
typedef struct tagCcQuantize {
ccQuantizeAlgo_t quantAlgo;

ccConvolutionScaleType_t scaleWType; // show scaleRq,scaleDq type

CcQuantizePara_t scaleQ;

CcQuantizePara_t scaleRq;

CcQuantizePara_t scaleDq;

// need relu
bool reluFlag;

// relu6
uint16_t *scaleRelu6;
bool bConcat;
} ccQuantize_t;

/**
* @ingroup dnn
* @brief descriptor of PAD (pointer to struct tagCcPad, which is not defined in this header)
*/
typedef struct tagCcPad *ccPadDescriptor_t;

/**
* @ingroup dnn
* @brief Cum operations
*/
typedef enum {
  CC_CUM_OP_SUM = 0,  /**< sum */
  CC_CUM_OP_PROD = 1, /**< product */
  CC_CUM_OP_INVALID = 2
} CumOpType;

/**
* @ingroup dnn
* @brief descriptor of tensor
*/
typedef struct tagCcTensor *ccTensorDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of filter tensor
*/
typedef struct tagCcFilter *ccFilterDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of convolution operator
*/
typedef struct tagCcConvolution *ccConvolutionDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of correlation operator (same underlying struct tagCcConvolution as the convolution descriptor)
*/
typedef struct tagCcConvolution *ccCorrelationDescriptor_t;
/* descriptor of full connection operator */
typedef struct tagCcFullConnection_t *ccFullConnectionDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of pooling operator
*/
typedef struct tagCcPooling *ccPoolingDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of activation operator
*/
typedef struct tagCcActivation *ccActivationDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of batchToSpace operator
*/
typedef struct tagCcBatchToSpace *ccBatchToSpaceDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of spaceToBatch operator
*/
typedef struct tagCcSpaceToBatch *ccSpaceToBatchDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of svdf operator
*/
typedef struct tagCcSvdf *ccSvdfDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of crop operator
*/
typedef struct tagCcCrop *ccCropDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of interp operator
*/
typedef struct tagCcInterp *ccInterpDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of GetRegionBox operator
*/
typedef struct tagCcGetRegionBox *ccGetRegionBoxDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of CorrectBoxes operator
*/
typedef struct tagCorrectBoxes *ccCorrectBoxesDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ClsProb operator
*/
typedef struct tagClsProb *ccClsProbDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of NMS operator
*/
typedef struct tagCcNms *ccNmsDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of MultiClassNms operator
*/
typedef struct tagCcMultiClassNms *ccMultiClassNmsDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of MscnnBoxOutput operator
*/
typedef struct tagCcMscnnBoxOutput *ccMscnnBoxOutputDescriptor_t;

/**
* @ingroup dnn
* @brief define of SoftmaxTree (an untyped void pointer)
*/
typedef void *ccSoftmaxTree_t;

/**
* @ingroup dnn
* @brief descriptor of exp operator
*/
typedef struct tagCcExp *ccExpDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of log operator
*/
typedef struct tagCcLog *ccLogDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of pow operator
*/
typedef struct tagCcPow *ccPowDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of padv2 operator
*/
typedef struct tagCcPadV2 *ccPadV2Descriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ShapeClassify operator
*/
typedef struct tagCcShapeClassify *ccShapeClassifyDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of DetectionFull3DOutput operator
*/
typedef struct tagCcDetectionFull3DOutput *ccDetectionFull3DOutputDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Quantize operator
*/
typedef struct tagCcQuantize *ccQuantizeDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of StridedSlice operator
*/
typedef struct tagCcStridedSlice *ccStridedSliceDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of StridedSliceAttrs operator
*/
typedef struct tagCcStridedSliceAttrs *ccStridedSliceAttrsDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ResizeBilinear operator
*/
typedef struct tagCcResizeBilinear *ccResizeBilinearDescriptor_t;

/* descriptor of Eltwise operator */
typedef struct tagCcEltwise *ccEltwiseDescriptor_t;

/* descriptor of BatchNorm operator */
typedef struct tagCcBatchNorm *ccBatchNormDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Square operator
*/
typedef struct tagCcSquare *ccSquareDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of NonMaxSuppression operator
*/
typedef struct tagNonMaxSuppression *ccNonMaxSuppressionDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Upsample parameters (NOTE(review): the original comment repeated the NonMaxSuppression text;
*        this description is inferred from the type name - confirm)
*/
typedef struct tagUpsamplePara *ccUpsampleParaDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ResizeNearestNeighbor operator
*/
typedef struct tagCcResizeNearestNeighbor *ccResizeNearestNeighborDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Fill operator
*/
typedef struct tagCcFillParam *ccFillParamDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Argmaxmin operator
*/
typedef struct tagCcArgmaxmin *ccArgmaxminDescriptor_t;

}; // namespace cce

#endif // DNN_BASE_HPP__

+ 4838
- 0
third_party/fwkacllib/inc/cce/dnn_op.h
File diff suppressed because it is too large
View File


+ 23
- 0
third_party/fwkacllib/inc/cce/dnn_struct.hpp View File

@@ -0,0 +1,23 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_STRUCT_HPP__
#define DNN_STRUCT_HPP__

#include "dnn.h"
#include "dnn_struct_base.hpp"

#endif // DNN_STRUCT_HPP__

+ 894
- 0
third_party/fwkacllib/inc/cce/dnn_struct_base.hpp View File

@@ -0,0 +1,894 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_STRUCT_BASE_HPP__
#define DNN_STRUCT_BASE_HPP__

#include "cce/cce_def.hpp"

namespace cce {

/**
* @ingroup dnn
* @brief max number of dimensions
*/
#define CC_DIM_MAX (8)

/**
* @ingroup dnn
* @brief max number of dimensions when use NC1HWC0 format
*/
#define CC_REALDIM_MAX (4)

/**
* @ingroup dnn
* @brief max input count of MscnnBoxOutput
*/
#define CC_MAX_INPUT_CNT (10)

/**
* @ingroup dnn
* @brief image dimensions of aipp input
*/
#define CC_AIPP_IMG_DIM (2)

/**
* @ingroup dnn
* @brief image channel number of aipp input
*/
#define CC_AIPP_IMG_CHN_NUM (4)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion matrix
*/
#define CC_AIPP_CSC_MATRIX_DIM (9)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion bias
*/
#define CC_AIPP_CSC_BIAS_DIM (3)

/**
* @ingroup dnn
* @brief parameter number of op exp/log/pow
*/
#define PARAM_CNT_THREE (3)

/**
* @ingroup dnn
* @brief parameter number of op nonmaxsuppression
*/
#define PARAM_CNT_TWO (2)
/**
* @ingroup dnn
* @brief named constants for dimension counts of 1, 2 and 4
*/
#define DIMCNT_NUMBER_ONE (1)
#define DIMCNT_NUMBER_TWO (2)
#define DIMCNT_NUMBER_FOUR (4)

/**
* @ingroup dnn
* @brief position of the N, C, H and W axes inside an NCHW-ordered dimension array
*/
#define COMMON_FORMAT_NCHW_N_INDEX (0)
#define COMMON_FORMAT_NCHW_C_INDEX (1)
#define COMMON_FORMAT_NCHW_H_INDEX (2)
#define COMMON_FORMAT_NCHW_W_INDEX (3)

/**
* @ingroup dnn
* @brief parameter number of op upsample
*/
#define UPSAMPLE_SCAL_DEFAULT_TWO (2)
#define UPSAMPLE_ILLEGAL_VALUE_1 (1)

/**
* @ingroup dnn
* @brief struct define of StridedSlice required params.
*
* begin, end and strides each reserve CC_DIM_MAX per-dimension slots.
* NOTE(review): dimCnt presumably tells how many of those slots are in use - confirm.
*/

typedef struct tagCcStridedSlice {
uint32_t dimCnt;
int32_t begin[CC_DIM_MAX];
int32_t end[CC_DIM_MAX];
int32_t strides[CC_DIM_MAX];
} ccStridedSlice_t;

/**
* @ingroup dnn
* @brief struct define of Strided_slice attrs
*
* Every member is a 32-bit mask.
* NOTE(review): presumably bit i refers to dimension i, as in the usual StridedSlice
* definition - confirm against the operator implementation.
*/
typedef struct tagCcStridedSliceAttrs {
uint32_t beginMask;
uint32_t endMask;
uint32_t ellipsisMask;
uint32_t newAxisMask;
uint32_t shrinkAxisMask;
} ccStridedSliceAttrs_t;

/**
* @ingroup dnn
* @brief params of batchToSpace
*
* blockShape reserves CC_DIM_MAX slots and crops reserves 2 * CC_DIM_MAX slots,
* i.e. two crop values per block dimension.
* NOTE(review): blockShapeLength presumably is the number of used blockShape entries - confirm.
*/
typedef struct tagCcBatchToSpace {
int32_t blockShapeLength;
int32_t blockShape[CC_DIM_MAX];
int32_t crops[2 * CC_DIM_MAX];
} ccBatchToSpace_t;

/**
* @ingroup dnn
* @brief params of spaceToBatch
*
* blockShape reserves CC_DIM_MAX slots and paddings reserves 2 * CC_DIM_MAX slots,
* i.e. two padding values per block dimension.
* NOTE(review): blockShapeLength presumably is the number of used blockShape entries - confirm.
*/
typedef struct tagCcSpaceToBatch {
int32_t blockShapeLength;
int32_t blockShape[CC_DIM_MAX];
int32_t paddings[2 * CC_DIM_MAX];
} ccSpaceToBatch_t;

/**
* @ingroup dnn
* @brief struct define of tensor
*
* dim and stride each reserve CC_DIM_MAX slots.
* NOTE(review): the difference between dimCnt and realDimCnt, and the unit of dataSize,
* are not visible in this header - confirm before relying on them.
*/
typedef struct tagCcTensor {
ccTensorFormat_t format;
ccDataType_t dataType;
int32_t dimCnt;
int32_t realDimCnt;
uint32_t dataSize;
int32_t dim[CC_DIM_MAX];
int32_t stride[CC_DIM_MAX];
ccVecQuantizePara_t vecQuantizePara;
} ccTensor_t;

/**
* @ingroup dnn
* @brief struct define of filter tensor
*
* Carries format, data type, dimensions and data size like ccTensor_t, but has no
* stride array, realDimCnt or quantize parameters. dim reserves CC_DIM_MAX slots.
*/
typedef struct tagCcFilter {
ccTensorFormat_t format;
ccDataType_t dataType;
int32_t dimCnt;
uint32_t dataSize;
int32_t dim[CC_DIM_MAX];
} ccFilter_t;

/**
* @ingroup dnn
* @brief struct define of convolution operator
*
* Array capacities derive from CC_DIM_MAX (8):
* - filterStride, dilation, adj, targetShape: CC_DIM_MAX - 2 = 6 entries
* - padding, beforePadding: 2 * (CC_DIM_MAX - 2) = 12 entries (two per dimension)
* NOTE(review): the "- 2" presumably excludes the batch and channel dimensions - confirm.
*/
typedef struct tagCcConvolution {
ccConvolutionMode_t mode;
ccPaddingMode_t padMode;
int32_t dimCnt;
int32_t padding[2 * (CC_DIM_MAX - 2)];
int32_t filterStride[CC_DIM_MAX - 2];
int32_t dilation[CC_DIM_MAX - 2];
int32_t group;
ccQuantizeDescriptor_t quantInfo;
ccConvolutionAipp_t aippInfo;
int32_t adj[CC_DIM_MAX - 2];
int32_t targetShape[CC_DIM_MAX - 2];
int32_t beforePadding[2 * (CC_DIM_MAX - 2)]; // pad before conv
uint32_t reluFlag;
int64_t concatBatchSize;
} ccConvolution_t;

/**
* @ingroup dnn
* @brief correlation reuses the convolution parameter struct; this is a textual
* (preprocessor) alias, not a distinct type
*/
#define ccCorrelation_t ccConvolution_t
/**
* @ingroup dnn
* @brief struct define of full connection operator
*
* NOTE(review): infoTab is a non-owning pointer as far as this header shows;
* infoTabSize presumably describes its size - confirm unit and ownership.
*/
typedef struct tagCcFullConnection_t {
ccQuantizeDescriptor_t quantInfo;
uint32_t infoTabSize;
const void *infoTab;
bool reluFlag;
ccFullConnectFwdAlgo_t algo;
} ccFullConnection_t;

/**
* @ingroup dnn
* @brief struct define of ConcatFour2Five operator
*/
typedef struct tagCcConcatFour2Five_t {
uint32_t branchNum; // how many branch for box or class
uint32_t classNum; // box branch's classNum is four, class branch's classNum is class number
} ccConcatFour2Five_t;

/**
* @ingroup dnn
* @brief struct define of Transdata operator
*
* NOTE(review): the 64-bit address members are interleaved with 8-bit members, so the
* compiler inserts alignment padding between them. Do not reorder the members without
* confirming who else depends on this layout.
*/
typedef struct tagCcTransdata_t {
uint64_t scaleQAddr;
uint8_t scaleQValueMode;
uint64_t offsetQAddr;
uint8_t quantAlgo;
uint8_t quantize8bitFlag;
} ccTransdata_t;
/**
* @ingroup dnn
* @brief struct define of pooling operator
*
* windowDim, padding and stride each reserve CC_DIM_MAX - 2 = 6 entries. Unlike
* ccConvolution_t, padding here has one slot per dimension rather than two.
*/
typedef struct tagCcPooling {
ccPoolingMode_t mode;
ccPaddingMode_t padMode;
ccNanPropagation_t maxpoolingNanOpt;
int32_t dimCnt;
int32_t windowDim[CC_DIM_MAX - 2];
int32_t padding[CC_DIM_MAX - 2];
int32_t stride[CC_DIM_MAX - 2];
int32_t dataMode;
int32_t ceilMode;
ccQuantizeDescriptor_t quantInfo;
ccPooingFwdAlgo_t algo;
} ccPooling_t;

/**
* @ingroup dnn
* @brief struct define of activation operator
*
* NOTE(review): activationPara is a union type declared elsewhere (ccActivationPara_u);
* which member is valid presumably depends on mode - confirm.
*/
typedef struct tagCcActivation {
ccActivationMode_t mode;
ccNanPropagation_t reluNanOpt;
double coef; /* ceiling for clipped RELU, alpha for ELU */
ccActivationPara_u activationPara;
} ccActivation_t;

/**
* @ingroup dnn
* @brief struct define of svdf operator
*
* All size members are unsigned 32-bit counts.
* NOTE(review): their exact meaning (elements vs. bytes) is not visible here - confirm.
*/
typedef struct tagCcSvdf {
ccTensorFormat_t format;
ccDataType_t dataType;
uint32_t batches;
uint32_t features;
uint32_t rank;
uint32_t inputSize;
uint32_t memorySize;
} ccSvdf_t;

/**
* @ingroup dnn
* @brief struct define of HashTableLookup operator
*
* Holds one data type per tensor (lookup, key, value, output, hits) followed by
* element counts and 16-bit scale/offset values for the value and output tensors.
*/
typedef struct tagCcHashTableLookup {
ccTensorFormat_t format;
ccDataType_t lookupType;
ccDataType_t keyType;
ccDataType_t valueType;
ccDataType_t outputType;
ccDataType_t hitsType;
uint32_t lookups;
uint32_t keys;
uint32_t rows;
uint32_t features;
uint16_t valueScale;
uint16_t outputScale;
uint16_t valueOffset;
uint16_t outputOffset;
} ccHashTableLookup_t;

/**
* @ingroup dnn
* @brief struct define of prelu operator
*
* NOTE(review): presumably slopeCount is the number of slope values and channelShared
* means one slope is shared by all channels - confirm.
*/
typedef struct tagCcPRelu {
ccNanPropagation_t reluNanOpt;
int32_t slopeCount;
bool channelShared;
} ccPRelu_t;

/**
* @ingroup dnn
* @brief struct define of crop operator
*
* offset reserves CC_DIM_MAX slots.
* NOTE(review): offsetCnt presumably is the number of used offset entries - confirm.
*/
typedef struct tagCcCrop {
int32_t startAxis;
int32_t offset[CC_DIM_MAX];
int32_t offsetCnt;
} ccCrop_t;

/**
* @ingroup dnn
* @brief struct define of SpatialTransformer operator
*
* Note that dim uses 64-bit unsigned entries here (CC_DIM_MAX slots), unlike the
* int32_t dim arrays of ccTensor_t / ccFilter_t, and alignCorner is a uint64_t
* rather than a bool.
*/
typedef struct tagCcSpatialTransformer {
ccSamplerType_t samplerType;
ccDataType_t dataType;
int32_t dimCnt;
uint64_t dim[CC_DIM_MAX];
uint64_t alignCorner;
} ccSpatialTransformer_t;

/**
* @ingroup dnn
* @brief struct define of ShiftTransformer operator
*
* NOTE(review): xShift / yShift presumably enable shifting along each axis, with
* xPreDefined / yPreDefined as the corresponding values - confirm.
*/
typedef struct tagCcShiftTransformer {
ccSamplerType_t samplerType;
double xPreDefined;
double yPreDefined;
bool xShift;
bool yShift;
int32_t gridH;
int32_t gridW;
} ccShiftTransformer_t;

/**
* @ingroup dnn
* @brief struct define of FasterRcnnProposal operator
*
* ratio and scale are raw pointers; this header does not show who owns the memory.
* NOTE(review): presumably they point to ratioCnt and scaleCnt floats respectively - confirm.
*/
typedef struct tagCcFasterRcnnProposal {
int32_t preNMStopK;
int32_t postNMStopK;
float nmsTresh;
float minSize;
float featStride;
float baseSize;
int32_t ratioCnt;
int32_t scaleCnt;
float *ratio;
float *scale;
int32_t imgH;
int32_t imgW;
} ccFasterRcnnProposal_t;

/**
* @ingroup dnn
* @brief struct define of LRN operator
*
* NOTE(review): lrnN, lrnAlpha, lrnBeta and lrnK presumably are the window size, alpha,
* beta and k of the usual local response normalization formula - confirm.
*/
typedef struct tagCcLRN {
ccLRNMode_t lrnMode;
int32_t lrnN;
double lrnAlpha;
double lrnBeta;
double lrnK;
} ccLRN_t;

/**
* @ingroup dnn
* @brief struct define of instanceNorm
*
* NOTE(review): epsilon presumably is the small constant added to the variance for
* numerical stability - confirm.
*/
typedef struct tagCcInstancenorm {
ccInstanceNormMode_t mode;
double epsilon;
} ccInstancenorm_t;

/**
* @ingroup dnn
* @brief struct define of assignOp operator
*
* Wraps a single mode selector; the ccAssignOpMode_t values are declared elsewhere.
*/
typedef struct tagCcAssignOp {
ccAssignOpMode_t assignOpMode;
} ccAssignOp_t;

/**
* @ingroup dnn
* @brief struct define of arcSinCos operator
*
* Wraps a single mode selector; the ccArcSinCosMode_t values are declared elsewhere.
*/
typedef struct tagCcArcSinCos {
ccArcSinCosMode_t arcSinCosMode;
} ccArcSinCos_t;

/**
* @ingroup dnn
* @brief struct define of Detectpostprocess operator
*
* NOTE(review): bboxRegWeightsDx/Dy/Dw/Dh presumably are the box regression weights for
* the x, y, width and height deltas - confirm.
*/
typedef struct tagCcDetectpostprocess {
int32_t numClasses;
float confThreshold;
float nmsThreshold;
int32_t outTopK;
float bboxRegWeightsDx;
float bboxRegWeightsDy;
float bboxRegWeightsDw;
float bboxRegWeightsDh;
} ccDetectpostprocess_t;
/**
* @ingroup dnn
* @brief struct define of FasterRcnnDetectionOutput operator
*
* NOTE(review): imgH / imgW presumably are the input image height and width - confirm.
*/
typedef struct tagCcFasterRcnnDetectionOutput {
int32_t numClasses;
float nmsThreshold;
float postConfThreshold;
int32_t imgH;
int32_t imgW;
int32_t batchSize;
} ccFasterRcnnDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of SsdDetectionOutput operator
*
* var reserves 4 floats.
* NOTE(review): variance_num presumably is the number of used var entries - confirm.
* ccRefinedetDetectionOutput_t repeats these members and appends objectness_score.
*/
typedef struct tagCcSsdDetectionOutput {
int32_t numClasses;
int32_t backgroundLabelId;
double preConfThreshold;
int32_t preTopK;
double nmsThreshold;
double nmsEta;
ccBoxCodeType_t codeType;
int32_t outTopK;
bool shareLocation;
bool varianceEncodedInTarget;
uint32_t boxTypeNum;
float var[4];
uint32_t variance_num;
} ccSsdDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of RefinedetDetectionOutput operator
*
* Member list is the same as ccSsdDetectionOutput_t with objectness_score appended.
* var reserves 4 floats.
* NOTE(review): variance_num presumably is the number of used var entries - confirm.
*/
typedef struct tagCcRefinedetDetectionOutput {
int32_t numClasses;
int32_t backgroundLabelId;
double preConfThreshold;
int32_t preTopK;
double nmsThreshold;
double nmsEta;
ccBoxCodeType_t codeType;
int32_t outTopK;
bool shareLocation;
bool varianceEncodedInTarget;
uint32_t boxTypeNum;
float var[4];
uint32_t variance_num;
double objectness_score;
} ccRefinedetDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of MsrGenerateRpnProposals operator
*
* NOTE(review): preNmsTopK / postNmsTopK presumably bound the number of proposals kept
* before and after NMS; imgH / imgW presumably are the image height and width - confirm.
*/
typedef struct tagCcMsrGenerateRpnProposals {
int32_t preNmsTopK;
int32_t postNmsTopK;
float nmsThreshold;
float rpnMiniSize;
int32_t imgH;
int32_t imgW;
uint32_t boxTypeNum;
float scoreThreshold;
} ccMsrGenerateRpnProposals_t;

/**
* @ingroup dnn
* @brief struct define of RetinaPostprocessor operator
*
* mean and std each reserve 4 floats.
* NOTE(review): meanNum / stdNum presumably are the number of used entries - confirm.
*/
typedef struct tagCcRetinaPostprocessor {
int32_t numClasses;
int32_t maxDetections;
float nmsThreshold;
float scoreThreshold;
int32_t imgH;
int32_t imgW;
uint32_t boxTypeNum;
float mean[4];
int32_t meanNum;
float std[4];
int32_t stdNum;
int32_t outputNum;
bool ocrFlag;
} ccRetinaPostprocessor_t;

/**
* @ingroup dnn
* @brief struct define of GenerateSsdAnchors operator
*
* Every fixed-capacity array (20, 10 or 2 slots) is immediately followed by an
* unsigned count member.
* NOTE(review): each count presumably is the number of used entries of the array
* declared just before it - confirm.
*/
typedef struct tagCcGenerateSsdAnchors {
int32_t featureMapShapeList[20];
uint32_t featureMapShapeListSize;
int32_t boxSpecsNum[10];
uint32_t boxSpecsNumSize;
float scales[10];
uint32_t scalesNum;
float aspectRatios[10];
uint32_t aspectRatiosNum;
int32_t baseAnchorSize[2];
uint32_t baseAnchorSizeNum;
int32_t anchorStride[2];
uint32_t anchorStrideNum;
int32_t anchorOffset[2];
uint32_t anchorOffsetNum;
bool reduceBoxesInLowestLayer;
float minScale;
float maxScale;
int32_t imgH;
int32_t imgW;
} ccGenerateSsdAnchors_t;

/**
* @ingroup dnn
* @brief struct define of MscnnBoxOutput operator
*
* fieldH, fieldW and downsampleRate each reserve CC_MAX_INPUT_CNT (10) slots, one per
* possible operator input.
*/
typedef struct tagCcMscnnBoxOutput {
double fgThreshold;
double nmsThreshold;
ccNmsType_t nmsType;
int32_t fieldH[CC_MAX_INPUT_CNT];
int32_t fieldW[CC_MAX_INPUT_CNT];
int32_t downsampleRate[CC_MAX_INPUT_CNT];
int32_t defaultBoxCnt;
double fieldWhr;
double fieldXyr;
int32_t maxNmsNum;
int32_t maxPostNmsNum;
double minSize;
} ccMscnnBoxOutput_t;

/**
* @ingroup dnn
* @brief struct define of NMS operator
*
* NOTE(review): the pre*/post*/out* prefixes presumably refer to values applied before
* NMS, after NMS and on the final output respectively - confirm.
*/
typedef struct tagCcNms {
int32_t numClasses;
int32_t backgroundLabelId;
double preConfThreshold;
int32_t preTopK;
double nmsThreshold;
double nmsEta;
int32_t postTopK;
int32_t outTopK;
double postConfThreshold;
bool shareLocation;
} ccNms_t;

/**
* @ingroup dnn
* @brief struct define of NMS/MultiClassNMS operator
*
* NOTE(review): the typedef name carries a doubled "Cc" prefix (ccCcMultiClassNms_t),
* unlike the sibling structs. It is part of the public interface, so it is kept as is.
*/
typedef struct tagCcMultiClassNms {
uint64_t numClasses;
float objThreshold;
float nmsThreshold;
float clsThreshold;
bool normal;
uint64_t coorType;
} ccCcMultiClassNms_t;

/**
* @ingroup dnn
* @brief struct define of YoloDetectionOutput operator
*
* bias is a raw pointer; this header does not show who owns the memory.
* NOTE(review): presumably it points to biasNum floats - confirm.
*/
typedef struct tagCcYoloDetectionOutput {
ccYoloVersion_t yoloVersion;
uint32_t netH;
uint32_t netW;
uint32_t postTopK;
uint32_t classes;
float nmsThreshold;
float iouThreDecay;
float coorScaleFactor;
bool relative;
float objThreshold;
float clsThreshold;
uint32_t biasNum;
float *bias;
} ccYoloDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of GetRegionBox operator
*
* The capacity of bias is CC_MAX_YOLO_BIAS_NUM, which defaults to 16 but may be
* pre-defined by the includer because of the #ifndef guard; changing it changes the
* size of this struct.
* NOTE(review): biasNum presumably is the number of used bias entries - confirm.
*/
#ifndef CC_MAX_YOLO_BIAS_NUM
#define CC_MAX_YOLO_BIAS_NUM (16)
#endif

typedef struct tagCcGetRegionBox {
uint32_t biasNum;
uint32_t H;
uint32_t W;
float bias[CC_MAX_YOLO_BIAS_NUM];
} ccGetRegionBox_t;

/**
* @ingroup dnn
* @brief struct define of CorrectBoxes operator
*
* Note the member order is netW then netH, the reverse of ccYoloDetectionOutput_t.
*/
typedef struct tagCorrectBoxes {
uint32_t netW;
uint32_t netH;
bool relative;
} ccCorrectBoxes_t;

/**
* @ingroup dnn
* @brief struct define of ClsProb operator
*
* Holds only the objectness threshold.
*/
typedef struct tagClsProb {
float objThreshold;
} ccClsProb_t;

/**
* @ingroup dnn
* @brief struct define of SsdPriorBox operator
*
* minSize, maxSize, aspectRatio and variance are raw pointers, each followed by an
* int32_t count; this header does not show who owns the memory.
* NOTE(review): each pointer presumably refers to <count> doubles - confirm.
*/
typedef struct tagCcSsdPriorBox {
ccBoxCodeType_t codeType;
double *minSize;
int32_t minSizeNum;
double *maxSize;
int32_t maxSizeNum;
double *aspectRatio;
int32_t aspectRatioNum;
double *variance;
int32_t varianceNum;
int32_t imgH;
int32_t imgW;
double stepH;
double stepW;
double offset;
bool flip;
bool clip;
} ccSsdPriorBox_t;

/**
* @ingroup dnn
* @brief struct define of Yolo2Region operator
*
* These four members are also the leading members of ccYoloRegion_t.
*/
typedef struct tagCcYolo2Region {
ccSoftmaxTree_t softmaxTree;
bool softmax;
bool background;
bool treeSoftmax;
} ccYolo2Region_t;

/**
* @ingroup dnn
* @brief struct define of YoloRegion operator
*
* Starts with the same four members as ccYolo2Region_t and adds the class, coordinate
* and box counts plus the yolo version.
*/
typedef struct tagCcYoloRegion {
ccSoftmaxTree_t softmaxTree;
bool softmax;
bool background;
bool treeSoftmax;
int32_t classes;
int32_t coords;
int32_t boxes;
ccYoloVersion_t yoloV;
} ccYoloRegion_t;

/**
* @ingroup dnn
* @brief struct define of power operator
*
* NOTE(review): presumably y = (shift + scale * x) ^ power, as in the Caffe Power
* layer - confirm against the operator implementation.
*/
typedef struct tagCcPower {
float scale;
float shift;
float power;
} ccPower_t;

/**
* @ingroup dnn
* @brief struct define of exp operator
*
* NOTE(review): paramCnt presumably is expected to be PARAM_CNT_THREE, which is
* documented as the parameter number of op exp/log/pow - confirm.
*/
typedef struct tagCcExp {
ccDataType_t dataType;
uint32_t paramCnt;
} ccExp_t;

/**
* @ingroup dnn
* @brief struct define of log operator
*
* NOTE(review): paramCnt presumably is expected to be PARAM_CNT_THREE, which is
* documented as the parameter number of op exp/log/pow - confirm.
*/
typedef struct tagCcLog {
ccDataType_t dataType;
uint32_t paramCnt;
} ccLog_t;

/**
* @ingroup dnn
* @brief struct define of pow operator
*
* NOTE(review): paramCnt presumably is expected to be PARAM_CNT_THREE, which is
* documented as the parameter number of op exp/log/pow - confirm.
*/
typedef struct tagCcPow {
ccDataType_t dataType;
uint32_t paramCnt;
} ccPow_t;

/**
* @ingroup dnn
* @brief struct define of padv2 operator
*
* padValue is an untyped raw pointer; padValueType is the data type stored next to it.
* Ownership of the pointed-to value is not visible in this header.
* padShapeLow and padShapeHigh each reserve CC_DIM_MAX slots.
* NOTE(review): padDimCnt presumably is the number of used slots - confirm.
*/
typedef struct tagCcPadV2 {
ccPadMode_t padMode;
void *padValue;
ccDataType_t padValueType;
int32_t padDimCnt;
int32_t padShapeLow[CC_DIM_MAX];
int32_t padShapeHigh[CC_DIM_MAX];
} ccPadV2_t;

/**
* @ingroup dnn
* @brief struct define of psROIPooling operator
*
* NOTE(review): pooledH / pooledW presumably are the pooled output height and width,
* and spatialScale the ratio between feature map and image coordinates - confirm.
*/
typedef struct tagCcPsRoiPooling {
ccPoolingMode_t poolingMode;
int32_t pooledH;
int32_t pooledW;
float spatialScale;
float padRatio;
int32_t groupSize;
int32_t outputDim;
} ccPsRoiPooling_t;

/**
* @ingroup dnn
* @brief struct define of RoIAlign operator
*
* NOTE(review): pooledH / pooledW presumably are the pooled output height and width,
* and samplingRatio the number of sampling points per bin - confirm.
*/
typedef struct tagCcRoiAlign {
int32_t pooledH;
int32_t pooledW;
float spatialScale;
int32_t samplingRatio;
} ccRoiAlign_t;

/**
* @ingroup dnn
* @brief struct define of RoiInterpPooling operator
*
* Unlike ccRoiAlign_t, the spatial scale is given separately for height and width
* (spatialScaleH / spatialScaleW).
*/
typedef struct tagCcRoiInterpPooling {
int32_t pooledH;
int32_t pooledW;
int32_t poolKernelH;
int32_t poolKernelW;
int32_t pooledTailH;
int32_t pooledTailW;
float spatialScaleH;
float spatialScaleW;
} ccRoiInterpPooling_t;

/**
* @ingroup dnn
* @brief struct define of DetectionFull3DOutput operator
*
* NOTE(review): units of pitchRangeD / pitchPresetD and mountHeight are not visible in
* this header - confirm before use.
*/
typedef struct tagCcDetectionFull3DOutput {
int32_t imageWidth;
int32_t imageHeight;
int32_t numAngleBins;
float trcMarginRatioX;
float trcMarginRatioY;
int32_t pitchRangeD;
int32_t pitchPresetD;
float mountHeight;
int32_t visiblenessBins;
float meanVisibleness;
bool discreteVisibleness;
} ccDetectionFull3DOutput_t;

/**
* @ingroup dnn
* @brief struct define of MsrFastRcnnPredictions operator
*
* Note the mixed precision: scoreThreshold is a float while nmsThreshold is a double.
*/
typedef struct tagMsrFastRcnnPredictions {
int32_t numClasses; // num of classes
float scoreThreshold; // the threshold of the score
double nmsThreshold; // the threshold of nms
int32_t postTopK;
int32_t outTopK;
int32_t imgH; // the height of image
int32_t imgW; // the width of image
} ccMsrFastRcnnPredictions_t;

/**
* @ingroup dnn
* @brief struct define of ResizeBilinear operator
*
* NOTE(review): which of zoom_factor / shrink_factor / height / width is honoured
* presumably depends on resizeOutputDimMode - confirm.
*/
typedef struct tagCcResizeBilinear {
ccResizeOutputDimMode_t resizeOutputDimMode;
bool alignCorners;
int32_t zoom_factor;
int32_t shrink_factor;
int32_t height;
int32_t width;
int32_t pad_begin;
int32_t pad_end;
} ccResizeBilinear_t;

/**
* @ingroup dnn
* @brief struct define of ResizeNearestNeighbor operator
*
* NOTE(review): height / width presumably are the target output size - confirm.
*/
typedef struct tagCcResizeNearestNeighbor {
bool alignCorners;
int32_t height;
int32_t width;
} ccResizeNearestNeighbor_t;

/**
* @ingroup dnn
* @brief struct define of Eltwise operator
*
* quantInfo is spelled as ccQuantize_t * here, whereas the other operator structs in
* this header use ccQuantizeDescriptor_t for their quantInfo member.
*/
typedef struct tagCcEltwise {
ccQuantize_t *quantInfo;
bool reluFlag;
} ccEltwise_t;

/**
* @ingroup dnn
* @brief struct define of BatchNorm operator
*
* NOTE(review): reluFlag presumably requests a fused ReLU after the normalization - confirm.
*/
typedef struct tagCcBatchNorm {
bool reluFlag;
} ccBatchNorm_t;

/**
* @ingroup dnn
* @brief struct define of pad operator
*
* Padding is given for the two spatial axes only; the trailing comments map each
* member to the padLow / padHigh index it corresponds to.
*/
typedef struct tagCcPad {
ccPadMode_t padMode;
float padValue;
int32_t htoppad; // padLow[0]
int32_t hbottompad; // padHigh[0]
int32_t wleftpad; // padLow[1]
int32_t wrightpad; // padHigh[1]
} ccPad_t;

/**
* @ingroup dnn
* @brief one sub-condition of a ShapeClassify condition
*
* Holds four base values and four comparison types.
* NOTE(review): presumably condType[i] is the comparison applied against
* BaseCondValue[i] - confirm.
*/
typedef struct tagCcSubCondition {
uint32_t BaseCondValue[4];
ccCMPType_t condType[4];
ccResultType_t resultType;
} ccSubCondition;

/**
* @ingroup dnn
* @brief one condition of the ShapeClassify operator
*
* subCond reserves 2 sub-conditions.
* NOTE(review): subConditionNum presumably is the number of used subCond entries, and
* true_value the value produced when the condition holds - confirm.
*/
typedef struct tagCcShapeClassifyCond {
uint32_t subConditionNum;
ccResultType_t resultType;
uint32_t true_value;
ccSubCondition subCond[2];
} ccShapeClassifyCond;

/**
* @ingroup dnn
* @brief struct define of ShapeClassify operator
*
* The capacity of shapeClassifyCond is CC_SHAPE_CLASSIFY_CONDITION_NUM, which defaults
* to 8 but may be pre-defined by the includer because of the #ifndef guard; changing it
* changes the size of this struct.
* NOTE(review): shapeClassifyConditionNum presumably is the number of used entries - confirm.
*/
#ifndef CC_SHAPE_CLASSIFY_CONDITION_NUM
#define CC_SHAPE_CLASSIFY_CONDITION_NUM (8)
#endif

typedef struct tagCcShapeClassify {
uint32_t shapeClassifyConditionNum;
uint32_t defaultValue;
ccShapeClassifyCond shapeClassifyCond[CC_SHAPE_CLASSIFY_CONDITION_NUM];
} ccShapeClassify_t;

/**
* @ingroup dnn
* @brief struct define of square operator
*/
typedef struct tagCcSquare {
ccSquareMode_t mode;
} ccSquare_t;

/**
* @ingroup dnn
* @brief operation of segment reduction
*
* CC_SEGMENT_REDUCTION_OP_INVALID has no explicit value, so it always equals the number
* of enumerators declared before it (currently 1).
*/
typedef enum {
CC_SEGMENT_REDUCTION_OP_SUM = 0, /**< sum */
CC_SEGMENT_REDUCTION_OP_INVALID
} ccSegmentReductionOpType_t;

/**
* @ingroup dnn
* @brief struct define of Fill operator
*
* All value pointers are untyped and const; valueDatatype is the data type stored
* alongside them. Ownership of the pointed-to values is not visible in this header.
*/
typedef struct tagCcFillParam {
// The filler type.
ccFillOpType_t fillType;
ccDataType_t valueDatatype;
const void *value; // the value in constant fill
const void *min; // the min value in uniform fill
const void *max; // the max value in uniform fill
const void *mean; // the mean value in Gaussian fill
const void *std; // the std value in Gaussian fill
// the seed used to generate data in Gaussian and uniform fill
int64_t seed1;
int64_t seed2;
} ccFillParam_t;

/**
* @ingroup dnn
* @brief struct define of NonMaxSuppression operator
*
* NOTE(review): paraCount presumably is expected to be PARAM_CNT_TWO, which is
* documented as the parameter number of op nonmaxsuppression - confirm.
*/
typedef struct tagNonMaxSuppression {
ccDataType_t dataType;
uint32_t paraCount;
} ccNonMaxSuppression_t;

/**
* @ingroup dnn
* @brief struct define of Argmaxmin operator
*
* NOTE(review): axisType presumably selects between arg-max and arg-min or the axis
* handling mode; its legal values are not visible in this header - confirm.
*/
typedef struct tagCcArgmaxmin {
int32_t axisType;
bool outMaxVal;
int64_t topK;
int64_t reduceSize;
int64_t reduceStride;
int64_t axis;
bool keepDims;
} ccArgmaxmin_t;

/**
* @ingroup dnn
* @brief struct define of Upsample operator parameters
*
* NOTE(review): scale presumably defaults to UPSAMPLE_SCAL_DEFAULT_TWO and
* scaleHeight / scaleWidth presumably override it per axis - confirm.
*/
typedef struct tagUpsamplePara {
int32_t scale;
int32_t scaleHeight;
int32_t scaleWidth;
int32_t upsampleHeight;
int32_t upsampleWidth;
bool padOutHeight;
bool padOutWidth;
} ccUpsamplePara_t;

/**
* @ingroup dnn
* @brief struct define of ConcatFive2Four operator
*
* Counterpart of ccConcatFour2Five_t; carries a ccTransForLossMode_t mode instead of a
* branch count.
*/
typedef struct tagCcConcatFive2Four_t {
ccTransForLossMode_t mode;
uint32_t classNum;
} ccConcatFive2Four_t;

}; // namespace cce
#endif // DNN_STRUCT_BASE_HPP__

+ 155
- 0
third_party/fwkacllib/inc/cce/fwk_adpt_struct.h View File

@@ -0,0 +1,155 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef FWK_ADPT_STRUCT_H__
#define FWK_ADPT_STRUCT_H__

#include <cstdint>

namespace aicpu {
namespace FWKAdapter {

// API return code
// Values 0-13 are contiguous; 99 is reserved for the unknown-error case.
enum FWKAdptAPIRetCode {
FWK_ADPT_SUCCESS = 0, // success
FWK_ADPT_NOT_INIT = 1, // not init
FWK_ADPT_ALLOC_FAILED = 2, // allocate memory failed
FWK_ADPT_PARAM_INVALID = 3, // invalid input param
FWK_ADPT_PARAM_PARSE_FAILED = 4, // parse input param failed
FWK_ADPT_NATIVE_ERROR = 5, // error code
FWK_ADPT_NOT_SUPPORT_OPTYPE = 6, // unsupported operate type
FWK_ADPT_INTERNAL_ERROR = 7, // adapter internal error
FWK_ADPT_NOT_SUPPORT_DATATYPE = 8, // unsupported input/output data type
FWK_ADPT_KERNEL_ALREADY_RUNING = 9, // kernel already running, parallel run is not supported
FWK_ADPT_SESSION_NOT_EXIST = 10, // session id does not exist
FWK_ADPT_SESSION_ALREADY_EXIST = 11, // session id already exists when creating a session
FWK_ADPT_NATIVE_END_OF_SEQUENCE = 12, // end of sequence
FWK_ADPT_EXTEND_TYPE_NOT_EXIST = 13, // extend info type does not exist
FWK_ADPT_UNKNOWN_ERROR = 99 // unknown error code
};

// Operation requested from the FWKAdapter.
// Notice: a new operate type must be checked with OMM and appended after the last
// entry. The values are written out explicitly so that an accidental insertion or
// reordering cannot silently renumber the existing entries.
enum FWKOperateType {
  FWK_ADPT_SESSION_CREATE = 0,
  FWK_ADPT_KERNEL_RUN = 1,
  FWK_ADPT_KERNEL_DESTROY = 2,
  FWK_ADPT_SESSION_DESTROY = 3,
  FWK_ADPT_SINGLE_OP_RUN = 4,
  FWK_ADPT_KERNEL_RUN_NO_SESS = 5
};

// Extend Info type for task
// Values are assigned sequentially starting from 0. FWK_ADPT_EXT_INVALID has no explicit
// value, so it always equals the number of enumerators declared before it (currently 9).
enum FWKTaskExtInfoType {
FWK_ADPT_EXT_SHAPE_TYPE = 0,
FWK_ADPT_EXT_INPUT_SHAPE,
FWK_ADPT_EXT_OUTPUT_SHAPE,
FWK_ADPT_EXT_UPDATE_ADDR,
FWK_ADPT_EXT_OP_NAME,
FWK_ADPT_EXT_SESSION_INFO,
FWK_ADPT_EXT_BITMAP,
FWK_ADPT_EXT_TOPIC_TYPE,
FWK_ADPT_EXT_ASYNCWAIT,
FWK_ADPT_EXT_INVALID
};

// Topic type carried as extend info.
// NOTE(review): presumably the payload of an FWK_ADPT_EXT_TOPIC_TYPE entry - confirm.
// FWK_ADPT_TOPIC_INVALID has no explicit value and currently evaluates to 4.
enum FWKExtTopicType {
FWK_ADPT_TOPIC_DEVICE_ONLY = 0,
FWK_ADPT_TOPIC_DEVICE_FIRST,
FWK_ADPT_TOPIC_HOST_ONLY,
FWK_ADPT_TOPIC_HOST_FIRST,
FWK_ADPT_TOPIC_INVALID
};

// Selects which addresses are updated: none, inputs, outputs, or both.
// Numerically FWK_ADPT_UPDATE_INPUT_OUTPUT (3) equals
// FWK_ADPT_UPDATE_INPUT (1) | FWK_ADPT_UPDATE_OUTPUT (2).
enum FWKExtUpdateAddrType {
  FWK_ADPT_UPDATE_NULL = 0,
  FWK_ADPT_UPDATE_INPUT = 1,
  FWK_ADPT_UPDATE_OUTPUT = 2,
  FWK_ADPT_UPDATE_INPUT_OUTPUT = 3
};

// Wait type stored in AsyncWait::waitType (that member's comment names
// FWK_ADPT_WAIT_TYPE_EVENT as the event-wait value).
// FWK_ADPT_WAIT_TYPE_INVALID has no explicit value and currently evaluates to 2.
enum FWKExtWaitType {
FWK_ADPT_WAIT_TYPE_NULL = 0,
FWK_ADPT_WAIT_TYPE_EVENT,
FWK_ADPT_WAIT_TYPE_INVALID
};

#pragma pack(push, 1)
// API Parameter Structure
// Packed with 1-byte alignment: no padding is inserted between opType and the 64-bit
// members that follow it. All addresses are carried as uint64_t and have to be
// converted to pointers by the consumer.
struct StrFWKKernel {
FWKOperateType opType;
uint64_t sessionID; // unique

uint64_t stepIDAddr; // step id addr
uint64_t kernelID; // run kernel id, unique in session
uint64_t nodeDefLen; // nodeDef protobuf len
uint64_t nodeDefBuf; // NodeDef protobuf offset addr, need convert to void*
uint64_t funDefLibLen; // FunctionDefLibrary protobuf len
uint64_t funDefLibBuf; // FunctionDefLibrary protobuf addr which use in NodeDef, need convert to void*

uint64_t inputOutputLen; // InputOutput shape protobuf len
uint64_t inputOutputBuf; // InputOutput shape protobuf addr, need convert to void*
uint64_t workspaceBaseAddr; // Workspace base addr, need convert to void*
uint64_t inputOutputAddr; // InputOutput addr, need convert to void*

uint64_t extInfoLen; // extend info total length
uint64_t extInfoAddr; // extend info addr, ExtInfo structure
};
#pragma pack(pop)

typedef StrFWKKernel FWKOperateParam;

// Extend info ShapeAndType
// Packed with 1-byte alignment, so the struct is exactly
// 4 + 8 * kMaxShapeDims = 68 bytes: one 32-bit type followed by up to
// kMaxShapeDims 64-bit dimension values.
const uint32_t kMaxShapeDims = 8;
#pragma pack(push, 1)
struct ShapeAndType {
int32_t type;
int64_t dims[kMaxShapeDims];
};
#pragma pack(pop)

// Extend info structure for extInfoAddr
// kExtInfoHeadSize (8) matches the packed size of the fixed header:
// 4 bytes infoType + 4 bytes infoLen. The variable-length payload follows directly.
const uint32_t kExtInfoHeadSize = 8;

#pragma pack(push, 1)
struct ExtInfo {
int32_t infoType; // extend type
uint32_t infoLen; // length for infoMsg
// NOTE(review): a zero-length array is a compiler extension (GCC/Clang), not standard
// C++; it marks the start of the payload and contributes nothing to sizeof(ExtInfo).
char infoMsg[0]; // extend value
};
#pragma pack(pop)

// Summary of a result buffer: location and size of its shape data and raw data.
// Four packed 64-bit members, 32 bytes in total.
#pragma pack(push, 1)
struct ResultSummary {
uint64_t shape_data_ptr; // shape data addr, need convert to void*
uint64_t shape_data_size; // num of dims
uint64_t raw_data_ptr; // raw data addr, need convert to void*
uint64_t raw_data_size; // size of raw data
};
#pragma pack(pop)

// Async wait description.
// Packed with 1-byte alignment: 1 + 4 + 4 + 8 = 17 bytes, with waitId starting at
// byte offset 1 (unaligned).
#pragma pack(push, 1)
struct AsyncWait {
uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
uint32_t waitId; // wait id, GE refresh
uint32_t timeOut; // reserved
uint64_t reserved;
};
#pragma pack(pop)
} // end namespace FWKAdapter
} // namespace aicpu

#endif // FWK_ADPT_STRUCT_H__

+ 56
- 0
third_party/fwkacllib/inc/cce/l2fusion_struct.hpp View File

@@ -0,0 +1,56 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef L2FUSION_STRUCT_HPP_
#define L2FUSION_STRUCT_HPP_

#include <map>
#include <string>
#include "runtime/kernel.h"

#define L2_DYNAMIC_SPLIT_NUM

// NOTE(review): a using-directive at namespace scope in a header leaks into every file
// that includes it. Existing includers may rely on it, so it is kept; new declarations
// in this header should spell std:: explicitly.
using namespace std;

namespace fusion {

// One L2 allocation record; stored as the mapped value of L2DataMap_t, whose key is
// the ddr addr.
// NOTE(review): l2PageNum presumably counts L2 pages occupied by this entry - confirm.
typedef struct tagL2Data {
uint32_t l2Index;
uint64_t l2Addr;
uint64_t l2PageNum;
} L2Data_t;

typedef std::map<uint64_t, L2Data_t> L2DataMap_t; // the key is ddr addr
typedef std::pair<uint64_t, L2Data_t> L2DataPair_t; // the key is ddr addr

// L2 information of one task (graph node). Stored in TaskL2InfoMap_t keyed by nodeId
// and in TaskL2InfoFEMap_t keyed by nodeName.
typedef struct TagTaskL2Info {
  // Qualified explicitly so this declaration does not depend on the header-scope
  // using-directive for namespace std.
  std::string nodeName;
  rtL2Ctrl_t l2ctrl;

  L2DataMap_t input;   // L2 records of the inputs, keyed by ddr addr
  L2DataMap_t output;  // L2 records of the outputs, keyed by ddr addr
  uint32_t isUsed;     // NOTE(review): presumably non-zero once the entry is in use - confirm
} TaskL2Info_t;

typedef std::map<uint32_t, TaskL2Info_t> TaskL2InfoMap_t; // the key is nodeId
typedef std::pair<uint32_t, TaskL2Info_t> TaskL2InfoPair_t; // the key is nodeId

// std::string is qualified explicitly so these aliases do not depend on the header-scope
// using-directive for namespace std.
typedef std::map<std::string, TaskL2Info_t> TaskL2InfoFEMap_t;    // the key is nodeName
typedef std::pair<std::string, TaskL2Info_t> TaskL2InfoFEPair_t;  // the key is nodeName

} // namespace fusion

#endif // L2FUSION_STRUCT_HPP_

+ 65
- 0
third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h View File

@@ -0,0 +1,65 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef FUSION_ENGINE_HPP_
#define FUSION_ENGINE_HPP_

#include "cce/cce.h"
#include "graph/compute_graph.h"
#include "proto/task.pb.h"

#include <map>
#include <vector>

// NOTE(review): using-directives at namespace scope in a header leak into every file
// that includes it. The unqualified TaskDef used by FusionTaskBuild below is presumably
// found through `using namespace domi` - confirm before removing either directive.
using namespace domi;
using namespace std;

namespace fusion {
// Fusion status codes.
// NOTE(review): presumably the values returned (as uint32_t) by the fusion entry points
// declared in this header - confirm against the implementation.
enum {
FUSION_STATUS_SUCCESS = 0,
FUSION_STATUS_FAIL = 1,
};

// Model resources passed to FusionTaskBuild: weight/data memory sizes, their base
// pointers, and the two feature switches.
// NOTE(review): units of the sizes and ownership of the base pointers are not visible
// in this header - confirm.
typedef struct {
uint64_t weightSize;
uint64_t memorySize;
uint8_t *dataMemBase;
uint8_t *weightMemBase;
uint32_t l2Enable; // 1 - enable l2 buffer allocation, 0 - disable l2 buffer allocation
uint32_t fusionEnable; // 1 - enable buffer fusion, 0 - disable buffer fusion
} ModelRes;

// String keys used by the fusion engine (presumably graph/op attribute names - confirm).
// NOTE(review): `static const std::string` at namespace scope in a header gives every
// including translation unit its own dynamically-initialized copy.
// NOTE(review): the value of FUSION_MULTI_BATCH_STRIDE is spelled "bathc". It is a
// runtime key, so it must stay byte-identical to whatever other components read and
// write; do not correct the spelling here in isolation.
static const std::string SCOPE_ID_ATTR = "fusion_scope";
static const std::string L2FUSION_DYNAMIC_CONVERGE_OP = "l2fusion_dynamic_converge_op";
static const std::string L2FUSION_DYNAMIC_SPLIT_NUM = "l2fusion_dynamic_split_num";
static const std::string FUSION_VIRTUAL_OP = "fusion_virtual_op";
static const std::string FUSION_MULTI_BATCH_STRIDE = "fusion_multi_bathc_stride";

#define TVM_TYPE 1

typedef std::map<int64_t, std::vector<ge::NodePtr>> kScopeNodeMap_t;
typedef std::pair<int64_t, std::vector<ge::NodePtr>> kScopeNodePair_t;

// Fusion entry points. Only the declarations are visible here; the definitions are not
// part of this header.
// NOTE(review): the uint32_t results presumably are FUSION_STATUS_* values - confirm.
// Each graph function takes the original graph and the graph that receives the fusion
// result, both as ge::ComputeGraphPtr passed by value.
// BufferFusion: enable_l2dynamic defaults to true.
uint32_t BufferFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph, bool enable_l2dynamic = true);
uint32_t BufferFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
uint32_t GraphFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
// FusionTaskBuild: buffer, modelRes and task_def_list_ are taken by non-const reference,
// so the callee is able to modify them.
uint32_t FusionTaskBuild(cce::ccHandle_t ccHandle, ge::ComputeGraphPtr fusionGraph, ge::Buffer &buffer,
ModelRes &modelRes, std::vector<TaskDef> &task_def_list_);
// FusionTaskBuildComplete: takes the handle list by value and returns nothing.
void FusionTaskBuildComplete(std::vector<cce::ccHandle_t> cchandleList);
uint32_t GraphFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
} // namespace fusion

#endif // FUSION_ENGINE_HPP_

+ 54
- 0
third_party/fwkacllib/inc/cce/taskdown_api.h View File

@@ -0,0 +1,54 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef TASKDOWN_API_H_
#define TASKDOWN_API_H_

#include <map>
#include <vector>
#include "cce/cce.h"
#include "l2fusion_struct.hpp"
#include "taskdown_common.hpp"

namespace cce {

#define CC_FUSION_OP_MAX 32

// One address entry of an operator's kernel args; exchanged through
// ccGetKernelArgsAddrs / ccSetKernelArgs declared below.
// NOTE(review): presumably addrPos is the location inside the args buffer and addrData
// the address value stored there - confirm.
typedef struct tagOpAddrsInfo {
void *addrPos;
uintptr_t addrData;
} ccOpAddrsInfo;

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): this declaration sits inside namespace cce and takes a C++ reference
// (ccOpContext &), so the header can only be consumed from C++. The extern "C" block
// therefore only affects the linkage of ccUpdateKernelArgs, not its usability from C.
// Takes the operator context, three base addresses (data, weight, variable), the args
// buffer with its size, and an l2ctrl address; returns a ccStatus_t.
ccStatus_t ccUpdateKernelArgs(ccOpContext &opContext, uint64_t dataBaseAddr, uint64_t weightBaseAddr,
uint64_t variableBaseAddr, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr);

#ifdef __cplusplus
}
#endif

// ccGetKernelArgsAddrs: opAddrsInfo is a non-const reference.
// NOTE(review): presumably it is filled with the address entries found in the args
// buffer - confirm.
ccStatus_t ccGetKernelArgsAddrs(ccOpContext &opContext, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr,
std::vector<ccOpAddrsInfo> &opAddrsInfo);

// ccSetKernelArgs: takes a list of address entries.
// NOTE(review): the parameter name "dateInfo" looks like a misspelling of "dataInfo".
ccStatus_t ccSetKernelArgs(std::vector<ccOpAddrsInfo> &dateInfo);

// ccGetKernelTypeByOpId: kernelType is an output reference. The same prototype is also
// declared in taskdown_common.hpp, which this header includes.
ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType);

} // namespace cce
#endif // TASKDOWN_API_H_

+ 108
- 0
third_party/fwkacllib/inc/cce/taskdown_common.hpp View File

@@ -0,0 +1,108 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef TASKDOWN_COMMON_H_
#define TASKDOWN_COMMON_H_

#include <map>
#include "cce/cce_def.hpp"
#include "common/attr_list.hpp"
#include "l2fusion_struct.hpp"

namespace cce {

#define CC_FUSION_OP_MAX 32

// Kind of kernel an operator is executed with. Values 0-8 are contiguous; 10000 is the
// "unknown" sentinel, leaving a gap for future kernel types.
typedef enum tagccKernelType {
CCE_AI_CORE = 0, /* cce aicore */
CCE_AI_CPU = 1, /* cce aicpu */
TE = 2, /* te operator*/
CUSTOMIZED = 3, /* customized operator */
TE_AI_CORE = 4, /* te aicore operator*/
TE_AI_CPU = 5, /* te aicpu operator */
AI_CPU = 6, /* aicpu */
CUST_AI_CPU = 7, /* custom aicpu*/
HOST_CPU = 8, /* host cpu */
INVALID = 10000 /* unknown kernel type */
} ccKernelType;

// Per-operator kernel context; filled by ccGetKernelContext and consumed by the
// kernel-args functions in taskdown_api.h.
// opIndex2 reserves CC_FUSION_OP_MAX (32) slots.
// NOTE(review): opCount presumably is the number of used opIndex2 entries, and
// argsOffset presumably points to argsCount 16-bit offsets - confirm. Ownership of
// argsOffset is not visible in this header.
typedef struct tagOpContext {
ccKernelType kernelType;
uint32_t opId;
uint32_t kernelFuncId;
uint32_t opIndex;
uint32_t opCount;
uint32_t opIndex2[CC_FUSION_OP_MAX];
bool isFlowtable;
uint16_t *argsOffset;
uint32_t argsCount;
uint64_t genDataBaseAddr;
uint64_t genDataBaseSize;
uint64_t genWeightBaseAddr;
uint64_t genWeightBaseSize;
uint64_t genVariableBaseAddr;
uint64_t genVariableBaseSize;
uint64_t l2ctrlSize;
} ccOpContext;

// Read-count information returned by ccGetKernelReadCount.
// NOTE(review): tensorRc presumably maps a tensor address to its read count, and is
// only meaningful when isEnable is true - confirm.
typedef struct tagOpReadCount {
bool isEnable;
std::map<uint64_t, uint32_t> tensorRc;
} ccOpReadCount;

// Kernel id modes used on task down. The values are written out explicitly so that an
// accidental insertion or reordering cannot silently renumber the existing entries.
typedef enum tagTaskDownKernelIdMode {
  CC_TASKDOWN_RESERVED = 0,
  CC_TASKDOWN_ROIPOOLING = 1,
  CC_TASKDOWN_ROIPOOLING_PERF = 2,
  CC_TASKDOWN_ROIALIGN = 3,
  CC_TASKDOWN_ROIALIGN_PERF = 4,
  CC_TASKDOWN_FC = 5,
  CC_TASKDOWN_FC_COMPRESS = 6,
  CC_TASKDOWN_SOFTMAX_LOWEST = 7,
  CC_TASKDOWN_ROIALIGN_FP16 = 8,
  CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR = 9,
  CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR_COMMON = 10
} ccTaskDownKernelIdMode_t;

// Task-down helper API. Only the declarations are visible here; the definitions are not
// part of this header. Every function returns a ccStatus_t. Results are delivered
// through the pointer / non-const reference parameters.

// Writes the stream associated with handle to *streamId.
ccStatus_t GetStream(ccHandle_t handle, rtStream_t *streamId);

ccStatus_t ccClearOpMap(ccHandle_t handle);

ccStatus_t ccSetKernelOpMap(ccHandle_t handle);

// NOTE(review): pgraph is an untyped pointer; the expected pointee type is not visible
// in this header - confirm.
ccStatus_t ccSetKernelContext(ccHandle_t handle, uint32_t opId, AttrList &attrList, bool isFlowtable,
ccKernelType kernelType, void *pgraph);

// Fills opContext for the given stream.
ccStatus_t ccGetKernelContext(rtStream_t streamId, ccOpContext &opContext);

// Also declared with the same signature in taskdown_api.h.
ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType);

// l2AllocRes is keyed by nodeId (see fusion::TaskL2InfoMap_t).
ccStatus_t ccSetStreamL2Map(ccHandle_t handle, fusion::TaskL2InfoMap_t &l2AllocRes);

// l2Data is a reference to a pointer: the callee sets the caller's pointer.
// NOTE(review): ownership/lifetime of the pointed-to TaskL2Info_t is not visible here.
ccStatus_t ccGetStreamL2Map(rtStream_t streamId, uint32_t opIndex, fusion::TaskL2Info_t *&l2Data);

ccStatus_t ccSetOpIndex(ccHandle_t handle, uint32_t opIndex);

ccStatus_t ccGetOpIndex(ccHandle_t handle, uint32_t &opIndex);

ccStatus_t ccGetOpIndexByStream(rtStream_t streamId, uint32_t &opIndex);

ccStatus_t ccClearStreamL2Map(ccHandle_t handle);

ccStatus_t ccGetKernelReadCount(rtStream_t streamId, ccOpReadCount &rc);

} // namespace cce
#endif // TASKDOWN_COMMON_H_

Loading…
Cancel
Save