| Author | SHA1 | Message | Date |
|---|---|---|---|
| | 008fafbb15 | !2107 upgrade Ascend package 13 Jan 22 (Merge pull request !2107 from yanghaoran/r1.6) | 3 years ago |
| | 20354d0cfa | upgrade Ascend package 13 Jan 22 | 3 years ago |
| | 2158c0a9b8 | !2106 upgrade Ascend package 07 Jan 22 (Merge pull request !2106 from yanghaoran/r1.6) | 3 years ago |
| | e0619959fb | upgrade Ascend package 07 Jan 22 | 3 years ago |
| | 14e4920442 | !2103 upgrade Ascend package 30 Dec 21 (Merge pull request !2103 from yanghaoran/r1.6) | 3 years ago |
| | c888273bc7 | upgrade Ascend package 30 Dec 21 | 3 years ago |
| | 1b80a4c045 | !2102 upgrade Ascend package 23 Dec 21 (Merge pull request !2102 from yanghaoran/r1.6) | 3 years ago |
| | 82e6f4774f | upgrade Ascend package 23 Dec 21 | 3 years ago |
| | 4740bb12af | !2101 permanent fix of metadef conflict (Merge pull request !2101 from yanghaoran/r1.6) | 3 years ago |
| | b74d9ffd58 | permanent fix of metadef conflict | 3 years ago |
| | ea67886b3b | !2100 tmporary fix of metadef conflict (Merge pull request !2100 from yanghaoran/r1.6) | 3 years ago |
| | d499a9989d | tmporary fix of metadef conflict | 3 years ago |
| | ca6cea7617 | !2099 upgrade Ascend package 17 Dec 21 (Merge pull request !2099 from yanghaoran/r1.6) | 3 years ago |
| | 9868387c05 | upgrade Ascend package 16 Dec 21 | 3 years ago |
@@ -134,6 +134,7 @@ static const int ACL_ERROR_DRV_FAILURE = 500004;
static const int ACL_ERROR_PROFILING_FAILURE = 500005;
#define ACL_TENSOR_SHAPE_RANGE_NUM 2
#define ACL_TENSOR_VALUE_RANGE_NUM 2
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE
typedef enum {
@@ -336,6 +337,19 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
                                                    int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);
/**
 * @ingroup AscendCL
 * @brief set value range for aclTensorDesc
 *
 * @param desc [OUT] pointer to the data of aclTensorDesc
 * @param valueCount [IN] the number of values
 * @param valueRange [IN] the range of the values
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorValueRange(aclTensorDesc *desc, size_t valueCount,
                                                    int64_t valueRange[][ACL_TENSOR_VALUE_RANGE_NUM]);
/**
 * @ingroup AscendCL
 * @brief get data type specified by the tensor description
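The new `aclSetTensorValueRange` mirrors the existing `aclSetTensorShapeRange`: one `[min, max]` pair per tensor value. A minimal hedged sketch, assuming the usual descriptor lifecycle from `acl_base.h` (only `aclSetTensorValueRange` itself comes from this diff):

```cpp
#include "acl/acl.h"

// Hedged sketch: constrain both values of a 1-D int64 tensor to [1, 64].
void SetValueRangeExample() {
  int64_t dims[1] = {2};
  aclTensorDesc *desc = aclCreateTensorDesc(ACL_INT64, 1, dims, ACL_FORMAT_ND);
  if (desc != nullptr) {
    // One [min, max] pair per value; ACL_TENSOR_VALUE_RANGE_NUM == 2.
    int64_t valueRange[2][ACL_TENSOR_VALUE_RANGE_NUM] = {{1, 64}, {1, 64}};
    (void)aclSetTensorValueRange(desc, 2, valueRange);  // compare against ACL_SUCCESS in real code
    aclDestroyTensorDesc(desc);
  }
}
```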
@@ -41,6 +41,8 @@ typedef enum {
typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag;
typedef struct aclGraphDumpOption aclGraphDumpOption;
/**
 * @ingroup AscendCL
 * @brief compile op
@@ -114,6 +116,55 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val
 */
ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);
/**
 * @ingroup AscendCL
 * @brief generate graph and dump
 *
 * @param opType [IN] op type
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param inputs [IN] pointer to array of input buffers
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN] pointer to array of output tensor descriptions
 * @param outputs [IN] pointer to array of output buffers
 * @param attr [IN] pointer to instance of aclopAttr.
 *             may pass nullptr if the op has no attribute
 * @param engineType [IN] engine type
 * @param graphDumpPath [IN] dump path; if the suffix is ".txt" it means a file path, otherwise a directory path
 * @param graphDumpOpt [IN] dump option, nullptr is supported
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclGenGraphAndDumpForOp(
    const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
    int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
    aclopEngineType engineType, const char *graphDumpPath, const aclGraphDumpOption *graphDumpOpt);
/**
 * @ingroup AscendCL
 * @brief Create the graph dump option
 *
 * @retval null for failed
 * @retval OtherValues success
 *
 * @see aclDestroyGraphDumpOpt
 */
ACL_FUNC_VISIBILITY aclGraphDumpOption *aclCreateGraphDumpOpt();
/**
 * @ingroup AscendCL
 * @brief Destroy graph dump option
 *
 * @param graphDumpOpt [IN] pointer to the graph dump option
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclCreateGraphDumpOpt
 */
ACL_FUNC_VISIBILITY aclError aclDestroyGraphDumpOpt(const aclGraphDumpOption *graphDumpOpt);
#ifdef __cplusplus
}
#endif
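Taken together, the three additions imply a create/use/destroy pattern. A hedged sketch, assuming the descriptors and buffers were prepared by the caller exactly as for single-op execution, and that `ACL_ENGINE_SYS` (an existing `aclopEngineType` value) is acceptable here; note the docs above say a nullptr option is also supported:

```cpp
#include "acl/acl_op_compiler.h"

// Hedged sketch: dump the generated single-op graph for an "Add" op (2 inputs, 1 output).
aclError DumpAddOpGraph(const aclTensorDesc *inputDesc[], const aclDataBuffer *inputs[],
                        const aclTensorDesc *outputDesc[], aclDataBuffer *outputs[]) {
  aclGraphDumpOption *opt = aclCreateGraphDumpOpt();  // may be nullptr; the API supports that
  const aclError ret = aclGenGraphAndDumpForOp("Add", 2, inputDesc, inputs, 1, outputDesc, outputs,
                                               nullptr,         // op has no attributes here
                                               ACL_ENGINE_SYS,
                                               "./op_graphs/",  // no ".txt" suffix: directory path
                                               opt);
  if (opt != nullptr) {
    (void)aclDestroyGraphDumpOpt(opt);
  }
  return ret;
}
```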
@@ -367,6 +367,61 @@ MSVP_PROF_API aclprofStepInfo *aclprofCreateStepInfo();
 */
MSVP_PROF_API void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo);
/**
 * @ingroup AscendCL
 * @brief create pointer to aclprofstamp
 *
 *
 * @retval aclprofStamp pointer
 */
MSVP_PROF_API void *aclprofCreateStamp();
/**
 * @ingroup AscendCL
 * @brief destroy stamp pointer
 *
 *
 * @retval void
 */
MSVP_PROF_API void aclprofDestroyStamp(void *stamp);
/**
 * @ingroup AscendCL
 * @brief Record push timestamp
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
MSVP_PROF_API aclError aclprofPush(void *stamp);
/**
 * @ingroup AscendCL
 * @brief Record pop timestamp
 *
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
MSVP_PROF_API aclError aclprofPop();
/**
 * @ingroup AscendCL
 * @brief Record range start timestamp
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
MSVP_PROF_API aclError aclprofRangeStart(void *stamp, uint32_t *rangeId);
/**
 * @ingroup AscendCL
 * @brief Record range end timestamp
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
MSVP_PROF_API aclError aclprofRangeStop(uint32_t rangeId);
#ifdef __cplusplus
}
#endif
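A hedged sketch of how the stamp and range calls pair up; the workload placeholders are assumptions, and the call order simply follows the push/pop and start/stop naming above:

```cpp
#include "acl/acl_prof.h"

// Hedged sketch: bracket two code regions with one stamp.
void ProfileRegions() {
  void *stamp = aclprofCreateStamp();
  if (stamp == nullptr) {
    return;
  }
  (void)aclprofPush(stamp);  // record push timestamp
  // ... region measured by push/pop ...
  (void)aclprofPop();        // record matching pop timestamp

  uint32_t rangeId = 0U;
  if (aclprofRangeStart(stamp, &rangeId) == ACL_SUCCESS) {
    // ... region measured as a named range ...
    (void)aclprofRangeStop(rangeId);
  }
  aclprofDestroyStamp(stamp);
}
```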
@@ -32,42 +32,43 @@
#endif
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000;
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012;
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013;
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014;
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015;
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016;
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017;
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018;
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019;
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020;
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021;
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022;
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000;
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001;
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000;
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005;
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006;
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007;
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008;
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009;
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U;
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U;
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U;
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U;
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U;
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U;
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U;
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U;
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U;
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U;
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U;
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U;
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U;
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U;
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U;
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U;
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U;
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U;
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U;
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U;
#ifdef __cplusplus
} // namespace ge
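The only change in this block is the `U` suffix on every literal; the values themselves are unchanged. The effect, as a standalone illustration:

```cpp
// With the suffix, the literal's type matches the declared uint32_t, so no
// implicit int -> uint32_t conversion occurs (a common static-analysis/MISRA finding).
static const uint32_t before = 145000;   // int literal, implicitly converted
static const uint32_t after = 145000U;   // unsigned literal, exact type match
```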
@@ -44,6 +44,7 @@ static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callbac
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
@@ -61,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
@@ -99,6 +101,11 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // devic
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
@@ -107,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn
#ifdef __cplusplus
}
#endif
#endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
@@ -20,15 +20,28 @@
#include <map>
#include <string>
#include "ge_error_codes.h"
#include "ge_api_types.h"
#include "graph/types.h"
namespace ge {
#ifdef __GNUC__
#define ATTRIBUTED_DEPRECATED(replacement) __attribute__((deprecated("Please use " #replacement " instead.")))
#else
#define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead."))
#endif
// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \
  constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \
                              (static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \
  const ErrorNoRegisterar g_errorno_##name((name), (desc));
#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_errorno_##name((name), (desc));
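A worked expansion of the 32-bit layout the macro packs (runtime:2 | type:2 | level:3 | sysid:8 | modid:5 | value:12, high to low bits), using the `SUCCESS` and `FAILED` registrations that appear further down in this diff:

```cpp
// GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed") packs to:
//   (0x3  << 30)   runtime -> 0xC0000000
// | (0x3  << 28)   type    -> 0x30000000
// | (0x7  << 25)   level   -> 0x0E000000
// | (0xFF << 17)   sysid   -> 0x01FE0000
// | (0x1F << 12)   modid   -> 0x0001F000
// |  0xFFF         value   -> 0x00000FFF
// = 0xFFFFFFFF
// GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success") packs every field to zero, so SUCCESS == 0x0.
// The ErrorNoRegisterar object then maps the packed code to its description string.
static_assert((0xC0000000U | 0x30000000U | 0x0E000000U | 0x01FE0000U | 0x0001F000U | 0x00000FFFU) == 0xFFFFFFFFU,
              "FAILED packs to all ones");
```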
namespace ge {
class GE_FUNC_VISIBILITY StatusFactory {
 public:
  static StatusFactory *Instance() {
@@ -56,7 +69,7 @@ class GE_FUNC_VISIBILITY StatusFactory {
  }
  std::string GetErrDesc(const uint32_t err) {
    const auto iter_find = err_desc_.find(err);
    const std::map<uint32_t, std::string>::const_iterator iter_find = err_desc_.find(err);
    if (iter_find == err_desc_.end()) {
      return "";
    }
@@ -82,59 +95,10 @@ class GE_FUNC_VISIBILITY ErrorNoRegisterar {
  ~ErrorNoRegisterar() {}
};
// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \
  constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \
                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \
                              (static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \
  const ErrorNoRegisterar g_##name##_errorno(name, desc);
#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc);
using Status = uint32_t;
// General error code
GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success");
GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed"); /*lint !e401*/
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PARAM_INVALID, "Parameter invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_NOT_INIT, "GE executor not initialized yet.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Data size of model invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "Model addr invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Queue id of model invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "The model loaded repeatedly.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "Dynamic batch size invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "AIPP batch parameter empty.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_FORMAT_INVALID, "Format is invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_SHAPE_INVALID, "Shape is invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DATATYPE_INVALID, "Datatype is invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED, "Failed to load model partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, "Failed to load weight partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "Failed to load task partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, "Failed to load op kernel partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "Failed to release the model data.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_COMMAND_HANDLE, "Command handle error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_GET_TENSOR_INFO, "Get tensor info error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_UNLOAD_MODEL, "Load model error.");
}  // namespace ge
#endif  // INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_
@@ -338,6 +338,9 @@ const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist";
const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode";
const std::string OP_WAIT_TIMEOUT = "ge.exec.opWaitTimeout";
const std::string OP_EXECUTE_TIMEOUT = "ge.exec.opExecuteTimeout";
const char *const FILE_CONSTANT_PATH = "ge.exec.value_bins";
// Graph run mode
@@ -62,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
@@ -113,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn
#ifdef __cplusplus
}
#endif
#endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
@@ -40,7 +40,7 @@ enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };
class GE_FUNC_VISIBILITY GeLog {
 public:
  static const uint64_t GetTid() {
  static uint64_t GetTid() {
#ifdef __GNUC__
    const uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid));
#else
@@ -56,11 +56,11 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
  return (enable == 1);
}
#define GELOGE(ERROR_CODE, fmt, ...) \
  do { \
    dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
               ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
               ##__VA_ARGS__); \
#define GELOGE(ERROR_CODE, fmt, ...) \
  do { \
    dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
               ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
               ##__VA_ARGS__); \
  } while (false)
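The only functional change to `GELOGE` here is the parentheses around `ERROR_CODE`, which is standard macro hygiene: arguments are substituted textually, so an unparenthesized expression argument can re-associate with neighbouring operators in the expansion. A minimal illustration:

```cpp
#define TWICE_BAD(x) (x * 2)   // TWICE_BAD(1 + 2) expands to (1 + 2 * 2) == 5
#define TWICE_OK(x) ((x) * 2)  // TWICE_OK(1 + 2)  expands to ((1 + 2) * 2) == 6
```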
#define GELOGW(fmt, ...) \
@@ -91,7 +91,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
#define GELOGT(VALUE, fmt, ...) \
  do { \
    TraceStatus stat = VALUE; \
    TraceStatus stat = (VALUE); \
    const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
    const int32_t idx = static_cast<int32_t>(stat); \
    char_t *k = const_cast<char_t *>("status"); \
@@ -102,7 +102,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
  do { \
    dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
    dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
               ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
               ##__VA_ARGS__); \
  } while (false)
@@ -213,9 +213,9 @@
// If expr is not RT_ERROR_NONE, print the log
#define GE_CHK_RT(expr) \
  do { \
    const rtError_t _rt_ret = (expr); \
    if (_rt_ret != RT_ERROR_NONE) { \
      GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \
    const rtError_t _rt_err = (expr); \
    if (_rt_err != RT_ERROR_NONE) { \
      GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_err); \
    } \
  } while (false)
@@ -279,6 +279,7 @@
    } \
  } while (false)
namespace ge {
template <typename T>
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
  std::string fmt;
@@ -287,5 +288,5 @@ GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
  fmt = st.str();
  return fmt;
}
}  // namespace ge
#endif  // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_
@@ -74,7 +74,7 @@ class GE_FUNC_VISIBILITY StatusFactory {
class GE_FUNC_VISIBILITY ErrorNoRegisterar {
 public:
  ErrorNoRegisterar(uint32_t err, const std::string &desc) {
  ErrorNoRegisterar(const uint32_t err, const std::string &desc) {
    StatusFactory::Instance()->RegisterErrorNo(err, desc);
  }
  ~ErrorNoRegisterar() {}
@@ -24,15 +24,15 @@
namespace ge {
// System ID
enum SystemIdType { SYSID_GE = 8 };
enum class SystemIdType { SYSID_GE = 8 };
// Runtime location
enum LogRuntime {
enum class LogRuntime {
  RT_HOST = 0b01,
  RT_DEVICE = 0b10,
};
// Sub model
enum SubModuleId {
enum class SubModuleId {
  COMMON_MODULE = 0,
  CLIENT_MODULE = 1,
  INIT_MODULE = 2,
@@ -47,13 +47,13 @@ enum SubModuleId {
};
// Error code type
enum ErrorCodeType {
enum class ErrorCodeType {
  ERROR_CODE = 0b01,
  EXCEPTION_CODE = 0b10,
};
// Error level
enum ErrorLevel {
enum class ErrorLevel {
  COMMON_LEVEL = 0b000,
  SUGGESTION_LEVEL = 0b001,
  MINOR_LEVEL = 0b010,
@@ -62,28 +62,39 @@ enum ErrorLevel {
};
// Each module defines error codes using the following macros, name can not be modified to (name)
#define GE_ERRORNO_COMMON(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc))
#define GE_ERRORNO_CLIENT(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc))
#define GE_ERRORNO_INIT(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc))
#define GE_ERRORNO_SESSION(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc))
#define GE_ERRORNO_GRAPH(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc))
#define GE_ERRORNO_ENGINE(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc))
#define GE_ERRORNO_OPS(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc))
#define GE_ERRORNO_PLUGIN(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc))
#define GE_ERRORNO_RUNTIME(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc))
#define GE_ERRORNO_EXECUTOR(name, value, desc) \
  GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc))
#define GE_ERRORNO_GENERATOR(name, value, desc) \
  GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc))
#define GE_ERRORNO_COMMON(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::COMMON_MODULE, name, (value), (desc))
#define GE_ERRORNO_CLIENT(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::CLIENT_MODULE, name, (value), (desc))
#define GE_ERRORNO_INIT(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::INIT_MODULE, name, (value), (desc))
#define GE_ERRORNO_SESSION(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::SESSION_MODULE, name, (value), (desc))
#define GE_ERRORNO_GRAPH(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::GRAPH_MODULE, name, (value), (desc))
#define GE_ERRORNO_ENGINE(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::ENGINE_MODULE, name, (value), (desc))
#define GE_ERRORNO_OPS(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::OPS_MODULE, name, (value), (desc))
#define GE_ERRORNO_PLUGIN(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::PLUGIN_MODULE, name, (value), (desc))
#define GE_ERRORNO_RUNTIME(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::RUNTIME_MODULE, name, (value), (desc))
#define GE_ERRORNO_EXECUTOR(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_DEVICE, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::EXECUTOR_MODULE, name, (value), (desc))
#define GE_ERRORNO_GENERATOR(name, value, desc) \
  GE_ERRORNO(LogRuntime::RT_HOST, ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE, \
             SubModuleId::GENERATOR_MODULE, name, (value), (desc))
// Get error code description
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value)
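Because the `enum class` qualification now happens inside the macros, call sites do not change with this migration. An illustrative (hypothetical) registration:

```cpp
// Hypothetical error-code registration; the macro itself supplies LogRuntime::RT_HOST,
// ErrorCodeType::ERROR_CODE, ErrorLevel::COMMON_LEVEL, SystemIdType::SYSID_GE and
// SubModuleId::COMMON_MODULE, so the caller still passes only name, value and description.
GE_ERRORNO_COMMON(MEMALLOC_FAILED, 50U, "Failed to allocate memory!");
```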
@@ -40,8 +40,8 @@ enum FrameworkType {
  CAFFE = 0,
  MINDSPORE = 1,
  TENSORFLOW = 3,
  ANDROID_NN,
  ONNX,
  ANDROID_NN = 4,
  ONNX = 5,
};
enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED };
@@ -76,15 +76,16 @@ const char_t *const kLazyRecompile = "lazy_recompile";
// Data cache, including data address and length
struct DataBuffer {
 public:
  void *data;       // Data address
  uint64_t length;  // Data length
  bool isDataSupportMemShare = false;
  uint32_t placement = 0U;
  DataBuffer(void *data_in, uint64_t data_len, bool is_support_mem_share, uint32_t placement = 0U)
  DataBuffer(void *const data_in, const uint64_t data_len, const bool is_support_mem_share = false,
             const uint32_t placement = 0U)
      : data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(placement) {}
  DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false) {}
  DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false), placement(0U) {}
};
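With the constructor defaults added here, a `DataBuffer` can now be built from just an address and a length. A hedged sketch (illustrative values):

```cpp
void MakeBuffers(void *addr) {
  ge::DataBuffer empty;                           // nullptr, 0, false, placement 0
  ge::DataBuffer plain(addr, 1024UL);             // is_support_mem_share defaults to false, placement to 0
  ge::DataBuffer shared(addr, 1024UL, true, 1U);  // every field specified
}
```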
///
@@ -40,6 +40,7 @@ class GE_FUNC_VISIBILITY ModelHelper {
  Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file);
  Status LoadModel(const ge::ModelData &model_data);
  Status LoadRootModel(const ge::ModelData &model_data);
  static void SetModelToGeModel(GeModelPtr &ge_model, Model &model);
  GeModelPtr GetGeModel();
  GeRootModelPtr GetGeRootModel();
@@ -67,7 +68,6 @@ class GE_FUNC_VISIBILITY ModelHelper {
  Status GenerateGeModel(OmFileLoadHelper &om_load_helper);
  Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper);
  Status LoadModelData(OmFileLoadHelper &om_load_helper);
  void SetModelToGeModel(GeModelPtr &ge_model, Model &model) const;
  Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;
  Status LoadWeights(OmFileLoadHelper &om_load_helper);
  Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;
@@ -21,25 +21,20 @@
#include <vector>
#include "external/ge/ge_ir_build.h"
#include "framework/common/fmk_types.h"
#include "framework/common/types.h"
#include "framework/common/ge_types.h"
using ProcParam = struct PROC_PARAM;
using std::string;
using std::vector;
namespace ge {
struct ModelPartition {
  ModelPartitionType type;
  uint8_t *data = 0;
  uint32_t size = 0;
  uint8_t *data = nullptr;
  uint32_t size = 0U;
};
struct OmFileContext {
  std::vector<ModelPartition> partition_datas_;
  std::vector<char> partition_table_;
  uint32_t model_data_len_ = 0;
  std::vector<char_t> partition_table_;
  uint32_t model_data_len_ = 0U;
};
struct SaveParam {
@@ -55,13 +50,13 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper {
 public:
  Status Init(const ge::ModelData &model);
  Status Init(uint8_t *model_data, const uint32_t model_data_size);
  Status Init(uint8_t *const model_data, const uint32_t model_data_size);
  Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num);
  Status Init(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num);
  Status GetModelPartition(ModelPartitionType type, ModelPartition &partition);
  Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition);
  Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index);
  Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition, const size_t model_index);
  OmFileContext context_;
@@ -70,9 +65,9 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper {
 private:
  Status CheckModelValid(const ge::ModelData &model) const;
  Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size);
  Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size);
  Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num);
  Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num);
  bool is_inited_{false};
};
@@ -89,16 +84,16 @@ class GE_FUNC_VISIBILITY OmFileSaveHelper {
  ModelPartitionTable *GetPartitionTable();
  Status AddPartition(ModelPartition &partition);
  Status AddPartition(const ModelPartition &partition);
  Status AddPartition(ModelPartition &partition, size_t cur_index);
  Status AddPartition(const ModelPartition &partition, const size_t cur_index);
  const std::vector<ModelPartition> &GetModelPartitions() const;
  Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model,
                   bool is_offline = true);
  Status SaveModel(const SaveParam &save_param, const char_t *const output_file, ge::ModelBufferData &model,
                   const bool is_offline = true);
  Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true);
  Status SaveModelToFile(const char_t *const output_file, ge::ModelBufferData &model, const bool is_offline = true);
  std::vector<OmFileContext> model_contexts_;
@@ -28,8 +28,6 @@
#include "framework/common/util.h"
#include "graph/compute_graph.h"
using std::vector;
namespace ge {
// Size of RC memory alignment, 2M
constexpr size_t ALIGN_SIZE = 2097152;
@@ -38,7 +36,7 @@ constexpr uint32_t RC_VALUE_DEFAULT = 1;
constexpr uint32_t RC_VALUE_MAX = 32;
// RC data type classification
enum RCType {
enum class RCType {
  RC_DEFAULT,  // Such as temporary workspace memory of operator, variable (including global and local variable)
  RC_HCOM,     // Output of gradient aggregation, RC value should be set to 0
  RC_L2LOSS,   // Parameter of L2 loss operator, RC value should be set to 0
@@ -49,7 +47,7 @@ enum RCType {
  RC_ARGS  // Args of FlowTable, actual access numbers
};
enum MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE };
enum class MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE };
// Memory usage information < node, type, number >
struct NodeInfo {
@@ -104,8 +102,10 @@ class GE_FUNC_VISIBILITY L2CacheOptimize {
  void HandOPoutput(ge::NodePtr node, std::vector<int64_t> &outputList, std::vector<RCMemoryBlock> &blocks);
  // maximum common divisor
  uint32_t Measure(uint32_t x, uint32_t y) {
    if ((x == 0) || (y == 0)) return RC_VALUE_DEFAULT;
  uint32_t Measure(uint32_t x, uint32_t y) const {
    if ((x == 0) || (y == 0)) {
      return RC_VALUE_DEFAULT;
    }
    uint32_t z = y;
    while (x % y != 0) {
      z = x % y;
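The `Measure` helper (the hunk is cut off mid-loop above) is Euclid's greatest-common-divisor algorithm, per its "maximum common divisor" comment. For reference, a self-contained sketch of the same computation, with `1U` standing in for `RC_VALUE_DEFAULT`:

```cpp
// Euclid's algorithm with the same zero-operand guard as Measure.
uint32_t Gcd(uint32_t x, uint32_t y) {
  if ((x == 0U) || (y == 0U)) {
    return 1U;  // RC_VALUE_DEFAULT in the original
  }
  while (y != 0U) {
    const uint32_t r = x % y;  // e.g. Gcd(12U, 18U) == 6U
    x = y;
    y = r;
  }
  return x;
}
```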
@@ -34,143 +34,18 @@
#include <google/protobuf/map.h>
#include <unordered_map>
#include <string>
#include "external/graph/types.h"
#include "graph/debug/ge_attr_define.h"
#include "proto/om.pb.h"
using domi::AttrDef;
using domi::AttrDef_ListValue;
using domi::ModelDef;
using domi::NamedAttrs;
using domi::OpDef;
namespace ge {
using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>;
using AttrDefPair = ::google::protobuf::MapPair<std::string, domi::AttrDef>;
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef);
// DEFINE_ADD_ATTR_VALUE
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs);
// DEFINE_ADD_ATTR_VALUE
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef);
// DEFINE_ADD_ATTR_VALUE_LIST
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name);
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, int32_t *value,
                                            const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, uint32_t *value,
                                            const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, float *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, double *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out);
GE_FUNC_VISIBILITY void SetAttrDef(const char *value, domi::AttrDef *const out);
GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, domi::AttrDef *const out);
GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, domi::AttrDef *const out);
GE_FUNC_VISIBILITY void SetAttrDef(const int64_t value, domi::AttrDef *const out);
GE_FUNC_VISIBILITY void SetAttrDef(const float32_t value, domi::AttrDef *const out);
GE_FUNC_VISIBILITY void SetAttrDef(const float64_t value, domi::AttrDef *const out);
GE_FUNC_VISIBILITY void SetAttrDef(const bool value, domi::AttrDef *const out);
}  // namespace ge
#endif  // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_
#endif  // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_
@@ -31,7 +31,6 @@
#include "proto/insert_op.pb.h"
namespace ge {
using domi::Status;
// Add Sub Mul
GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM;
@@ -55,8 +54,8 @@ GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT;
GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT;
// Merge
GE_FUNC_VISIBILITY extern const uint32_t MERGE_DATA_OUTPUT;
GE_FUNC_VISIBILITY extern const uint32_t MERGE_INDEX_OUTPUT;
GE_FUNC_VISIBILITY extern const int32_t MERGE_DATA_OUTPUT;
GE_FUNC_VISIBILITY extern const int32_t MERGE_INDEX_OUTPUT;
// FunctionOp
GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT;
@@ -66,7 +65,7 @@ GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT;
GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT;
GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE;
/*lint -e148*/
class GE_FUNC_VISIBILITY OpUtils {
 public:
  ///
@@ -95,8 +94,8 @@ class GE_FUNC_VISIBILITY OpUtils {
  /// @param [out] aipp_params aipp parameters
  /// @return enum of tagCCAippInputFormat
  ///
  static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params);
  static Status TransferDim(const std::vector<int64_t> &dim, std::vector<int64_t> &dim_vector);
  static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams &aipp_params);
  template <typename T>
  static void SliceData(const std::vector<char *> &input, int64_t chunk_size, std::vector<char *> &output,
                        int64_t begin, int64_t out_dim, int64_t stride);
@@ -107,45 +106,13 @@ class GE_FUNC_VISIBILITY OpUtils {
  static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector<int64_t> &input_dims,
                                             const std::vector<int64_t> &begin, const std::vector<int64_t> &output_dims,
                                             ge::GeTensor *output, const std::vector<int64_t> &stride);
  static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type,
  static Status SetOutputSliceData(void *const data, const int64_t data_size, const int32_t data_type,
                                   const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin,
                                   const std::vector<int64_t> &output_dims, ge::GeTensor *const output,
                                   const std::vector<int64_t> &output_dims, GeTensor *const output,
                                   const std::vector<int64_t> &stride);
  ///
  /// @ingroup domi_omg
  /// @brief Convert the convolutional weight data from [h, w, c, k] to [k, c, h, w]
  /// @param [in] input Weight data in HWCK format
  /// @param [in] H value of H dimension
  /// @param [in] W value of W dimension
  /// @param [in] C value of C dimension
  /// @param [in] K value of K dimension
  /// @param [out] output Data pointer after conversion. The format is KCHW.
  ///
  static void TransDataHWCK2KCHW(const void *input, int64_t h, int64_t w, int64_t c, int64_t k, void **output);
  ///
  /// @ingroup domi_omg
  /// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k].
  /// @param [in] input Weight data in HWCK format
  /// @param [in] K value of K dimension
  /// @param [in] C value of C dimension
  /// @param [in] H value of H dimension
  /// @param [in] W value of W dimension
  /// @param [out] output Data pointer after conversion. The format is HWCK
  ///
  static void TransDataKCHW2HWCK(const void *input, int64_t k, int64_t c, int64_t h, int64_t w, void *output);
  static std::vector<ConstGeTensorPtr> GetWeights(const ge::Node &node);
  static std::vector<ConstGeTensorPtr> GetWeights(ge::ConstNodePtr node);
  static std::vector<GeTensorPtr> MutableWeights(const ge::Node &node);
  static std::vector<GeTensorPtr> MutableWeights(const ge::NodePtr node);
  static Status SetWeights(ge::Node &node, const std::vector<ge::GeTensorPtr> &weights);
  static Status SetWeights(const ge::NodePtr node, const std::vector<ge::GeTensorPtr> &weights);
  static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type,
                                            std::vector<int64_t> &dims);
 private:
  static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc);
};
/*lint +e148*/
}  // namespace ge
#endif  // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_
@@ -20,6 +20,8 @@
#include <set>
#include <string>
#include "graph/types.h"
namespace ge {
class GE_FUNC_VISIBILITY OpTypeContainer {
 public:
@@ -34,8 +36,7 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
  }
  bool IsExisting(const std::string &op_type) {
    auto iter_find = op_type_list_.find(op_type);
    return iter_find != op_type_list_.end();
    return op_type_list_.find(op_type) != op_type_list_.end();
  }
 protected:
@@ -47,17 +48,17 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
class GE_FUNC_VISIBILITY OpTypeRegistrar {
 public:
  explicit OpTypeRegistrar(const std::string &op_type) {
  explicit OpTypeRegistrar(const std::string &op_type) noexcept {
    OpTypeContainer::Instance()->Register(op_type);
  }
  ~OpTypeRegistrar() {}
};
#define REGISTER_OPTYPE_DECLARE(var_name, str_name) \
  FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *var_name;
  FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char_t *var_name;
#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
  const char *var_name = str_name; \
  const char_t *var_name = str_name; \
  const OpTypeRegistrar g_##var_name##_reg(str_name);
#define IS_OPTYPE_EXISTING(str_name) (OpTypeContainer::Instance()->IsExisting(str_name))
@@ -24,7 +24,7 @@
/// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading
/// @return Status result
///
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream);
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t const stream);
GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id);
| @@ -0,0 +1,146 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AIR_CXX_PROFILING_DEFINITIONS_H | |||
| #define AIR_CXX_PROFILING_DEFINITIONS_H | |||
| #include <string> | |||
| #include <iostream> | |||
| #include <atomic> | |||
| #include <mutex> | |||
| #include <unordered_map> | |||
| #include "graph/profiler.h" | |||
| #include "external/ge/ge_api_types.h" | |||
| #include "toolchain/prof_callback.h" | |||
| namespace ge { | |||
| namespace profiling { | |||
| enum { | |||
| kInferShape, | |||
| kTiling, | |||
| kUpdateShape, | |||
| kConstPrepare, | |||
| kInitHybridExecuteArgs, | |||
| kInitInferShapeContext, | |||
| kDestroyInferShapeContext, | |||
| kResetSubgraphExecutor, | |||
| kCommitInferShapeTask, | |||
| kDeviceToHost, | |||
| kPrepareTask, | |||
| kLaunchTask, | |||
| kCommitTilingTask, | |||
| kAtomic, | |||
| kKernelLaunchPrepare, | |||
| kRtKernelLaunch, | |||
| kOpExecute, | |||
| kAllocMem, | |||
| kCopyH2D, | |||
| kProfilingIndexEnd | |||
| }; | |||
| constexpr uint64_t kInvalidHashId = 0ULL; | |||
| class ProfilingContext { | |||
| public: | |||
| static bool IsDumpToStdEnabled(); | |||
| static ProfilingContext &GetInstance(); | |||
| ProfilingContext(); | |||
| ~ProfilingContext(); | |||
| /* | |||
| * An alternative design is for `IsEnabled` to check only whether profiler_ is a null pointer, dropping the separate enabled flag and saving one member. | |||
| * However, that would require profiler_ to be a null pointer whenever profiling is disabled. | |||
| * For performance, the profiling mechanism calls `RegisterString` at compile and load time to register strings with profiler_; later executions use only the registered indices. | |||
| * This creates a scenario: profiling is not enabled at compile time (compilation takes long, and enabling profiling then would not reflect real execution timing), | |||
| * so the compile-time string registration never takes effect. If profiling is then switched on dynamically at execution time, the registered strings cannot be retrieved. | |||
| */ | |||
| bool IsEnabled() const noexcept { | |||
| return enabled_ && profiler_ != nullptr; | |||
| } | |||
| void SetEnable() noexcept { | |||
| enabled_ = true; | |||
| } | |||
| void SetDisable() noexcept { | |||
| enabled_ = false; | |||
| } | |||
| void RecordCurrentThread(int64_t element, int64_t event, EventType et) { | |||
| if (IsEnabled()) { | |||
| profiler_->RecordCurrentThread(element, event, et); | |||
| } | |||
| } | |||
| const Profiler *GetProfiler() const { | |||
| return profiler_.get(); | |||
| } | |||
| void Dump(std::ostream &out_stream) const { | |||
| if (IsEnabled()) { | |||
| profiler_->Dump(out_stream); | |||
| } else { | |||
| out_stream << "Profiling not enable, skip to dump" << std::endl; | |||
| } | |||
| } | |||
| void DumpToStdOut() const { | |||
| Dump(std::cout); | |||
| } | |||
| void Reset() { | |||
| if (IsEnabled()) { | |||
| profiler_->Reset(); | |||
| } | |||
| } | |||
| int64_t RegisterString(const std::string &str); | |||
| int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str); | |||
| void UpdateElementHashId(const MsprofReporterCallback reporter_callback); | |||
| static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str, | |||
| uint64_t &hash_id); | |||
| size_t GetRegisterStringNum() const { | |||
| return strings_to_index_.size(); | |||
| } | |||
| private: | |||
| void UpdateHashByStr(const std::string &str, const uint64_t hash); | |||
| void Init(); | |||
| private: | |||
| bool enabled_; | |||
| int64_t str_index_; | |||
| std::unordered_map<std::string, int64_t> strings_to_index_; | |||
| std::mutex strings_to_index_mutex_; | |||
| std::unique_ptr<Profiler> profiler_; | |||
| }; | |||
| class ScopeProfiler { | |||
| public: | |||
| ScopeProfiler(int64_t element, int64_t event) : element_(element), event_(event) { | |||
| ProfilingContext::GetInstance().RecordCurrentThread(element_, event, EventType::kEventStart); | |||
| } | |||
| ~ScopeProfiler() { | |||
| ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventEnd); | |||
| } | |||
| private: | |||
| int64_t element_; | |||
| int64_t event_; | |||
| }; | |||
| } // namespace profiling | |||
| } // namespace ge | |||
| #define PROFILING_START(element, event) \ | |||
| profiling::ProfilingContext::GetInstance().RecordCurrentThread(element, event, profiling::EventType::kEventStart) | |||
| #define PROFILING_END(element, event) \ | |||
| profiling::ProfilingContext::GetInstance().RecordCurrentThread(element, event, profiling::EventType::kEventEnd) | |||
| #define PROFILING_SCOPE(element, event) profiling::ScopeProfiler profiler(element, event) | |||
| #endif // AIR_CXX_PROFILING_DEFINITIONS_H | |||
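For orientation, a minimal usage sketch of the hooks this new header defines; `RunInferShape` and its element id are hypothetical, while the event indices come from the enum above:

```cpp
#include "profiling_definitions.h"  // this header; include path assumed

namespace ge {
void RunInferShape(int64_t node_element) {
  // RAII form: records kEventStart now and kEventEnd when the scope exits.
  PROFILING_SCOPE(node_element, profiling::kInferShape);

  // Paired form for a sub-step that does not map onto a C++ scope.
  PROFILING_START(node_element, profiling::kCommitInferShapeTask);
  // ... commit the infer-shape task ...
  PROFILING_END(node_element, profiling::kCommitInferShapeTask);

  // All records are no-ops unless the ProfilingContext is enabled and initialized.
}
}  // namespace ge
```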
| @@ -25,9 +25,9 @@ | |||
| /// MAKE_GUARD([&] { Release Resource 1 }) | |||
| /// Acquire Resource 2 | |||
| // MAKE_GUARD([&] { Release Resource 2 }) | |||
| #define GE_MAKE_GUARD(var, callback) const ScopeGuard const_guard_##var(callback) | |||
| #define GE_MAKE_GUARD(var, callback) const ::ge::ScopeGuard const_guard_##var(callback) | |||
| #define GE_DISMISSABLE_GUARD(var, callback) ScopeGuard make_guard_##var(callback) | |||
| #define GE_DISMISSABLE_GUARD(var, callback) ::ge::ScopeGuard make_guard_##var(callback) | |||
| #define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() | |||
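A short sketch of how these guard macros pair up; the resources and helper functions are hypothetical, and `ScopeGuard` is assumed to invoke its callback on destruction unless dismissed:

```cpp
ge::Status Demo() {
  void *buf = AcquireBuffer();  // hypothetical helper
  GE_MAKE_GUARD(release_buf, [&] { ReleaseBuffer(buf); });  // always runs at scope exit

  int fd = OpenHandle();        // hypothetical helper
  GE_DISMISSABLE_GUARD(fd, [&] { CloseHandle(fd); });

  if (!Prepare(fd)) {
    return ge::FAILED;          // both guards fire: buffer and handle released
  }
  GE_DISMISS_GUARD(fd);         // success path keeps the handle open
  return ge::SUCCESS;
}
```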
| namespace ge { | |||
| @@ -23,7 +23,7 @@ namespace ge { | |||
| const int32_t CC_FUSION_OP_MAX = 32; | |||
| typedef enum tagCcStatus { | |||
| enum class ccStatus_t { | |||
| CC_STATUS_SUCCESS = 0, /**< succ */ | |||
| CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ | |||
| CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ | |||
| @@ -34,9 +34,9 @@ typedef enum tagCcStatus { | |||
| CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ | |||
| CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ | |||
| CC_STATUS_RESERVED /**< just for check */ | |||
| } ccStatus_t; | |||
| }; | |||
| typedef enum tagccKernelType { | |||
| enum class ccKernelType { | |||
| CCE_AI_CORE = 0, /* cce aicore */ | |||
| CCE_AI_CPU = 1, /* cce aicpu */ | |||
| TE = 2, /* te operator*/ | |||
| @@ -47,9 +47,9 @@ typedef enum tagccKernelType { | |||
| CUST_AI_CPU = 7, /* custom aicpu*/ | |||
| HOST_CPU = 8, /* host cpu */ | |||
| INVALID = 10000 /* unknown kernel type */ | |||
| } ccKernelType; | |||
| }; | |||
| typedef struct tagOpContext { | |||
| using ccOpContext = struct tagOpContext { | |||
| ccKernelType kernelType; | |||
| uint32_t opId; | |||
| uint32_t kernelFuncId; | |||
| @@ -66,7 +66,7 @@ typedef struct tagOpContext { | |||
| uint64_t genVariableBaseAddr; | |||
| uint64_t genVariableBaseSize; | |||
| uint64_t l2ctrlSize; | |||
| } ccOpContext; | |||
| }; | |||
| } // namespace ge | |||
| #endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ | |||
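The switch above from C-style typedef enums to scoped enums changes call sites: enumerators must now be qualified, and conversions to integers become explicit. A small before/after sketch:

```cpp
#include <cstdint>

void EnumClassDemo() {
  // Before (typedef enum): enumerators leaked into the enclosing scope and
  // converted implicitly, e.g. `ccStatus_t s = CC_STATUS_SUCCESS; uint32_t c = s;`.
  // After (enum class): qualification and explicit casts are required.
  const ge::ccStatus_t s = ge::ccStatus_t::CC_STATUS_SUCCESS;
  const uint32_t code = static_cast<uint32_t>(s);
  (void)code;

  const ge::ccKernelType k = ge::ccKernelType::TE;
  if (k == ge::ccKernelType::TE) {
    // te operator path selected
  }
}
```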
| @@ -19,7 +19,6 @@ | |||
| #include <climits> | |||
| #include <cstdint> | |||
| #include <algorithm> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| @@ -53,23 +52,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. | |||
| template <typename K, typename V> | |||
| static std::pair<V, K> flip_pair(const std::pair<K, V> &p) { | |||
| return std::pair<V, K>(p.second, p.first); | |||
| } | |||
| template <typename K, typename V> | |||
| static std::map<V, K> flip_map(std::map<K, V> src) { | |||
| std::map<V, K> dst; | |||
| std::transform(src.begin(), src.end(), std::inserter(dst, dst.begin()), flip_pair<K, V>); | |||
| return dst; | |||
| } | |||
| REGISTER_OPTYPE_DECLARE(DATA, "Data"); | |||
| REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData"); | |||
| REGISTER_OPTYPE_DECLARE(QUEUE_DATA, "QueueData"); | |||
| REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution"); | |||
| REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation"); | |||
| REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2"); | |||
| @@ -516,30 +503,6 @@ REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); | |||
| // profiling training trace node | |||
| REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); | |||
| enum InputMode { INPUT = 0, CONST_INPUT }; | |||
| // Definition of the processing status enum of the process module | |||
| enum ModelProcessState { | |||
| INIT_STATE = 0, // init status | |||
| WAIT_EVENT_STATE, // Wait for the event status | |||
| IND_RSLT_STATE, // The model execution result is being output to the high level | |||
| STOPPED_STATE, // Model execution completed. The model enters this state after Model Manager::Stop | |||
| RESERVED_STATE, // reserved | |||
| }; | |||
| // Indicates the enum definition of the execution mode of the access module | |||
| enum SysMode { | |||
| INFERENCE = 0, // Normal, that is, Inference mode | |||
| DEBUG, // Debug mode | |||
| TIME, // Model execution time mode, including the execution time of each OP | |||
| STOP, // STOP mode | |||
| RESET, // RESET mode | |||
| PERFORMANCE, // Impact of enabling performance mode: 1. The input data of the model is considered ready and does | |||
| // not need to be converted | |||
| ANDROID_DEBUG, // Exports Android platform computing data | |||
| RESERVED, // reserved | |||
| }; | |||
| // @brief encryption type of the model file | |||
| enum ModelEncryptType { | |||
| UNENCRYPTED, // not encrypted | |||
| @@ -577,22 +540,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FIL | |||
| /// | |||
| /// @brief model name length | |||
| /// | |||
| static constexpr uint32_t MODEL_NAME_LENGTH = 32; | |||
| constexpr uint32_t MODEL_NAME_LENGTH = 32U; | |||
| /// | |||
| /// @brief length of user-defined information | |||
| /// | |||
| static constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32; | |||
| constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32U; | |||
| /// | |||
| /// @brief length of the model file signature | |||
| /// | |||
| static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; | |||
| constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64U; | |||
| /// | |||
| /// @brief length of the reserved field in the model file header | |||
| /// | |||
| static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75; | |||
| constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75U; | |||
| // DATA node type | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; | |||
| @@ -617,7 +580,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYP | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER; | |||
| // dim default size value | |||
| static const int32_t DIM_DEFAULT_SIZE = 4; | |||
| constexpr int32_t DIM_DEFAULT_SIZE = 4; | |||
| // dim extension default value | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE; | |||
| @@ -650,34 +613,35 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STREAM_SW | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP; | |||
| static const uint32_t PLATFORM_VERSION_LEN = 20; | |||
| constexpr uint32_t PLATFORM_VERSION_LEN = 20U; | |||
| // Definition of the file header of the model file | |||
| struct ModelFileHeader { | |||
| uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI | |||
| uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 | |||
| uint32_t version = MODEL_VERSION; // version 1.0 | |||
| uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0}; // signature | |||
| uint32_t length = 0; // Ciphertext length. In the non-encryption model, the length is the plaintext length. | |||
| uint8_t is_encrypt = ModelEncryptType::UNENCRYPTED; // whether encrypted 0:not encrypt, 1:encrypt | |||
| uint8_t is_checksum = ModelCheckType::CHECK; // whether to check the checksum | |||
| uint8_t modeltype = 0; // 0:IR model 1:standard model 2: OM Tiny model | |||
| uint8_t genmode = 0; // 0:offline generate 1:online generate | |||
| uint8_t name[MODEL_NAME_LENGTH] = {0}; // Model name, which contains 32 characters | |||
| uint32_t ops = 0; // Computing power (Kops) | |||
| uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. The value contains 32 characters | |||
| uint32_t om_ir_version = 0; | |||
| uint32_t model_num = 0; | |||
| uint8_t platform_version[PLATFORM_VERSION_LEN] = {0}; | |||
| uint8_t platform_type = {0}; | |||
| uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75 | |||
| uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI | |||
| uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 | |||
| uint32_t version = MODEL_VERSION; // version 1.0 | |||
| uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature | |||
| uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length. | |||
| uint8_t is_encrypt = | |||
| static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt | |||
| uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum | |||
| uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model | |||
| uint8_t genmode = 0U; // 0:offline generate 1:online generate | |||
| uint8_t name[MODEL_NAME_LENGTH] = {0U}; // Model name, which contains 32 characters | |||
| uint32_t ops = 0U; // Computing power (Kops) | |||
| uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0U}; // User-defined information. The value contains 32 characters | |||
| uint32_t om_ir_version = 0U; | |||
| uint32_t model_num = 0U; | |||
| uint8_t platform_version[PLATFORM_VERSION_LEN] = {0U}; | |||
| uint8_t platform_type = {0U}; | |||
| uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0U}; // Reserved field 75 | |||
| }; | |||
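A minimal validation sketch against this header layout (a sketch only: `MODEL_FILE_MAGIC_NUM` and `MODEL_FILE_HEAD_LEN` are the constants the initializers above reference, and the trailing-length check is an assumption about the loader, not its actual logic):

```cpp
#include <cstring>

bool LooksLikeOmModel(const void *data, size_t size) {
  if ((data == nullptr) || (size < sizeof(ge::ModelFileHeader))) {
    return false;
  }
  ge::ModelFileHeader header{};
  // memcpy avoids alignment assumptions about the caller's buffer
  (void)std::memcpy(&header, data, sizeof(header));
  return (header.magic == ge::MODEL_FILE_MAGIC_NUM) &&
         (header.headsize == ge::MODEL_FILE_HEAD_LEN) &&
         (static_cast<size_t>(header.length) <= (size - sizeof(header)));
}
```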
| static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0; | |||
| static constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1; | |||
| constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0U; | |||
| constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1U; | |||
| // number of partitions in the current model | |||
| static constexpr uint32_t PARTITION_SIZE = 5; | |||
| constexpr uint32_t PARTITION_SIZE = 5U; | |||
| enum ModelPartitionType { MODEL_DEF = 0, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS }; | |||
| @@ -694,20 +658,6 @@ struct ModelPartitionTable { | |||
| #define SIZE_OF_MODEL_PARTITION_TABLE(table) (sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * (table).num) | |||
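The macro above sizes a table whose entry count lives in its `num` field; assuming the struct carries a trailing flexible array of `ModelPartitionMemInfo` (as the macro implies), usage is simply:

```cpp
#include <cstddef>

// Sketch: total bytes occupied by a partition table with table.num entries.
size_t PartitionTableBytes(const ge::ModelPartitionTable &table) {
  return SIZE_OF_MODEL_PARTITION_TABLE(table);
  // expands to sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * table.num
}
```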
| // Filter format | |||
| typedef enum tagDomiFilterFormat { | |||
| DOMI_FILTER_KCHW, // KCHW | |||
| DOMI_FILTER_HWCK, // HWCK | |||
| DOMI_FILTER_RESERVED | |||
| } domiFilterFormat_t; | |||
| // Const data trans type | |||
| typedef enum tagDomiConstDataTransType { | |||
| DOMI_CONST_DATA_NOT_CHANGE = 0, // No action is required | |||
| DOMI_CONST_DATA_TRANS_MATMUL, // The const input to MatMul and needs to be transposed | |||
| DOMI_CONST_DATA_RESERVED | |||
| } domiConstDataTransType_t; | |||
| // mode of activation | |||
| typedef enum tagDomiActivationMode { | |||
| DOMI_ACTIVATION_SIGMOID = 0, // sigmoid | |||
| @@ -727,170 +677,6 @@ typedef enum tagDomiActivationMode { | |||
| DOMI_ACTIVATION_RESERVED | |||
| } domiActivationMode_t; | |||
| // mode of batchnorm | |||
| typedef enum tagDomiBatchNormMode { | |||
| DOMI_BATCHNORM_PER_ACTIVATION = 0, // bnScale, bnBias tensor dims are 1xCxHxW | |||
| DOMI_BATCHNORM_SPATIAL, // bnScale, bnBias tensor dims are 1xCx1x1 | |||
| DOMI_BATCHNORM_RESERVED | |||
| } domiBatchNormMode_t; | |||
| // eltwise mode | |||
| typedef enum tagDomiEltwiseMode { | |||
| DOMI_ELTWISE_PROD = 0, // prod | |||
| DOMI_ELTWISE_SUM, // sum | |||
| DOMI_ELTWISE_MAX, // max | |||
| DOMI_ELTWISE_RESERVED | |||
| } domiEltwiseMode_t; | |||
| // mode of padding | |||
| typedef enum tagDomiPaddingMode { | |||
| DOMI_PADDING_CEIL = 0, // Default padding mode | |||
| DOMI_PADDING_DIRECTASSIGN, // Default padding mode: NOTSET | |||
| DOMI_PADDING_VALID, // VALID padding mode | |||
| DOMI_PADDING_SAME, // Padding values of 0 are always used | |||
| DOMI_PADDING_CEIL_NEW, // Padding values of 0 are always used | |||
| DOMI_PADDING_VALID_NEW, // Padding values of 0 are always used | |||
| DOMI_PADDING_SAME_NEW, // Padding values of 0 are always used | |||
| DOMI_PADDING_RESERVED | |||
| } domiPaddingMode_t; | |||
| // algorithm of convolution forward | |||
| typedef enum tagDomiConvolutionFwdAlgo { | |||
| DOMI_CONVOLUTION_FWD_ALGO_GEMM = 0, // matrix gemm algo | |||
| DOMI_CONVOLUTION_FWD_ALGO_WINOGRAD, // Winograd Transform algo | |||
| DOMI_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32, // accumulate in L0c with FP32 | |||
| DOMI_CONVOLUTION_FWD_ALGO_RESERVED | |||
| } domiConvolutionFwdAlgo_t; | |||
| typedef enum tagDomiFullConnectFwdAlgo { | |||
| DOMI_FULLCONNECT_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 | |||
| DOMI_FULLCONNECT_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 | |||
| } domiFullConnectFwdAlgo_t; | |||
| typedef enum tagDomiPooingFwdAlgo { | |||
| DOMI_POOLING_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 | |||
| DOMI_POOLING_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 | |||
| } domiPooingFwdAlgo_t; | |||
| // mode of convolution | |||
| typedef enum tagDomiConvolutionMode { | |||
| DOMI_CONV_CONVOLUTION = 0, // math convolution | |||
| DOMI_CONV_CROSS_CORRELATION, // cross-correlation convolution | |||
| DOMI_CONV_DECONVOLUTION, // deconvolution, also named transposed convolution | |||
| DOMI_CONV_MODE_DEPTHWISE, // depthwise convolution | |||
| DOMI_CONV_MODE_RESERVED | |||
| } domiConvolutionMode_t; | |||
| // softmax mode | |||
| typedef enum tagDomiSoftmaxMode { | |||
| DOMI_SOFTMAX_MODE_INSTANCE = 0, // compute the softmax over all C, H, W for each N | |||
| DOMI_SOFTMAX_MODE_CHANNEL, // compute the softmax over all C for each H, W, N | |||
| DOMI_SOFTMAX_MODE_HEIGHT, // compute the softmax over all H for each N, C, W | |||
| DOMI_SOFTMAX_MODE_WIDTH, // compute the softmax over all W for each N, C, H | |||
| DOMI_SOFTMAX_MODE_RESERVED | |||
| } domiSoftmaxMode_t; | |||
| // softmax algorithm | |||
| typedef enum tagDomiSoftmaxAlgo { | |||
| DOMI_SOFTMAX_FAST = 0, // straightforward implementation | |||
| DOMI_SOFTMAX_ACCURATE, // subtract max from every point to avoid overflow | |||
| DOMI_SOFTMAX_LOG, // perform the Log softmax operation to avoid overflow | |||
| DOMI_SOFTMAX_ACCURATE_FP32, | |||
| DOMI_SOFTMAX_RESERVED | |||
| } domiSoftmaxAlgo_t; | |||
| // algorithm of convolution backward | |||
| typedef enum tagDomiConvolutionBwdAlgo { | |||
| DOMI_CONVOLUTION_BWD_ALGO_GEMM = 0, // matrix gemm algo | |||
| DOMI_CONVOLUTION_BWD_ALGO_WINOGRAD, // Winograd Transform algo | |||
| DOMI_CONVOLUTION_BWD_ALGO_RESERVED | |||
| } domiConvolutionBwdAlgo_t; | |||
| // mode of pooling | |||
| typedef enum tagDomiPoolingMode { | |||
| DOMI_POOLING_MAX = 0, // max pooling | |||
| DOMI_POOLING_AVG, // average pooling | |||
| DOMI_POOLING_L2, // L2 pooling | |||
| DOMI_POOLING_RESERVED | |||
| } domiPoolingMode_t; | |||
| // propagate Nan | |||
| typedef enum tagDomiNanPropagation { | |||
| DOMI_NAN_NOT_PROPAGATE = 0, // Nan numbers are not propagated | |||
| DOMI_NAN_PROPAGATE, // Nan numbers are propagated | |||
| DOMI_NAN_PROPAGATE_RESERVED | |||
| } domiNanPropagation_t; | |||
| // mode of cropandresize | |||
| typedef enum tagDomiCropAndResizeMode { | |||
| DOMI_RESIZE_METHOD_BILINEAR = 0, // resize bilinear | |||
| DOMI_RESIZE_METHOD_NEAREST, // resize nearest | |||
| DOMI_RESIZE_RESERVED | |||
| } domiCropAndResizeMode_t; | |||
| // yolo version | |||
| typedef enum tagDomiYoloVersion { DOMI_YOLO_V2 = 1, DOMI_YOLO_V3, DOMI_YOLO_TRSERVED } domiYoloVersion_t; | |||
| typedef enum tagDomiRNNScopePassType { | |||
| DOMI_STATIC_BIDIRECTIONAL_RNN_GENERAL_PASS = 0, | |||
| DOMI_DYNAMIC_BIDIRECTIONAL_RNN_GENERAL_PASS, | |||
| DOMI_DYNAMIC_BIDIRECTIONAL_RNN_BIDAF_PASS | |||
| } domiRNNScopePassType; | |||
| // RNNDataLayout | |||
| typedef enum tagDomiRNNDataLayout { | |||
| DOMI_RNN_ND_TBX = 0, // data[max_time,batch_size,Xt] | |||
| DOMI_RNN_ND_BTX, // data[batch_size,max_time,Xt] | |||
| DOMI_RNN_5D_TX1BX, // data[max_time,Xt,1,batch_size,Xt] | |||
| DOMI_RNN_5D_BX1TX, // data[batch_size,Xt,1,max_time,Xt] | |||
| DOMI_RNN_4DTBX1, | |||
| DOMI_ENN_DL_RESERVED | |||
| } domiRNNDataLayout_t; | |||
| // RNNInputMode | |||
| typedef enum tagDomiRNNInputMode { DOMI_RNN_LINEAR_INPUT = 0, DOMI_RNN_SKIP_INPUT } domiRNNInputMode_t; | |||
| // RNNDirectionMode | |||
| typedef enum tagDomiRNNDirectionMode { DOMI_RNN_UNIDIRECTIONAL = 0, DOMI_RNN_BIDIRECTIONAL } domiDirectionMode_t; | |||
| typedef enum tagDomiPoolingCeilMode { DOMI_POOLING_FLOOR = 0, DOMI_POOLING_CEIL } domiPoolingCeilMode_t; | |||
| // RNNMode | |||
| typedef enum tagDomiRNNActivationMode { | |||
| DOMI_RNN_ACTIVATION_SIGMOID = 0, // sigmoid | |||
| DOMI_RNN_ACTIVATION_TANH, // tanh | |||
| DOMI_RNN_ACTIVATION_RELU, // ReLU | |||
| DOMI_RNN_ACTIVATION_RELU1, // ReLU1 | |||
| DOMI_RNN_ACTIVATION_RELU6, // ReLU6 | |||
| DOMI_RNN_ACTIVATION_RESERVED | |||
| } domiRNNActivationMode_t; | |||
| typedef enum tagDomiRNNLSTMOutMode { | |||
| DOMI_RNN_LSTM_OUT_SEPARATE = 0, | |||
| DOMI_RNN_LSTM_OUT_CONCAT, | |||
| DOMI_RNN_LSTM_OUT_RESERVED | |||
| } domiRNNLSTMOutPutMode_t; | |||
| typedef enum tagDomiRNNLSTMStateOutMode { | |||
| DOMI_RNN_LSTM_STATE_OUT_SEPARATE = 0, | |||
| DOMI_RNN_LSTM_STATE_OUT_CONCAT_ALL, | |||
| DOMI_RNN_LSTM_STATE_OUT_RESERVED | |||
| } domiRNNLSTMStateOutMode_t; | |||
| typedef enum tagDomiRNNMode { | |||
| DOMI_RNN_RELU = 0, | |||
| DOMI_RNN_TANH, | |||
| DOMI_LSTM, | |||
| DOMI_GRU, | |||
| DOMI_RNN_MODE_RESERVED | |||
| } domiRNNMode_t; | |||
| typedef enum tagDomiResizeBilinearMode { | |||
| DOMI_RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0, // Output dimension specified by zoom factor | |||
| DOMI_RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR, // specified by shrink factor | |||
| DOMI_RESIZE_OUTPUT_DIM_EXPLICIT, // specified explicitly | |||
| DOMI_RESIZE_OUTPUT_DIM_RESERVED | |||
| } domiResizeOutputDimMode_t; | |||
| #pragma pack(1) // single-byte alignment | |||
| // DUMP file struct | |||
| struct FileHeader { | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_FRAMEWORK_COMMON_UTIL_H_ | |||
| #define INC_FRAMEWORK_COMMON_UTIL_H_ | |||
| #ifndef AIR_INC_FRAMEWORK_COMMON_UTIL_H_ | |||
| #define AIR_INC_FRAMEWORK_COMMON_UTIL_H_ | |||
| #include <climits> | |||
| #include <cmath> | |||
| @@ -24,13 +24,15 @@ | |||
| #include <vector> | |||
| #include <google/protobuf/text_format.h> | |||
| #include "external/graph/types.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/scope_guard.h" | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "graph/detail/attributes_holder.h" | |||
| #define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ | |||
| do { \ | |||
| if (size <= 0) { \ | |||
| if ((size) <= 0) { \ | |||
| GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ | |||
| return PARAM_INVALID; \ | |||
| } \ | |||
| @@ -46,15 +48,19 @@ | |||
| // new ge marco | |||
| // Encapsulate common resource releases | |||
| #define GE_MAKE_GUARD_RTMEM(var) \ | |||
| GE_MAKE_GUARD(var, [&] { \ | |||
| if (var) GE_CHK_RT(rtFreeHost(var)); \ | |||
| }); | |||
| #define GE_MAKE_GUARD_RTMEM(var) \ | |||
| GE_MAKE_GUARD(var, [&] { \ | |||
| if ((var) != nullptr) { \ | |||
| GE_CHK_RT(rtFreeHost(var)); \ | |||
| } \ | |||
| }) | |||
| #define GE_MAKE_GUARD_RTSTREAM(var) \ | |||
| GE_MAKE_GUARD(var, [&] { \ | |||
| if (var) GE_CHK_RT(rtStreamDestroy(var)); \ | |||
| }); | |||
| #define GE_MAKE_GUARD_RTSTREAM(var) \ | |||
| GE_MAKE_GUARD(var, [&] { \ | |||
| if ((var) != nullptr) { \ | |||
| GE_CHK_RT(rtStreamDestroy(var)); \ | |||
| } \ | |||
| }) | |||
| // For propagating errors when calling a function. | |||
| #define GE_RETURN_IF_ERROR(expr) \ | |||
| @@ -115,7 +121,7 @@ | |||
| // Check if the parameter is null. If yes, return PARAM_INVALID and record the error | |||
| #define GE_CHECK_NOTNULL(val) \ | |||
| do { \ | |||
| if (val == nullptr) { \ | |||
| if ((val) == nullptr) { \ | |||
| REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ | |||
| GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ | |||
| return ge::PARAM_INVALID; \ | |||
| @@ -125,7 +131,7 @@ | |||
| // Check if the parameter is null. If yes, just return and record the error | |||
| #define GE_CHECK_NOTNULL_JUST_RETURN(val) \ | |||
| do { \ | |||
| if (val == nullptr) { \ | |||
| if ((val) == nullptr) { \ | |||
| GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
| return; \ | |||
| } \ | |||
| @@ -134,7 +140,7 @@ | |||
| // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log | |||
| #define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ | |||
| do { \ | |||
| if (val == nullptr) { \ | |||
| if ((val) == nullptr) { \ | |||
| GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
| exec_expr; \ | |||
| } \ | |||
| @@ -143,7 +149,7 @@ | |||
| // Check whether the parameter is null. If yes, return directly and record the error log | |||
| #define GE_RT_VOID_CHECK_NOTNULL(val) \ | |||
| do { \ | |||
| if (val == nullptr) { \ | |||
| if ((val) == nullptr) { \ | |||
| GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
| return; \ | |||
| } \ | |||
| @@ -152,7 +158,7 @@ | |||
| // Check if the parameter is null. If yes, return false and record the error log | |||
| #define GE_RT_FALSE_CHECK_NOTNULL(val) \ | |||
| do { \ | |||
| if (val == nullptr) { \ | |||
| if ((val) == nullptr) { \ | |||
| GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
| return false; \ | |||
| } \ | |||
| @@ -161,7 +167,7 @@ | |||
| // Check if the parameter is out of bounds | |||
| #define GE_CHECK_SIZE(size) \ | |||
| do { \ | |||
| if (size == 0) { \ | |||
| if ((size) == 0) { \ | |||
| GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ | |||
| return ge::PARAM_INVALID; \ | |||
| } \ | |||
| @@ -170,7 +176,7 @@ | |||
| // Check if the value on the left is greater than or equal to the value on the right | |||
| #define GE_CHECK_GE(lhs, rhs) \ | |||
| do { \ | |||
| if (lhs < rhs) { \ | |||
| if ((lhs) < (rhs)) { \ | |||
| GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ | |||
| return ge::PARAM_INVALID; \ | |||
| } \ | |||
| @@ -179,7 +185,7 @@ | |||
| // Check if the value on the left is less than or equal to the value on the right | |||
| #define GE_CHECK_LE(lhs, rhs) \ | |||
| do { \ | |||
| if (lhs > rhs) { \ | |||
| if ((lhs) > (rhs)) { \ | |||
| GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ | |||
| return ge::PARAM_INVALID; \ | |||
| } \ | |||
| @@ -187,102 +193,42 @@ | |||
| #define GE_DELETE_NEW_SINGLE(var) \ | |||
| do { \ | |||
| if (var != nullptr) { \ | |||
| delete var; \ | |||
| var = nullptr; \ | |||
| if ((var) != nullptr) { \ | |||
| delete (var); \ | |||
| (var) = nullptr; \ | |||
| } \ | |||
| } while (false) | |||
| #define GE_DELETE_NEW_ARRAY(var) \ | |||
| do { \ | |||
| if (var != nullptr) { \ | |||
| delete[] var; \ | |||
| var = nullptr; \ | |||
| if ((var) != nullptr) { \ | |||
| delete[](var); \ | |||
| (var) = nullptr; \ | |||
| } \ | |||
| } while (false) | |||
| #define GE_FREE_RT_LOG(addr) \ | |||
| do { \ | |||
| if (addr != nullptr) { \ | |||
| if ((addr) != nullptr) { \ | |||
| const rtError_t error = rtFree(addr); \ | |||
| if (error != RT_ERROR_NONE) { \ | |||
| GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ | |||
| } \ | |||
| addr = nullptr; \ | |||
| (addr) = nullptr; \ | |||
| } \ | |||
| } while (false) | |||
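The parentheses added around the macro parameters in this hunk matter once the argument is an expression rather than a plain identifier; a hedged illustration with `GE_CHECK_NOTNULL_JUST_RETURN`:

```cpp
void UseFirstValid(bool prefer_a, int *ptr_a, int *ptr_b) {
  // Old expansion: if (prefer_a ? ptr_a : ptr_b == nullptr)
  //   `==` binds tighter than `?:`, so this parses as
  //   prefer_a ? ptr_a : (ptr_b == nullptr), which is not the intended test
  //   (and with pointer/bool operands it does not even compile).
  // New expansion: if ((prefer_a ? ptr_a : ptr_b) == nullptr), the intended check.
  GE_CHECK_NOTNULL_JUST_RETURN(prefer_a ? ptr_a : ptr_b);
  // ... use the selected pointer ...
}
```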
| /** | |||
| * @ingroup domi_common | |||
| * @brief version of om.proto file | |||
| */ | |||
| static constexpr int32_t OM_PROTO_VERSION = 2; | |||
| /** | |||
| * Finding an Integer Ceiling Value Without Precision Loss | |||
| */ | |||
| #define CEIL(N, n) (((N) + (n)-1) / (n)) | |||
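`CEIL` computes the integer ceiling of N/n without going through floating point; a couple of worked values (note n must be positive and (N + n - 1) must not overflow):

```cpp
static_assert(CEIL(10, 3) == 4, "ceil(10 / 3) == 4");
static_assert(CEIL(9, 3) == 3, "exact division stays exact");
static_assert(CEIL(1, 8) == 1, "anything non-zero rounds up to one block");
```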
| namespace ge { | |||
| using google::protobuf::Message; | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Reads the proto structure from an array. | |||
| /// @param [in] data proto data to be read | |||
| /// @param [in] size proto data size | |||
| /// @param [out] proto Memory for storing the proto file | |||
| /// @return true success | |||
| /// @return false fail | |||
| /// | |||
| GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int32_t size, Message *proto); | |||
| /// | |||
| /// @ingroup domi_proto | |||
| /// @brief Reads the proto file in the text format. | |||
| /// @param [in] file path of proto file | |||
| /// @param [out] message Memory for storing the proto file | |||
| /// @return true success | |||
| /// @return false fail | |||
| /// | |||
| GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); | |||
| /// | |||
| /// @ingroup: domi_common | |||
| /// @brief: get length of file | |||
| /// @param [in] input_file: path of file | |||
| /// @return long: File length. If the file length fails to be obtained, the value -1 is returned. | |||
| /// | |||
| GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Reads all data from a binary file. | |||
| /// @param [in] file_name path of file | |||
| /// @param [out] buffer Output memory address, which needs to be released by the caller. | |||
| /// @param [out] length Output memory size | |||
| /// @return false fail | |||
| /// @return true success | |||
| /// | |||
| GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *const file_name, char **buffer, int32_t &length); | |||
| GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Recursively Creating a Directory | |||
| /// @param [in] directory_path Path, which can be a multi-level directory. | |||
| /// @return 0 success | |||
| /// @return -1 fail | |||
| /// | |||
| GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Obtains the current time string. | |||
| /// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 | |||
| /// | |||
| GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); | |||
| /** | |||
| * @ingroup domi_common | |||
| * @brief version of om.proto file | |||
| */ | |||
| constexpr int32_t OM_PROTO_VERSION = 2; | |||
| /// | |||
| /// @ingroup domi_common | |||
| @@ -294,7 +240,7 @@ template <typename T> | |||
| GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) { | |||
| std::stringstream ss; | |||
| ss << "["; | |||
| for (T x : v) { | |||
| for (const T x : v) { | |||
| ss << x; | |||
| ss << ", "; | |||
| } | |||
| @@ -314,7 +260,7 @@ template <typename T> | |||
| GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { | |||
| std::stringstream ss; | |||
| ss << "["; | |||
| for (T x : rpd_field) { | |||
| for (const T x : rpd_field) { | |||
| ss << x; | |||
| ss << ", "; | |||
| } | |||
| @@ -343,6 +289,65 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedPtrField | |||
| return str_ret; | |||
| } | |||
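A usage sketch for the `ToString` helpers above; the output shape is inferred from the code (elements joined inside square brackets):

```cpp
void LogShape() {
  std::vector<int64_t> dims{2, 3, 4};
  // Renders something like "[2, 3, 4]"; note this overload takes a non-const reference.
  const std::string text = ge::ToString(dims);
  GELOGI("shape: %s", text.c_str());
}
```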
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Reads the proto structure from an array. | |||
| /// @param [in] data proto data to be read | |||
| /// @param [in] size proto data size | |||
| /// @param [out] proto Memory for storing the proto file | |||
| /// @return true success | |||
| /// @return false fail | |||
| /// | |||
| GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *const data, const int32_t size, | |||
| google::protobuf::Message *const proto); | |||
| /// | |||
| /// @ingroup domi_proto | |||
| /// @brief Reads the proto file in the text format. | |||
| /// @param [in] file path of proto file | |||
| /// @param [out] message Memory for storing the proto file | |||
| /// @return true success | |||
| /// @return false fail | |||
| /// | |||
| GE_FUNC_VISIBILITY bool ReadProtoFromText(const char_t *const file, google::protobuf::Message *const message); | |||
| /// | |||
| /// @ingroup: domi_common | |||
| /// @brief: get length of file | |||
| /// @param [in] input_file: path of file | |||
| /// @return int64_t: File length. If the file length fails to be obtained, the value -1 is returned. | |||
| /// | |||
| GE_FUNC_VISIBILITY extern int64_t GetFileLength(const std::string &input_file); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Reads all data from a binary file. | |||
| /// @param [in] file_name path of file | |||
| /// @param [out] buffer Output memory address, which needs to be released by the caller. | |||
| /// @param [out] length Output memory size | |||
| /// @return false fail | |||
| /// @return true success | |||
| /// | |||
| GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char_t *const file_name, char_t **const buffer, int32_t &length); | |||
| GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char_t *file_name, std::vector<char_t> &buffer); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Recursively Creating a Directory | |||
| /// @param [in] directory_path Path, which can be a multi-level directory. | |||
| /// @return 0 success | |||
| /// @return -1 fail | |||
| /// | |||
| GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Obtains the current time string. | |||
| /// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 | |||
| /// | |||
| GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Obtains the absolute time (timestamp) of the current system. | |||
| @@ -366,7 +371,7 @@ GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap(); | |||
| /// @param [in] b | |||
| /// @return true: the result is within the normal int64 range / false: the multiplication overflows | |||
| /// | |||
| GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); | |||
| GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(const int64_t a, const int64_t b); | |||
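The declaration above only exposes the check; a typical overflow test of this shape (a sketch, not necessarily this library's implementation) bounds each sign combination by division:

```cpp
#include <cstdint>

// Sketch: returns true when a * b fits in int64_t (matching the doc comment).
bool CheckInt64MulOverflowSketch(const int64_t a, const int64_t b) {
  if ((a == 0) || (b == 0)) {
    return true;
  }
  if (a > 0) {
    if (b > 0) { return a <= (INT64_MAX / b); }   // positive * positive
    return b >= (INT64_MIN / a);                  // positive * negative
  }
  if (b > 0) { return a >= (INT64_MIN / b); }     // negative * positive
  return a >= (INT64_MAX / b);                    // negative * negative
}
```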
| /// | |||
| /// @ingroup domi_common | |||
| @@ -374,7 +379,7 @@ GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); | |||
| /// @param [in] path of input file | |||
| /// @return Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned | |||
| /// | |||
| GE_FUNC_VISIBILITY std::string RealPath(const char *path); | |||
| GE_FUNC_VISIBILITY std::string RealPath(const char_t *path); | |||
| /// | |||
| /// @ingroup domi_common | |||
| @@ -401,17 +406,7 @@ GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const | |||
| /// @param [in] str file path | |||
| /// @param [out] result | |||
| /// | |||
| GE_FUNC_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Check path invalid | |||
| /// @param [in] path, path to be checked | |||
| /// @param [in] length, length of path | |||
| /// @return 0 success | |||
| /// @return -1 fail | |||
| /// | |||
| GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length); | |||
| GE_FUNC_VISIBILITY bool ValidateStr(const std::string &file_path, const std::string &mode); | |||
| } // namespace ge | |||
| #endif // INC_FRAMEWORK_COMMON_UTIL_H_ | |||
| #endif // AIR_INC_FRAMEWORK_COMMON_UTIL_H_ | |||
| @@ -26,7 +26,7 @@ | |||
| #include "graph/types.h" | |||
| namespace ge { | |||
| enum PriorityEnum { | |||
| enum class PriorityEnum { | |||
| COST_0 = 0, | |||
| COST_1, | |||
| COST_2, | |||
| @@ -38,7 +38,7 @@ enum PriorityEnum { | |||
| struct DNNEngineAttribute { | |||
| std::string engine_name; | |||
| std::vector<std::string> mem_type; | |||
| uint32_t compute_cost; | |||
| PriorityEnum compute_cost; | |||
| enum RuntimeType runtime_type; // HOST, DEVICE | |||
| // If engine input format must be specific, set this attribute, else set FORMAT_RESERVED | |||
| Format engine_input_format; | |||
| @@ -53,10 +53,10 @@ class GE_FUNC_VISIBILITY DNNEngine { | |||
| engine_attribute_ = attrs; | |||
| } | |||
| virtual ~DNNEngine() = default; | |||
| Status Initialize(const std::map<std::string, std::string> &options) { | |||
| Status Initialize(const std::map<std::string, std::string> &options) const { | |||
| return SUCCESS; | |||
| } | |||
| Status Finalize() { | |||
| Status Finalize() const { | |||
| return SUCCESS; | |||
| } | |||
| void GetAttributes(DNNEngineAttribute &attr) const { | |||
| @@ -32,6 +32,7 @@ | |||
| namespace ge { | |||
| class SingleOp; | |||
| class DynamicSingleOp; | |||
| class GeRootModel; | |||
| struct RunModelData { | |||
| uint32_t index; // Data index | |||
| @@ -69,7 +70,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// | |||
| static Status FinalizeEx(); | |||
| Status UnloadModel(uint32_t modelId); | |||
| Status UnloadModel(uint32_t model_id); | |||
| // Get input and output descriptor | |||
| Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc, | |||
| @@ -225,6 +226,18 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids, | |||
| const std::vector<uint32_t> &output_queue_ids); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Load task list from ModelData with queue. | |||
| /// @param [out] model_id: model id allocate from manager. | |||
| /// @param [in] root_model: Instance of GeRootModel. | |||
| /// @param [in] input_queue_ids: input queue ids created by user. | |||
| /// @param [in] output_queue_ids: output queue ids created by user. | |||
| /// @return: 0 for success / others for fail | |||
| /// | |||
| Status LoadModelWithQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model, | |||
| const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids); | |||
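A minimal call sketch for this new overload; queue creation and the `GeRootModel` instance are assumed to come from elsewhere:

```cpp
ge::Status LoadWithQueues(const std::shared_ptr<ge::GeRootModel> &root_model,
                          const std::vector<uint32_t> &input_qids,
                          const std::vector<uint32_t> &output_qids) {
  ge::GeExecutor executor;  // assumed to have been initialized elsewhere
  uint32_t model_id = 0U;
  // Loads the task list carried by root_model and binds it to the given queues.
  return executor.LoadModelWithQ(model_id, root_model, input_qids, output_qids);
}
```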
| /// | |||
| /// @ingroup ge | |||
| /// @brief Synchronous execution of offline model(Do not create thread) | |||
| @@ -235,7 +248,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] domi::OutputData *output_data: Model output data | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data, | |||
| Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data, RunModelData &run_output_data, | |||
| bool async_mode = false); | |||
| /// | |||
| @@ -275,19 +288,19 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// | |||
| Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); | |||
| static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream, | |||
| static Status LoadSingleOp(const std::string &model_name, const ModelData &model_data, void *stream, | |||
| SingleOp **single_op); | |||
| static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream, | |||
| static Status LoadSingleOpV2(const std::string &model_name, const ModelData &model_data, void *stream, | |||
| SingleOp **single_op, const uint64_t model_id); | |||
| static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
| std::vector<DataBuffer> &outputs); | |||
| static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream, | |||
| static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &model_data, void *stream, | |||
| DynamicSingleOp **single_op); | |||
| static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream, | |||
| static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &model_data, void *stream, | |||
| DynamicSingleOp **single_op, const uint64_t model_id); | |||
| static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
| @@ -120,9 +120,9 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
| GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED); | |||
| bool CheckNoAicore(const ComputeGraphPtr &graph); | |||
| void RemoveConst(const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs); | |||
| Status CheckForSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
| Status CheckForSingleOp(const OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
| const std::vector<GeTensor> &outputs); | |||
| Status InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph); | |||
| Status InferFormatForSingleOp(const OpDescPtr &op_desc, const Graph &graph) const; | |||
| using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | |||
| Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); | |||
| @@ -17,11 +17,9 @@ | |||
| #ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
| #define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "external/ge/ge_api_error_codes.h" | |||
| #include "graph/types.h" | |||
| #include "runtime/mem.h" | |||
| namespace ge { | |||
| @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { | |||
| MemoryAssigner &operator=(const MemoryAssigner &) = delete; | |||
| Status AssignMemory(bool is_loop_graph, std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||
| Status AssignMemory(std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||
| private: | |||
| ge::ComputeGraphPtr compute_graph_; | |||
| @@ -64,7 +64,7 @@ GE_FUNC_VISIBILITY Status InitDomiOmgContext(const std::string &input_shape, con | |||
| GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<std::string, std::string> &atc_params, | |||
| const char *model_file, const char *weights_file, domi::FrameworkType type, | |||
| const char *op_conf = nullptr, const char *target = nullptr, | |||
| RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false); | |||
| RunMode run_mode = RunMode::GEN_OM_MODEL, bool is_dynamic_input = false); | |||
| /** | |||
| * @ingroup domi_omg | |||
| @@ -31,12 +31,7 @@ | |||
| using domi::DOMI_TENSOR_ND; | |||
| using domi::DOMI_TENSOR_RESERVED; | |||
| using domi::domiTensorFormat_t; | |||
| using domi::FRAMEWORK_RESERVED; | |||
| using domi::FrameworkType; | |||
| using std::map; | |||
| using std::string; | |||
| using std::unordered_map; | |||
| using std::vector; | |||
| namespace ge { | |||
| /** | |||
| @@ -51,36 +46,13 @@ enum RunMode { | |||
| DISPLAY_OM_INFO = 6 // display model info | |||
| }; | |||
| /// | |||
| /// @ingroup domi_omg | |||
| /// @brief high-precision mode | |||
| /// | |||
| enum HighPrecisionMode { | |||
| // the FP16 high-precision function is disabled in common mode | |||
| HIGH_PRECISION_DEFAULT = 0, | |||
| // high-precision mode, enabling FP16 high-precision mode (Convolution/FullConnect/AvgPooling are involved) | |||
| HIGH_PRECISION_FP16 = 1 | |||
| }; | |||
| /// | |||
| /// @ingroup domi_omg | |||
| /// @brief description buffer data | |||
| /// | |||
| struct OMGBufferData { | |||
| void *data; | |||
| uint32_t length; | |||
| }; | |||
| struct OmgContext { | |||
| OmgContext() { | |||
| format = DOMI_TENSOR_ND; | |||
| } | |||
| domiTensorFormat_t format; | |||
| OmgContext() : format(domi::DOMI_TENSOR_ND) {} | |||
| domi::domiTensorFormat_t format; | |||
| // format of the input specified by the command line | |||
| std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||
| std::vector<domiTensorFormat_t> output_formats; | |||
| std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map; | |||
| std::vector<domi::domiTensorFormat_t> output_formats; | |||
| // user-designate input dims | |||
| std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||
| @@ -107,9 +79,9 @@ struct OmgContext { | |||
| // net data nodes tensor names(caffe or onnx) | |||
| std::vector<std::string> data_tensor_names; | |||
| // preferential format used by the entire network | |||
| domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; | |||
| domi::domiTensorFormat_t net_format = domi::DOMI_TENSOR_RESERVED; | |||
| domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||
| RunMode run_mode = ONLY_PRE_CHECK; | |||
| RunMode run_mode = RunMode::ONLY_PRE_CHECK; | |||
| bool train_flag = false; | |||
| std::string output_type; | |||
| @@ -108,6 +108,8 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
| * @return Others failed | |||
| */ | |||
| virtual domi::Status ToJson(const char *model_file, const char *json_file) { | |||
| (void)model_file; | |||
| (void)json_file; | |||
| return domi::SUCCESS; | |||
| } | |||
| @@ -130,6 +132,8 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
| * @return Others failed | |||
| */ | |||
| virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { | |||
| (void)serialized_proto; | |||
| (void)graph; | |||
| return UNSUPPORTED; | |||
| } | |||
| @@ -144,6 +148,9 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
| */ | |||
| virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, | |||
| ge::ComputeGraphPtr &graph) { | |||
| (void)serialized_proto; | |||
| (void)callback; | |||
| (void)graph; | |||
| return UNSUPPORTED; | |||
| } | |||
| }; | |||
| @@ -50,7 +50,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
| * @return SUCCESS | |||
| * @return FAILED | |||
| */ | |||
| virtual domi::Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; | |||
| virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::OpDescPtr &op_desc) = 0; | |||
| /** | |||
| * @ingroup domi_omg | |||
| @@ -60,7 +60,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
| * @return SUCCESS | |||
| * @return FAILED | |||
| */ | |||
| virtual domi::Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; | |||
| virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::Operator &op_dest) = 0; | |||
| /** | |||
| * @ingroup domi_omg | |||
| @@ -70,7 +70,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
| * @return SUCCESS | |||
| * @return FAILED | |||
| */ | |||
| virtual domi::Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; | |||
| virtual domi::Status ParseWeights(const google::protobuf::Message *op_src, ge::NodePtr &node) = 0; | |||
| /** | |||
| * @ingroup domi_omg | |||
| @@ -80,7 +80,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
| * @return SUCCESS | |||
| * @return FAILED | |||
| */ | |||
| virtual domi::Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { | |||
| virtual domi::Status GetFormat(const google::protobuf::Message *op_src, domi::domiTensorFormat_t &format) { | |||
| (void)op_src; | |||
| // Indicates that the op does not provide a value for format | |||
| format = domi::DOMI_TENSOR_RESERVED; | |||
| @@ -24,13 +24,11 @@ | |||
| #include "framework/omg/omg_inner_types.h" | |||
| #include "framework/omg/parser/parser_types.h" | |||
| using Status = domi::Status; | |||
| namespace domi { | |||
| class WeightsParser; | |||
| class ModelParser; | |||
| typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void); | |||
| using MODEL_PARSER_CREATOR_FUN = std::shared_ptr<ModelParser> (*)(void); | |||
| // Create modelparser for different frameworks | |||
| class GE_FUNC_VISIBILITY ModelParserFactory { | |||
| @@ -82,7 +80,7 @@ class GE_FUNC_VISIBILITY ModelParserRegisterar { | |||
| } \ | |||
| ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) | |||
| typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void); | |||
| using WEIGHTS_PARSER_CREATOR_FUN = std::shared_ptr<WeightsParser> (*)(void); | |||
| // Create weightsparser for different frameworks | |||
| class GE_FUNC_VISIBILITY WeightsParserFactory { | |||
| @@ -29,8 +29,8 @@ | |||
| namespace ge { | |||
| struct ParserContext { | |||
| // format of the input specified by the command line | |||
| std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||
| std::vector<domiTensorFormat_t> output_formats; | |||
| std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map; | |||
| std::vector<domi::domiTensorFormat_t> output_formats; | |||
| // user-designate input dims | |||
| std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||
| std::map<std::string, std::vector<int64_t>> input_dims; | |||
| @@ -58,7 +58,7 @@ struct ParserContext { | |||
| bool train_flag = false; | |||
| domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; | |||
| domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||
| RunMode run_mode = GEN_OM_MODEL; | |||
| RunMode run_mode = RunMode::GEN_OM_MODEL; | |||
| // save caffe custom proto path, used by caffe parse | |||
| std::string custom_proto_path; | |||
| // save caffe proto path, used by caffe parse | |||
| @@ -19,8 +19,6 @@ | |||
| #include <memory> | |||
| #include <set> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/string_util.h" | |||
| @@ -34,7 +32,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { | |||
| static Status GetPlatformVersion(std::string &ver) { | |||
| ver = "1.11.z"; | |||
| const std::vector<std::string> version_splits = StringUtils::Split(ver, '.'); | |||
| GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); | |||
| GE_IF_BOOL_EXEC(version_splits.size() < 3U, GELOGW("Read platform version error!"); return FAILED;); | |||
| GELOGI("Read current platform version: %s.", ver.c_str()); | |||
| return SUCCESS; | |||
| @@ -1 +1 @@ | |||
| Subproject commit 1d99928bfcb02e45acc7db73e3ee57304ff1131a | |||
| Subproject commit b903e17423bb8f5f97b5cc4cae2ec54a7bf701b8 | |||
| @@ -21,7 +21,7 @@ | |||
| namespace aicpu { | |||
| namespace FWKAdapter { | |||
| using char_t = char; | |||
| // API RETURN CODE | |||
| enum FWKAdptAPIRetCode { | |||
| FWK_ADPT_SUCCESS = 0, // success | |||
| @@ -63,6 +63,8 @@ enum FWKTaskExtInfoType { | |||
| FWK_ADPT_EXT_BITMAP, | |||
| FWK_ADPT_EXT_TOPIC_TYPE, | |||
| FWK_ADPT_EXT_ASYNCWAIT, | |||
| FWK_ADPT_EXT_UNKNOWN_SHAPE_INPUT_INDEX, | |||
| FWK_ADPT_EXT_UNKNOWN_SHAPE_OUTPUT_INDEX, | |||
| FWK_ADPT_EXT_INVALID | |||
| }; | |||
| @@ -113,7 +115,7 @@ struct StrFWKKernel { | |||
| typedef StrFWKKernel FWKOperateParam; | |||
| // Extent info ShapeAndType | |||
| const uint32_t kMaxShapeDims = 8; | |||
| const uint32_t kMaxShapeDims = 8U; | |||
| #pragma pack(push, 1) | |||
| struct ShapeAndType { | |||
| int32_t type; | |||
| @@ -122,13 +124,13 @@ struct ShapeAndType { | |||
| #pragma pack(pop) | |||
| // Extend info structure for extInfoAddr | |||
| const uint32_t kExtInfoHeadSize = 8; | |||
| const uint32_t kExtInfoHeadSize = 8U; | |||
| #pragma pack(push, 1) | |||
| struct ExtInfo { | |||
| int32_t infoType; // extend type | |||
| uint32_t infoLen; // length for infoMsg | |||
| char infoMsg[0]; // extend value | |||
| char_t infoMsg[0]; // extend value | |||
| }; | |||
| #pragma pack(pop) | |||
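Because `ExtInfo` ends in a zero-length array and the structs are packed to one byte, an extended-info buffer is laid out as repeated `[infoType | infoLen | infoLen payload bytes]` records, with `kExtInfoHeadSize` (8) matching the two fixed fields. A hedged walking sketch, assuming well-formed records:

```cpp
#include <cstdint>

void WalkExtInfo(const char *buf, uint32_t total_len) {
  uint32_t offset = 0U;
  while ((offset + aicpu::FWKAdapter::kExtInfoHeadSize) <= total_len) {
    const auto *info = reinterpret_cast<const aicpu::FWKAdapter::ExtInfo *>(buf + offset);
    // info->infoMsg points at the infoLen payload bytes that follow the head
    offset += aicpu::FWKAdapter::kExtInfoHeadSize + info->infoLen;
  }
}
```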
| @@ -62,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over l | |||
| static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
| static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
| static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
| static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec overflow | |||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
| @@ -126,72 +126,6 @@ extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, co | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||
| /** | |||
| * @brief Initialize hcom executor. | |||
| * | |||
| * @param void | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult HcomExecInitialize(); | |||
| /** | |||
| * @brief Finalize hcom executor. | |||
| * | |||
| * @param void | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult HcomExecFinalize(); | |||
| /** | |||
| * @brief Put collective communication operation into hcom executor. | |||
| * | |||
| * @param opInfo information about collective communication operation. | |||
| * @param callback callback after collective communication operation. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); | |||
| /** | |||
| * @brief Put remote access operation into hcom executor. | |||
| * | |||
| * @param remoteAccessType operation type (read or write). | |||
| * @param addrInfos address information about collective communication operation. | |||
| * @param callback callback after collective communication operation. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||
| const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | |||
| std::function<void(HcclResult status)> callback); | |||
| /** | |||
| * @brief Put alltoallv communication operation into hcom executor. | |||
| * | |||
| * @param params information about alltoallv communication operation. | |||
| * @param callback callback after collective communication operation. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback); | |||
| /** | |||
| * @brief Put gather alltoallv communication operation into hcom executor. | |||
| * | |||
| * @param params information about gather alltoallv communication operation. | |||
| * @param callback callback after collective communication operation. | |||
| * @return HcclResult | |||
| */ | |||
| HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, | |||
| std::function<void(HcclResult status)> callback); | |||
| /** | |||
| * @brief Register memories and init resources for remote access. | |||
| * | |||
| * @param addrList memory addresses for remote access. | |||
| * @param count number of remote memory addresses. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif // __cplusplus | |||
| @@ -1,18 +1,12 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| /* | |||
| * @file mmpa_api.h | |||
| * | |||
| * Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
| * | |||
| * This program is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| */ | |||
| #ifndef _MMPA_API_H_ | |||
| #define _MMPA_API_H_ | |||
| @@ -1,18 +1,12 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| /* | |||
| * @file mmpa_linux.h | |||
| * | |||
| * Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
| * | |||
| * This program is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| */ | |||
| #ifndef MMPA_LINUX_MMPA_LINUX_H | |||
| #define MMPA_LINUX_MMPA_LINUX_H | |||
| @@ -79,6 +79,9 @@ typedef long LONG; | |||
| #define MMPA_THREAD_SCHED_OTHER SCHED_OTHER | |||
| #define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN | |||
| #define MMPA_PATH_SEPARATOR_STR "/" | |||
| #define MMPA_PATH_SEPARATOR_CHAR '/' | |||
| #define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER | |||
| #define MMPA_MAX_NI 19 | |||
| @@ -1,83 +1,86 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MMPA_TYPEDEF_WIN_H | |||
| #define MMPA_TYPEDEF_WIN_H | |||
| #ifdef __cplusplus | |||
| #if __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| #endif // __cplusplus | |||
| #ifndef FALSE | |||
| #define FALSE 0 | |||
| #endif | |||
| #ifndef TRUE | |||
| #define TRUE 1 | |||
| #endif | |||
| #define EN_OK 0 | |||
| #define EN_ERR 1 | |||
| #define EN_ERROR (-1) | |||
| #define EN_INVALID_PARAM (-2) | |||
| #define EN_TIMEOUT (-3) | |||
| #define HANDLE_INVALID_VALUE (-1) | |||
| #define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
| #define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
| #define MMPA_PROCESS_ERROR (0x7fffffff) | |||
| #define MMPA_ONE_THOUSAND 1000 | |||
| #define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
| #define SUMMER_TIME_OR_NOT (-1) | |||
| #define MMPA_ZERO 0 | |||
| #define MMPA_VALUE_ONE 1 | |||
| #define MMPA_SOCKET_MAIN_EDITION 2 | |||
| #define MMPA_SOCKET_SECOND_EDITION 0 | |||
| #define MMPA_PIPE_BUF_SIZE 1024 | |||
| #define MMPA_MAX_SCANDIR_COUNT 1024 | |||
| #define MAX_IOVEC_SIZE 32 | |||
| #define MMPA_PIPE_COUNT 2 | |||
| #define MMPA_THREADNAME_SIZE 16 | |||
| #define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
| #define MMPA_MIN_OS_VERSION_SIZE 64 | |||
| #define MMPA_MAX_NI 19 | |||
| #define MMPA_MIDDLE_NI 5 | |||
| #define MMPA_LOW_NI (-5) | |||
| #define MMPA_MIN_NI (-20) | |||
| #define MMPA_MAX_FILE 128 | |||
| #define MMPA_MAX_THREAD_PIO 99 | |||
| #define MMPA_MIDDLE_THREAD_PIO 66 | |||
| #define MMPA_LOW_THREAD_PIO 33 | |||
| #define MMPA_MIN_THREAD_PIO 1 | |||
| #define MMPA_THREAD_SCHED_RR 0 | |||
| #define MMPA_THREAD_SCHED_FIFO 0 | |||
| #define MMPA_THREAD_SCHED_OTHER 0 | |||
| #define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
| #define MM_MUTEX_INITIALIZER NULL | |||
| #ifdef __cplusplus | |||
| #if __cplusplus | |||
| } | |||
| #endif // __cplusplus | |||
| #endif // __cplusplus | |||
| #endif // MMPA_TYPEDEF_WIN_H | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MMPA_TYPEDEF_WIN_H | |||
| #define MMPA_TYPEDEF_WIN_H | |||
| #ifdef __cplusplus | |||
| #if __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| #endif // __cplusplus | |||
| #ifndef FALSE | |||
| #define FALSE 0 | |||
| #endif | |||
| #ifndef TRUE | |||
| #define TRUE 1 | |||
| #endif | |||
| #define EN_OK 0 | |||
| #define EN_ERR 1 | |||
| #define EN_ERROR (-1) | |||
| #define EN_INVALID_PARAM (-2) | |||
| #define EN_TIMEOUT (-3) | |||
| #define HANDLE_INVALID_VALUE (-1) | |||
| #define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
| #define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
| #define MMPA_PROCESS_ERROR (0x7fffffff) | |||
| #define MMPA_ONE_THOUSAND 1000 | |||
| #define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
| #define SUMMER_TIME_OR_NOT (-1) | |||
| #define MMPA_ZERO 0 | |||
| #define MMPA_VALUE_ONE 1 | |||
| #define MMPA_SOCKET_MAIN_EDITION 2 | |||
| #define MMPA_SOCKET_SECOND_EDITION 0 | |||
| #define MMPA_PIPE_BUF_SIZE 1024 | |||
| #define MMPA_MAX_SCANDIR_COUNT 1024 | |||
| #define MAX_IOVEC_SIZE 32 | |||
| #define MMPA_PIPE_COUNT 2 | |||
| #define MMPA_THREADNAME_SIZE 16 | |||
| #define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
| #define MMPA_MIN_OS_VERSION_SIZE 64 | |||
| #define MMPA_MAX_NI 19 | |||
| #define MMPA_MIDDLE_NI 5 | |||
| #define MMPA_LOW_NI (-5) | |||
| #define MMPA_MIN_NI (-20) | |||
| #define MMPA_MAX_FILE 128 | |||
| #define MMPA_PATH_SEPARATOR_STR "\\" | |||
| #define MMPA_PATH_SEPARATOR_CHAR '\\' | |||
| #define MMPA_MAX_THREAD_PIO 99 | |||
| #define MMPA_MIDDLE_THREAD_PIO 66 | |||
| #define MMPA_LOW_THREAD_PIO 33 | |||
| #define MMPA_MIN_THREAD_PIO 1 | |||
| #define MMPA_THREAD_SCHED_RR 0 | |||
| #define MMPA_THREAD_SCHED_FIFO 0 | |||
| #define MMPA_THREAD_SCHED_OTHER 0 | |||
| #define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
| #define MM_MUTEX_INITIALIZER NULL | |||
| #ifdef __cplusplus | |||
| #if __cplusplus | |||
| } | |||
| #endif // __cplusplus | |||
| #endif // __cplusplus | |||
| #endif // MMPA_TYPEDEF_WIN_H | |||
| @@ -1,18 +1,12 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| /* | |||
| * @file mmpa_win.h | |||
| * | |||
| * Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
| * | |||
| * This program is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| */ | |||
| #ifndef MMPA_WIN_MMPA_WIN_H | |||
| #define MMPA_WIN_MMPA_WIN_H | |||
| @@ -200,6 +200,48 @@ REG_OP(Unique) | |||
| .ATTR(out_idx, Type, DT_INT32) | |||
| .OP_END_FACTORY_REG(Unique) | |||
| /** | |||
| *@brief Finds unique elements in an N-D tensor. \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li x: An N-D tensor. \n | |||
| *@par Attributes: | |||
| *sorted: An optional attr of type int, defaults to 1. | |||
| *axis: An optional attr of type int, defaults to -1000. | |||
| *return_idx: An optional attr of type bool, defaults to false. | |||
| *return_inverse: An optional attr of type bool, defaults to false. | |||
| *return_counts: An optional attr of type bool, defaults to false.\n | |||
| *@par Outputs: | |||
| *@li y: A tensor containing the unique elements of "x". | |||
| *@li idx: A tensor the same size as "x". The index of each value of "x" in "y". | |||
| *@li inverse_idx: A tensor the same size as "x". The index of each value of "y" in "x". | |||
| *@li count: A tensor the same size as "y". The count of each value of "y" in "x". \n | |||
| *@attention Constraints: | |||
| *UniqueV2 runs on the Ascend AI CPU, which delivers poor performance. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the PyTorch operator unique. | |||
| */ | |||
| REG_OP(UniqueV2) | |||
| .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
| DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING })) | |||
| .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
| DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING })) | |||
| .OUTPUT(idx, TensorType({ DT_INT64 })) | |||
| .OUTPUT(inverse_idx, TensorType({ DT_INT64 })) | |||
| .OUTPUT(count, TensorType({ DT_INT64 })) | |||
| .ATTR(sorted, Int, 1) | |||
| .ATTR(axis, Int, -1000) | |||
| .ATTR(return_idx, Bool, false) | |||
| .ATTR(return_inverse, Bool, false) | |||
| .ATTR(return_counts, Bool, false) | |||
| .OP_END_FACTORY_REG(UniqueV2) | |||
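| /* | |||
|  * Host-side reference sketch (illustration only, not the device implementation): | |||
|  * the intended relationship between "y", "idx" and "count" for the flattened, | |||
|  * sorted case, assuming "idx" maps each element of "x" to its position in "y". | |||
|  * | |||
|  *   #include <cstdint> | |||
|  *   #include <map> | |||
|  *   #include <vector> | |||
|  *   std::vector<int64_t> UniqueRef(const std::vector<int64_t> &x, | |||
|  *                                  std::vector<int64_t> &idx, | |||
|  *                                  std::vector<int64_t> &count) { | |||
|  *     std::map<int64_t, int64_t> pos;  // value -> index in y (sorted order) | |||
|  *     for (int64_t v : x) pos.emplace(v, 0); | |||
|  *     std::vector<int64_t> y; | |||
|  *     for (auto &kv : pos) { kv.second = static_cast<int64_t>(y.size()); y.push_back(kv.first); } | |||
|  *     count.assign(y.size(), 0); | |||
|  *     idx.clear(); | |||
|  *     for (int64_t v : x) { idx.push_back(pos[v]); ++count[pos[v]]; } | |||
|  *     return y; | |||
|  *   } | |||
|  */ | |||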
| /** | |||
| *@brief Finds unique elements in a 1D tensor. \n | |||
| @@ -3821,6 +3821,10 @@ REG_OP(CosineSimilarity) | |||
| * @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li step_size: An optional Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @par Attributes: | |||
| * @li adam_mode: An optional string. Defaults to "adam". \n | |||
| *@par Outputs: | |||
| *Three outputs, including: | |||
| @@ -3840,9 +3844,11 @@ REG_OP(ApplyAdamV2) | |||
| .INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OPTIONAL_INPUT(step_size, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .ATTR(adam_mode, String, "adam") | |||
| .OP_END_FACTORY_REG(ApplyAdamV2) | |||
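| /* | |||
|  * For reference, the standard Adam update that this family of ops implements | |||
|  * (a sketch of the textbook equations; the exact variant applied by ApplyAdamV2, | |||
|  * including how max_grad_norm and weight_decay enter, is not specified here): | |||
|  *   m = beta1 * m + (1 - beta1) * grad | |||
|  *   v = beta2 * v + (1 - beta2) * grad * grad | |||
|  *   var = var - lr * m / (sqrt(v) + epsilon) | |||
|  */ | |||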
| } // namespace ge | |||
| @@ -142,6 +142,74 @@ REG_OP(BatchNorm) | |||
| .ATTR(is_training, Bool, true) | |||
| .OP_END_FACTORY_REG(BatchNorm) | |||
| /** | |||
| * @brief After the mean and the reciprocal of the standard deviation (invert_std) are calculated separately on each device, | |||
| * the per-device mean and invert_std data are combined, | |||
| * a total mean and a total invert_std are returned, and running_var is updated. | |||
| * @par Inputs: | |||
| * include: | |||
| * @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
| * @li invert_std_all: A Tensor. Reciprocal of the standard deviations of each device. Must be one of the following types: float16, float32. | |||
| * @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32. | |||
| * @li mean_broadcast: A Tensor. The overall mean, broadcast to each device. Must be one of the following types: float16, float32. | |||
| * @li count_sum: A Tensor. The total count across all devices. Must be one of the following types: float16, float32. | |||
| * @li running_var: A Tensor. The running variance. Must be one of the following types: float16, float32. \n | |||
| * @par Attributes: | |||
| * Two Attributes, including: | |||
| * @li momentum: An optional float. Defaults to 0.1. \n | |||
| * @li epsilon: An optional float. Defaults to 0.001. \n | |||
| * @par Outputs: | |||
| * include: | |||
| * @li invert_std: A Tensor. The reciprocal of the total standard deviation. | |||
| * @li running_var_update: A Tensor. The moving variance of each device after the update. \n | |||
| * @par Third-party framework compatibility | |||
| * ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
| * compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||
| */ | |||
| REG_OP(SyncBatchNormGatherStatsWithCounts) | |||
| .INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .ATTR(momentum, Float, 0.1) | |||
| .ATTR(epsilon, Float, 0.001) | |||
| .OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts) | |||
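| /* | |||
|  * For reference, the standard combination of per-device statistics used by sync | |||
|  * batch norm, which the inputs above suggest (an assumption; the header does not | |||
|  * spell out the formulas): | |||
|  *   count_sum  = sum_i count_all[i] | |||
|  *   mean       = sum_i count_all[i] * mean_all[i] / count_sum | |||
|  *   var        = sum_i count_all[i] * (1 / invert_std_all[i]^2 + mean_all[i]^2) / count_sum - mean^2 | |||
|  *   invert_std = 1 / sqrt(var + epsilon) | |||
|  *   running_var_update = (1 - momentum) * running_var + momentum * var_unbiased | |||
|  */ | |||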
| /** | |||
| * @brief update running_mean. | |||
| * @par Inputs: | |||
| * include: | |||
| * @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
| * @li running_mean: A Tensor. The running mean. Must be one of the following types: float16, float32. \n | |||
| * @par Attributes: | |||
| * One Attribute, including: | |||
| * @li momentum: An optional float. Defaults to 0.1. \n | |||
| * @par Outputs: | |||
| * include: | |||
| * @li running_mean_update: A Tensor. The moving mean of each device after the update. \n | |||
| * @par Third-party framework compatibility | |||
| * ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
| * compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||
| */ | |||
| REG_OP(SyncBNTrainingUpdate) | |||
| .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .ATTR(momentum, Float, 0.1) | |||
| .OP_END_FACTORY_REG(SyncBNTrainingUpdate) | |||
| /** | |||
| *@brief part of SyncBatchNormBackward . \n | |||
| @@ -2076,7 +2076,7 @@ REG_OP(GIoUGrad) | |||
| * trans: An optional attr, true for 'xyxyt', false for 'xywht'. | |||
| *@par Outputs: | |||
| * overlaps: A 3D Tensor of type float16 or float32 with shape [B, N, K]. | |||
| * overlaps: A 3D Tensor of type float32 with shape [B, N, K]. | |||
| *@attention Constraints: | |||
| * In each batch, the invalid box cannot appear before the valid box. | |||
| @@ -2087,6 +2087,100 @@ REG_OP(RotatedOverlaps) | |||
| .OUTPUT(overlaps, TensorType({DT_FLOAT})) | |||
| .ATTR(trans, Bool, false) | |||
| .OP_END_FACTORY_REG(RotatedOverlaps) | |||
| /** | |||
| *@brief RotatedIou . \n | |||
| *@par Inputs: | |||
| *@li boxes: Bounding boxes, a 3D Tensor of type float32 with | |||
| * shape (B, 5, N). "N" indicates the number of boxes, and the value | |||
| * "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. | |||
| *@li query_boxes: Bounding boxes, a 3D Tensor of type float32 with | |||
| * shape (B, 5, K). "K" indicates the number of boxes, and the value | |||
| * "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. | |||
| *@par Attributes: | |||
| *@li trans: An optional attr, true for 'xyxyt', false for 'xywht'. | |||
| *@li mode: An optional attr, a character string with the value range of ['iou', 'iof']; | |||
| * only 'iou' is supported currently. | |||
| *@li is_cross: Cross calculation when it is True, and one-to-one calculation when it is False. | |||
| *@li v_threshold: An optional attr, provide condition relaxation for intersection calculation. | |||
| *@li e_threshold: An optional attr, provide condition relaxation for intersection calculation. | |||
| *@par Outputs: | |||
| * iou: A 3D Tensor of float32 with shape [B, N, K]. | |||
| *@attention Constraints: | |||
| * In each batch, the invalid box cannot appear before the valid box. | |||
| */ | |||
| REG_OP(RotatedIou) | |||
| .INPUT(boxes, TensorType({DT_FLOAT})) | |||
| .INPUT(query_boxes, TensorType({DT_FLOAT})) | |||
| .OUTPUT(iou, TensorType({DT_FLOAT})) | |||
| .ATTR(trans, Bool, false) | |||
| .ATTR(mode, String, "iou") | |||
| .ATTR(is_cross, Bool, true) | |||
| .ATTR(v_threshold, Float, 0) | |||
| .ATTR(e_threshold, Float, 0) | |||
| .OP_END_FACTORY_REG(RotatedIou) | |||
| /** | |||
| *@brief RotatedBoxEncode. \n | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
| * "B" indicates the number of batch size | |||
| * "N" indicates the number of bounding boxes, and the value "5" refers to | |||
| * "x0", "x1", "y0", "y1" and "angle". | |||
| *@li gt_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
| * "B" indicates the number of batch size | |||
| * "N" indicates the number of bounding boxes, and the value "5" refers to | |||
| * "x0", "x1", "y0", "y1" and "angle". \n | |||
| *@par Attributes: | |||
| *@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", | |||
| * defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. | |||
| *@par Outputs: | |||
| *@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), | |||
| * specifying the variations between all anchor boxes and ground truth boxes. | |||
| */ | |||
| REG_OP(RotatedBoxEncode) | |||
| .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(gt_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) | |||
| .OP_END_FACTORY_REG(RotatedBoxEncode) | |||
| /** | |||
| *@brief RotatedBoxDecode. \n | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
| * "B" indicates the number of batch size | |||
| * "N" indicates the number of bounding boxes, and the value "5" refers to | |||
| * "x0", "x1", "y0", "y1" and "angle". | |||
| *@li deltas: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
| * "B" indicates the number of batch size | |||
| * "N" indicates the number of bounding boxes, and the value "5" refers to | |||
| * "x0", "x1", "y0", "y1" and "angle". \n | |||
| *@par Attributes: | |||
| *@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", | |||
| * defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. | |||
| *@par Outputs: | |||
| *@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), | |||
| * specifying the decoded boxes obtained by applying "deltas" to "anchor_box". | |||
| */ | |||
| REG_OP(RotatedBoxDecode) | |||
| .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) | |||
| .OP_END_FACTORY_REG(RotatedBoxDecode) | |||
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | |||
| @@ -1644,6 +1644,36 @@ REG_OP(NormalizeBatch) | |||
| .REQUIRED_ATTR(normalize_type, String) | |||
| .ATTR(epsilon, Float, 0.00001) | |||
| .OP_END_FACTORY_REG(NormalizeBatch) | |||
| /** | |||
| *@brief GroupNorm and Relu operator | |||
| * calculating: x, gamma, beta | |||
| * y = relu(gamma*((x - mean) / np.sqrt(variance + eps)) + beta) | |||
| *@par Inputs: | |||
| *Three inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li gamma: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li beta: A Tensor. Must be one of the following types: float16, float32 . \n | |||
| *@par Attributes: | |||
| * @li num_groups: A required attribute, the type is int32. | |||
| * @li eps: An optional attribute, the type is float32. Defaults to 0.00001. \n | |||
| *@par Outputs: | |||
| *One output, including: | |||
| * @li y: A Tensor. Must be one of the following types: float16, float32. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(GroupNormRelu) | |||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .REQUIRED_ATTR(num_groups, Int) | |||
| .ATTR(eps, Float, 0.00001) | |||
| .OP_END_FACTORY_REG(GroupNormRelu) | |||
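| /* | |||
|  * Self-contained reference sketch of the formula above (scalar gamma/beta for | |||
|  * brevity; the op takes them as tensors). An illustration, not the device kernel: | |||
|  * | |||
|  *   #include <cmath> | |||
|  *   #include <vector> | |||
|  *   void GroupNormReluRef(std::vector<float> &x, float gamma, float beta, | |||
|  *                         int num_groups, float eps = 0.00001f) { | |||
|  *     const size_t len = x.size() / num_groups;  // elements per group | |||
|  *     for (int g = 0; g < num_groups; ++g) { | |||
|  *       float *p = x.data() + g * len; | |||
|  *       float mean = 0.0f, var = 0.0f; | |||
|  *       for (size_t i = 0; i < len; ++i) mean += p[i]; | |||
|  *       mean /= len; | |||
|  *       for (size_t i = 0; i < len; ++i) var += (p[i] - mean) * (p[i] - mean); | |||
|  *       var /= len; | |||
|  *       for (size_t i = 0; i < len; ++i) { | |||
|  *         const float y = gamma * ((p[i] - mean) / std::sqrt(var + eps)) + beta; | |||
|  *         p[i] = y > 0.0f ? y : 0.0f;  // relu | |||
|  *       } | |||
|  *     } | |||
|  *   } | |||
|  */ | |||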
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | |||
| @@ -25,7 +25,8 @@ | |||
| namespace ge { | |||
| /** | |||
| *@brief Computes the for the gelu of "x" . \n | |||
| *@brief The GELU activation function is x*Φ(x), | |||
| * where Φ(x) is the standard Gaussian cumulative distribution function. \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| @@ -144,7 +145,7 @@ REG_OP(GeluGrad) | |||
| .OP_END_FACTORY_REG(GeluGrad) | |||
| /** | |||
| *@brief Computes the for the fast_gelu of "x" . \n | |||
| *@brief The FastGelu activation function is x*e^(0.851*x)*(x-|x|)/(1+e^(-1.702|x|)). \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| @@ -159,7 +160,23 @@ REG_OP(FastGelu) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(FastGelu) | |||
| /** | |||
| *@brief The FastGeluV2 activation function is x*(sgn(x)*[(a/2)*(clip(|x|,max=-b)+b)^2+0.5]+0.5), | |||
| * where sgn(x) is approximated as (x+0.000000000001)/|(x+0.000000000001)|. \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| *x: A Tensor. Must be one of the following types: float16, float32 | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "x". | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the TensorFlow operator FastGeluV2 | |||
| */ | |||
| REG_OP(FastGeluV2) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(FastGeluV2) | |||
| /** | |||
| *@brief Computes the gradient for the fast_gelu of "x" . \n | |||
| @@ -623,9 +640,7 @@ REG_OP(Elu) | |||
| *x: A float16, float32, for the input data type . \n | |||
| *@par Attributes: | |||
| *@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
| *@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
| *@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n | |||
| *@li alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
| *@par Outputs: | |||
| *y: A float16, float32, for the normalized result . \n | |||
| @@ -641,9 +656,7 @@ REG_OP(Elu) | |||
| REG_OP(Celu) | |||
| .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
| .ATTR(alpha1, Float, 1.0) | |||
| .ATTR(alpha2, Float, 1.0) | |||
| .ATTR(alpha3, Float, 1.0) | |||
| .ATTR(alpha, Float, 1.0) | |||
| .OP_END_FACTORY_REG(Celu) | |||
| /** | |||
| @@ -81,6 +81,7 @@ REG_OP(OCRRecognitionPreHandle) | |||
| .OUTPUT(imgs, TensorType({DT_UINT8})) | |||
| .OUTPUT(imgs_relation, TensorType({DT_INT32})) | |||
| .OUTPUT(imgs_lang, TensorType({DT_INT32})) | |||
| .OUTPUT(imgs_piece_fillers, TensorType({DT_INT32})) | |||
| .ATTR(batch_size, Int, 8) | |||
| .ATTR(data_format, String, "NHWC") | |||
| .ATTR(pad_mode, String, "REPLICATE") | |||
| @@ -515,6 +515,34 @@ REG_OP(ReduceSumD) | |||
| .ATTR(keep_dims, Bool, false) | |||
| .OP_END_FACTORY_REG(ReduceSumD) | |||
| /** | |||
| *@brief Calculates the total mean based on the mean of each device . \n | |||
| *@par Inputs: | |||
| * Three inputs, including: | |||
| *@li x: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li count: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n | |||
| *@par Attributes: | |||
| *@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. | |||
| *@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
| *@par Outputs: | |||
| *y: The reduced tensor. Has the same type and format as input "x" . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator Sum. | |||
| */ | |||
| REG_OP(ReduceMeanWithCount) | |||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .REQUIRED_ATTR(axes, ListInt) | |||
| .ATTR(keep_dims, Bool, false) | |||
| .OP_END_FACTORY_REG(ReduceMeanWithCount) | |||
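| /* | |||
|  * For reference, the count-weighted mean suggested by the inputs above (an | |||
|  * assumption; the header does not spell out the formula): | |||
|  *   y = reduce_sum(x * count, axes) / count_sum | |||
|  */ | |||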
| /** | |||
| *@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n | |||
| @@ -1326,6 +1354,101 @@ REG_OP(ReduceMeanVariance) | |||
| .ATTR(axes, ListInt, {}) | |||
| .ATTR(keep_dims, Bool, true) | |||
| .OP_END_FACTORY_REG(ReduceMeanVariance) | |||
| /** | |||
| * @brief Calculates the standard deviation or the variance of a tensor, given its mean. | |||
| * @par Inputs: | |||
| * Two inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32. \n | |||
| * @li mean: A Tensor. The mean of "x". Has the same shape and type as "x". \n | |||
| * @par Attributes: | |||
| * Four Attributes, including: | |||
| * @li dim: A required ListInt. Specifies the dimensions to reduce. \n | |||
| * @li if_std: An optional bool. Defaults to "false". | |||
| * If "true", calculates the standard deviation; | |||
| * if "false", calculates the variance. | |||
| * @li unbiased: An optional bool. Defaults to "true". | |||
| * If "true", uses Bessel's correction; | |||
| * if "false", does not. \n | |||
| * @li keepdim: An optional bool. Defaults to "false". | |||
| * If "true", keeps the original tensor dimensions; | |||
| * if "false", does not. \n | |||
| * @par Outputs: | |||
| * @li output_var: A Tensor. The standard deviation or the variance of "x". Has the same type as "x". | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the PyTorch operator var_mean. | |||
| */ | |||
| REG_OP(ReduceStdV2Update) | |||
| .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
| .INPUT(mean, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
| .OUTPUT(output_var, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
| .REQUIRED_ATTR(dim, ListInt) | |||
| .ATTR(if_std, Bool, false) | |||
| .ATTR(unbiased, Bool, true) | |||
| .ATTR(keepdim, Bool, false) | |||
| .OP_END_FACTORY_REG(ReduceStdV2Update) | |||
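| /* | |||
|  * For reference (assuming the standard definitions), with "mean" supplied as an | |||
|  * input and N elements reduced along "dim": | |||
|  *   unbiased = false: var = sum((x - mean)^2) / N | |||
|  *   unbiased = true:  var = sum((x - mean)^2) / (N - 1)   // Bessel's correction | |||
|  *   if_std = true:    output_var = sqrt(var) | |||
|  */ | |||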
| /** | |||
| *@brief Computes the log of the sum of exponentials of elements across dimensions of a tensor. | |||
| * Reduces "x" along the dimensions given in "axes". | |||
| * Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
| * entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
| * are retained with length 1. | |||
| * | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li x: A Tensor. Must be one of the following types: | |||
| * float32, float16, int32, int64, uint32, uint64, double | |||
| *@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
| * | |||
| *@par Attributes: | |||
| *keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
| * | |||
| *@par Outputs: | |||
| *y: The reduced tensor. Has the same type and format as input "x" . \n | |||
| * | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the Onnx operator ReduceLogSumExp. | |||
| */ | |||
| REG_OP(ReduceLogSumExp) | |||
| .INPUT(x, TensorType::NumberType()) | |||
| .INPUT(axes, TensorType::IndexNumberType()) | |||
| .OUTPUT(y, TensorType::NumberType()) | |||
| .ATTR(keep_dims, Bool, false) | |||
| .OP_END_FACTORY_REG(ReduceLogSumExp) | |||
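| /* | |||
|  * Host-side sketch of the numerically stable form commonly used to evaluate | |||
|  * log-sum-exp (illustration only; whether the kernel uses this exact shift is | |||
|  * not specified here): | |||
|  * | |||
|  *   #include <algorithm> | |||
|  *   #include <cmath> | |||
|  *   #include <vector> | |||
|  *   float LogSumExpRef(const std::vector<float> &x) { | |||
|  *     const float m = *std::max_element(x.begin(), x.end()); | |||
|  *     float s = 0.0f; | |||
|  *     for (float v : x) s += std::exp(v - m);  // shift by max to avoid overflow | |||
|  *     return m + std::log(s); | |||
|  *   } | |||
|  */ | |||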
| /** | |||
| *@brief Computes the log of the sum of elements across dimensions of a tensor. | |||
| * Reduces "x" along the dimensions given in "axes". | |||
| * Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
| * entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
| * are retained with length 1. | |||
| * | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li x: A Tensor. Must be one of the following types: | |||
| * float32, float16, int32, int64, uint32, uint64, double | |||
| *@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
| * | |||
| *@par Attributes: | |||
| *keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
| * | |||
| *@par Outputs: | |||
| *y: The reduced tensor. Has the same type and format as input "x" . \n | |||
| * | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the Onnx operator ReduceLogSum. | |||
| */ | |||
| REG_OP(ReduceLogSum) | |||
| .INPUT(x, TensorType::NumberType()) | |||
| .INPUT(axes, TensorType::IndexNumberType()) | |||
| .OUTPUT(y, TensorType::NumberType()) | |||
| .ATTR(keep_dims, Bool, false) | |||
| .OP_END_FACTORY_REG(ReduceLogSum) | |||
| } //namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | |||
| @@ -700,6 +700,27 @@ REG_OP(SegmentMax) | |||
| .OUTPUT(y, TensorType::RealNumberType()) | |||
| .OP_END_FACTORY_REG(SegmentMax) | |||
| /** | |||
| *@brief Computes the sum along segments of a tensor . \n | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| * @li x: A Tensor of type NumberType. | |||
| * @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix | |||
| * of "x.shape". | |||
| *@par Outputs: | |||
| *y: A Tensor of type NumberType . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator SegmentSum. | |||
| */ | |||
| REG_OP(SegmentSum) | |||
| .INPUT(x, TensorType::NumberType()) | |||
| .INPUT(segment_ids, TensorType::IndexNumberType()) | |||
| .OUTPUT(y, TensorType::NumberType()) | |||
| .OP_END_FACTORY_REG(SegmentSum) | |||
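| /* | |||
|  * For reference, in the indexing convention used for SegmentMax below: | |||
|  *   y[i] = sum over j of x[j], where segment_ids[j] == i | |||
|  */ | |||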
| /** | |||
| *@brief: Computes the maximum along segments of a tensor. | |||
| *Computes a tensor such that output[i] = max(data[j]), where max is over j | |||
| @@ -98,11 +98,11 @@ typedef struct rtExceptionInfo { | |||
| uint32_t tid; | |||
| uint32_t deviceid; | |||
| uint32_t retcode; | |||
| } rtExceptionInfo; | |||
| } rtExceptionInfo_t; | |||
| typedef void (*rtErrorCallback)(rtExceptionType); | |||
| typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | |||
| typedef void (*rtTaskFailCallback)(rtExceptionInfo_t *exceptionInfo); | |||
| typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | |||
| @@ -140,7 +140,7 @@ RTS_API rtError_t rtSetGroup(int32_t groupId); | |||
| * @param [in] groupid count | |||
| * @return RT_ERROR_NONE for ok, errno for failed | |||
| */ | |||
| RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count); | |||
| RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t cnt); | |||
| /** | |||
| * @ingroup | |||
| @@ -94,11 +94,11 @@ typedef enum tagGetDevMsgType { | |||
| /** | |||
| * @ingroup dvrt_dev | |||
| * @brief get total device number. | |||
| * @param [in|out] count the device number | |||
| * @param [in|out] cnt the device number | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtGetDeviceCount(int32_t *count); | |||
| RTS_API rtError_t rtGetDeviceCount(int32_t *cnt); | |||
| /** | |||
| * @ingroup dvrt_dev | |||
| * @brief get device ids | |||
| @@ -338,7 +338,7 @@ RTS_API rtError_t rtSetTSDevice(uint32_t tsId); | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_DRV_ERR for can not get run mode | |||
| */ | |||
| RTS_API rtError_t rtGetRunMode(rtRunMode *mode); | |||
| RTS_API rtError_t rtGetRunMode(rtRunMode *runMode); | |||
| /** | |||
| * @ingroup dvrt_dev | |||
| @@ -23,11 +23,11 @@ typedef enum dvfsProfileMode { | |||
| /** | |||
| * @ingroup dvrt_dvfsprofile | |||
| * @brief Set the performance mode of the device | |||
| * @param [in] mode dvfsProfileMode | |||
| * @param [in] profMode dvfsProfileMode | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode mode); | |||
| RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode profMode); | |||
| /** | |||
| * @ingroup dvrt_dvfsprofile | |||
| @@ -519,13 +519,13 @@ RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStre | |||
| /** | |||
| * @ingroup rt_kernel | |||
| * @brief setup argument for next rtLaunch in current thread | |||
| * @param [in] arg argument address for kernel function | |||
| * @param [in] args argument address for kernel function | |||
| * @param [in] size argument size | |||
| * @param [in] offset argument table offset | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtSetupArgument(const void *arg, uint32_t size, uint32_t offset); | |||
| RTS_API rtError_t rtSetupArgument(const void *args, uint32_t size, uint32_t offset); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| @@ -544,11 +544,11 @@ RTS_API rtError_t rtLaunch(const void *stubFunc); | |||
| * @param [in] ptr host memory | |||
| * @param [in] size host memory size | |||
| * @param [in] flag reserved. set to 0 | |||
| * @param [out] arg returned arg. used for next kernel's arg. | |||
| * @param [out] args returned arg. used for next kernel's arg. | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg); | |||
| RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **args); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| @@ -222,24 +222,24 @@ RTS_API rtError_t rtMemQueueInit(int32_t devId); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| * @brief enqueu mbuf | |||
| * @brief enqueue memBuf | |||
| * @param [in] devId the logical device id | |||
| * @param [in] qid queue id | |||
| * @param [in] mbuf enqueue mbuf | |||
| * @param [in] memBuf enqueue memBuf | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *mbuf); | |||
| RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *memBuf); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| * @brief enqueu mbuf | |||
| * @brief dequeue memBuf | |||
| * @param [in] devId the logical device id | |||
| * @param [in] qid queue id | |||
| * @param [out] mbuf dequeue mbuf | |||
| * @param [out] memBuf dequeue memBuf | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **mbuf); | |||
| RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **memBuf); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| @@ -350,47 +350,47 @@ RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| * @brief alloc buff | |||
| * @param [out] buff: buff addr alloced | |||
| * @param [out] memBuf: address of the allocated buffer | |||
| * @param [in] size: The amount of memory space requested | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size); | |||
| RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| * @brief free buff | |||
| * @param [in] buff: buff addr to be freed | |||
| * @param [in] memBuf: buffer address to be freed | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf); | |||
| RTS_API rtError_t rtMbufFree(rtMbufPtr_t memBuf); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| * @brief get Data addr of Mbuf | |||
| * @param [in] mbuf: Mbuf addr | |||
| * @param [in] memBuf: Mbuf addr | |||
| * @param [out] buf: Mbuf data addr | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf); | |||
| RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t memBuf, void **buf); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| * @brief get total Buffer size of Mbuf | |||
| * @param [in] mbuf: Mbuf addr | |||
| * @param [in] memBuf: Mbuf addr | |||
| * @param [out] totalSize: total buffer size of Mbuf | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize); | |||
| RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize); | |||
| /** | |||
| * @ingroup rt_mem_queue | |||
| * @brief Get the address and length of its user_data from the specified Mbuf | |||
| * @param [in] mbuf: Mbuf addr | |||
| * @param [in] memBuf: Mbuf addr | |||
| * @param [out] priv: address of its user_data | |||
| * @param [out] size: length of its user_data | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t mbuf, void **priv, uint64_t *size); | |||
| RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t memBuf, void **priv, uint64_t *size); | |||
| // mem group | |||
| typedef struct { | |||
| @@ -44,6 +44,7 @@ typedef enum tagModelTaskType { | |||
| RT_MODEL_TASK_PROFILER_TRACE_EX, | |||
| RT_MODEL_TASK_FFTS_TASK, | |||
| RT_MODEL_TASK_FFTS_PLUS_TASK, | |||
| RT_MODEL_TASK_DSA_TASK, | |||
| } rtModelTaskType_t; | |||
| typedef enum tagModelStreamType { | |||
| @@ -32,6 +32,37 @@ typedef struct tagStarsSqeHeader { | |||
| uint16_t taskId; | |||
| } rtStarsSqeHeader_t; | |||
| typedef struct tagStarsDsaSqe { | |||
| // 0-7 bytes | |||
| rtStarsSqeHeader_t sqeHeader; | |||
| // 8-11 bytes | |||
| uint32_t start : 1; | |||
| uint32_t functionType : 3; | |||
| uint32_t dataType : 3; | |||
| uint32_t algoType : 3; | |||
| uint32_t paramVldBitmap : 5; | |||
| uint32_t paramAddrValBitmap : 7; | |||
| uint32_t reserved0 : 10; | |||
| // 12-15 bytes | |||
| uint16_t sqeIndex; | |||
| uint8_t kernelCredit; | |||
| uint8_t reserved1; | |||
| // 16-31 bytes | |||
| uint32_t dsaCfgResultAddrLow; | |||
| uint32_t dsaCfgResultAddrHigh; | |||
| uint32_t dsaCfgStateAddrLow; | |||
| uint32_t dsaCfgStateAddrHigh; | |||
| // 32-47 bytes | |||
| uint32_t dsaCfgParamAddrLow; | |||
| uint32_t dsaCfgParamAddrHigh; | |||
| uint32_t dsaCfgSeedLow; | |||
| uint32_t dsaCfgSeedHigh; | |||
| // 48-63 bytes | |||
| uint32_t dsaCfgNumberLow; | |||
| uint32_t dsaCfgNumberHigh; | |||
| uint32_t reserved2[2]; | |||
| } rtStarsDsaSqe_t; | |||
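| // The byte-offset comments above imply a fixed 64-byte SQE. A compile-time check of | |||
| // that assumption (illustrative, not part of the original header) would be: | |||
| //   static_assert(sizeof(rtStarsDsaSqe_t) == 64, "DSA SQE must be 64 bytes"); | |||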
| // ffts+ type | |||
| typedef enum tagFftsPlusType { | |||
| RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved | |||
| @@ -1,17 +1,8 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| /* | |||
| * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
| * Description: handle perf data | |||
| * Author: xp | |||
| * Create: 2019-10-13 | |||
| */ | |||
| #ifndef MSPROFILER_API_PROF_ACL_API_H_ | |||
| @@ -25,6 +16,8 @@ | |||
| #define PROF_L2CACHE 0x00000010ULL | |||
| #define PROF_HCCL_TRACE 0x00000020ULL | |||
| #define PROF_TRAINING_TRACE 0x00000040ULL | |||
| #define PROF_MSPROFTX 0x00000080ULL | |||
| #define PROF_RUNTIME_API 0x00000100ULL | |||
| // system profiling switch | |||
| #define PROF_CPU 0x00010000ULL | |||
| @@ -36,17 +29,18 @@ | |||
| #define PROF_AIVECTORCORE_SAMPLE 0x00400000ULL | |||
| #define PROF_MODEL_EXECUTE 0x0000001000000ULL | |||
| #define PROF_RUNTIME_API 0x0000002000000ULL | |||
| #define PROF_RUNTIME_TRACE 0x0000004000000ULL | |||
| #define PROF_SCHEDULE_TIMELINE 0x0000008000000ULL | |||
| #define PROF_SCHEDULE_TRACE 0x0000010000000ULL | |||
| #define PROF_AIVECTORCORE_METRICS 0x0000020000000ULL | |||
| #define PROF_SUBTASK_TIME 0x0000040000000ULL | |||
| #define PROF_TASK_TRACE 0x0000005000062ULL | |||
| #define PROF_OP_DETAIL 0x0000080000000ULL | |||
| #define PROF_MODEL_LOAD 0x8000000000000000ULL | |||
| #define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \ | |||
| PROF_HCCL_TRACE | PROF_TASK_TIME) | |||
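| // Note: with the switch values defined above, this OR expands to the same value as | |||
| // the old PROF_TASK_TRACE literal, so existing callers see no change: | |||
| //   0x0000001000000ULL | 0x0000004000000ULL | 0x40ULL | 0x20ULL | 0x2ULL == 0x0000005000062ULL | |||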
| // DataTypeConfig MASK | |||
| #define PROF_ACL_API_MASK 0x00000001ULL | |||
| #define PROF_TASK_TIME_MASK 0x00000002ULL | |||
| @@ -55,6 +49,8 @@ | |||
| #define PROF_L2CACHE_MASK 0x00000010ULL | |||
| #define PROF_HCCL_TRACE_MASK 0x00000020ULL | |||
| #define PROF_TRAINING_TRACE_MASK 0x00000040ULL | |||
| #define PROF_MSPROFTX_MASK 0x00000080ULL | |||
| #define PROF_RUNTIME_API_MASK 0x00000100ULL | |||
| // system profiling mask | |||
| #define PROF_CPU_MASK 0x00010000ULL | |||
| @@ -66,12 +62,12 @@ | |||
| #define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000ULL | |||
| #define PROF_MODEL_EXECUTE_MASK 0x0000001000000ULL | |||
| #define PROF_RUNTIME_API_MASK 0x0000002000000ULL | |||
| #define PROF_RUNTIME_TRACE_MASK 0x0000004000000ULL | |||
| #define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000ULL | |||
| #define PROF_SCHEDULE_TRACE_MASK 0x0000010000000ULL | |||
| #define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000ULL | |||
| #define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL | |||
| #define PROF_OP_DETAIL_MASK 0x0000080000000ULL | |||
| #define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL | |||
| @@ -135,6 +131,33 @@ MSVP_PROF_API Status aclgrphProfGraphUnSubscribe(const uint32_t graphId); | |||
| * @retval 0 for failed | |||
| */ | |||
| MSVP_PROF_API size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set stamp payload | |||
| * | |||
| * | |||
| * @retval void | |||
| */ | |||
| MSVP_PROF_API int aclprofSetStampPayload(void *stamp, const int32_t type, void *value); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set category and name | |||
| * | |||
| * | |||
| * @retval void | |||
| */ | |||
| MSVP_PROF_API int aclprofSetCategoryName(uint32_t category, const char *categoryName); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set category to stamp | |||
| * | |||
| * | |||
| * @retval void | |||
| */ | |||
| MSVP_PROF_API int aclprofSetStampCategory(void *stamp, uint32_t category); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -76,7 +76,8 @@ enum MsprofReporterModuleId { | |||
| MSPROF_MODULE_HCCL, // HCCL | |||
| MSPROF_MODULE_ACL, // AclModule | |||
| MSPROF_MODULE_FRAMEWORK, // Framework | |||
| MSPROF_MODULE_RUNTIME // runtime | |||
| MSPROF_MODULE_RUNTIME, // runtime | |||
| MSPROF_MODULE_MSPROF // msprofTx | |||
| }; | |||
| /** | |||
| @@ -0,0 +1,449 @@ | |||
| /* | |||
| * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
| * Description: handle perf data | |||
| * Author: Huawei Technologies Co., Ltd. | |||
| * Create: 2019-10-13 | |||
| */ | |||
| #ifndef MSPROFILER_PROF_COMMON_H_ | |||
| #define MSPROFILER_PROF_COMMON_H_ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| #include <stdint.h> | |||
| #define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a | |||
| enum MsprofDataTag { | |||
| MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19 | |||
| MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39 | |||
| MSPROF_GE_DATA_TAG_FUSION = 21, | |||
| MSPROF_GE_DATA_TAG_INFER = 22, | |||
| MSPROF_GE_DATA_TAG_TASK = 23, | |||
| MSPROF_GE_DATA_TAG_TENSOR = 24, | |||
| MSPROF_GE_DATA_TAG_STEP = 25, | |||
| MSPROF_GE_DATA_TAG_ID_MAP = 26, | |||
| MSPROF_GE_DATA_TAG_HOST_SCH = 27, | |||
| MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59 | |||
| MSPROF_RUNTIME_DATA_TAG_TRACK = 41, | |||
| MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79 | |||
| MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99 | |||
| MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119 | |||
| MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139 | |||
| MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t | |||
| }; | |||
| /** | |||
| * @brief struct of mixed data | |||
| */ | |||
| #define MSPROF_MIX_DATA_RESERVE_BYTES 7 | |||
| #define MSPROF_MIX_DATA_STRING_LEN 120 | |||
| enum MsprofMixDataType { | |||
| MSPROF_MIX_DATA_HASH_ID = 0, | |||
| MSPROF_MIX_DATA_STRING, | |||
| }; | |||
| struct MsprofMixData { | |||
| uint8_t type; // MsprofMixDataType | |||
| uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
| union { | |||
| uint64_t hashId; | |||
| char dataStr[MSPROF_MIX_DATA_STRING_LEN]; | |||
| } data; | |||
| }; | |||
| /** | |||
| * @brief profiling command info | |||
| */ | |||
| #define MSPROF_MAX_DEV_NUM 64 | |||
| struct MsprofCommandHandle { | |||
| uint64_t profSwitch; | |||
| uint64_t profSwitchHi; | |||
| uint32_t devNums; | |||
| uint32_t devIdList[MSPROF_MAX_DEV_NUM]; | |||
| uint32_t modelId; | |||
| uint32_t type; | |||
| }; | |||
| /** | |||
| * @brief struct of data reported by acl | |||
| */ | |||
| #define MSPROF_ACL_DATA_RESERVE_BYTES 32 | |||
| #define MSPROF_ACL_API_NAME_LEN 64 | |||
| enum MsprofAclApiType { | |||
| MSPROF_ACL_API_TYPE_OP = 1, | |||
| MSPROF_ACL_API_TYPE_MODEL, | |||
| MSPROF_ACL_API_TYPE_RUNTIME, | |||
| MSPROF_ACL_API_TYPE_OTHERS, | |||
| }; | |||
| struct MsprofAclProfData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_ACL_DATA_TAG; | |||
| uint32_t apiType; // enum MsprofAclApiType | |||
| uint64_t beginTime; | |||
| uint64_t endTime; | |||
| uint32_t processId; | |||
| uint32_t threadId; | |||
| char apiName[MSPROF_ACL_API_NAME_LEN]; | |||
| uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES]; | |||
| }; | |||
| /** | |||
| * @brief struct of data reported by GE | |||
| */ | |||
| #define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104 | |||
| struct MsprofGeProfModelLoadData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD; | |||
| uint32_t modelId; | |||
| MsprofMixData modelName; | |||
| uint64_t startTime; | |||
| uint64_t endTime; | |||
| uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8 | |||
| #define MSPROF_GE_FUSION_OP_NUM 8 | |||
| struct MsprofGeProfFusionData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION; | |||
| uint32_t modelId; | |||
| MsprofMixData fusionName; | |||
| uint64_t inputMemSize; | |||
| uint64_t outputMemSize; | |||
| uint64_t weightMemSize; | |||
| uint64_t workspaceMemSize; | |||
| uint64_t totalMemSize; | |||
| uint64_t fusionOpNum; | |||
| uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM]; | |||
| uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64 | |||
| struct MsprofGeProfInferData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER; | |||
| uint32_t modelId; | |||
| MsprofMixData modelName; | |||
| uint32_t requestId; | |||
| uint32_t threadId; | |||
| uint64_t inputDataStartTime; | |||
| uint64_t inputDataEndTime; | |||
| uint64_t inferStartTime; | |||
| uint64_t inferEndTime; | |||
| uint64_t outputDataStartTime; | |||
| uint64_t outputDataEndTime; | |||
| uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16 | |||
| #define MSPROF_GE_OP_TYPE_LEN 56 | |||
| enum MsprofGeTaskType { | |||
| MSPROF_GE_TASK_TYPE_AI_CORE = 0, | |||
| MSPROF_GE_TASK_TYPE_AI_CPU, | |||
| MSPROF_GE_TASK_TYPE_AIV, | |||
| }; | |||
| enum MsprofGeShapeType { | |||
| MSPROF_GE_SHAPE_TYPE_STATIC = 0, | |||
| MSPROF_GE_SHAPE_TYPE_DYNAMIC, | |||
| }; | |||
| struct MsprofGeOpType { | |||
| uint8_t type; // MsprofMixDataType | |||
| uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
| union { | |||
| uint64_t hashId; | |||
| char dataStr[MSPROF_GE_OP_TYPE_LEN]; | |||
| } data; | |||
| }; | |||
| struct MsprofGeProfTaskData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK; | |||
| uint32_t taskType; // MsprofGeTaskType | |||
| MsprofMixData opName; | |||
| MsprofGeOpType opType; | |||
| uint64_t curIterNum; | |||
| uint64_t timeStamp; | |||
| uint32_t shapeType; // MsprofGeShapeType | |||
| uint32_t blockDims; | |||
| uint32_t modelId; | |||
| uint32_t streamId; | |||
| uint32_t taskId; | |||
| uint32_t threadId; | |||
| uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8 | |||
| #define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8 | |||
| #define MSPROF_GE_TENSOR_DATA_NUM 5 | |||
| enum MsprofGeTensorType { | |||
| MSPROF_GE_TENSOR_TYPE_INPUT = 0, | |||
| MSPROF_GE_TENSOR_TYPE_OUTPUT, | |||
| }; | |||
| struct MsprofGeTensorData { | |||
| uint32_t tensorType; // MsprofGeTensorType | |||
| uint32_t format; | |||
| uint32_t dataType; | |||
| uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN]; | |||
| }; | |||
| struct MsprofGeProfTensorData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR; | |||
| uint32_t modelId; | |||
| uint64_t curIterNum; | |||
| uint32_t streamId; | |||
| uint32_t taskId; | |||
| uint32_t tensorNum; | |||
| MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM]; | |||
| uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27 | |||
| enum MsprofGeStepTag { | |||
| MSPROF_GE_STEP_TAG_BEGIN = 0, | |||
| MSPROF_GE_STEP_TAG_END, | |||
| }; | |||
| struct MsprofGeProfStepData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP; | |||
| uint32_t modelId; | |||
| uint32_t streamId; | |||
| uint32_t taskId; | |||
| uint64_t timeStamp; | |||
| uint64_t curIterNum; | |||
| uint32_t threadId; | |||
| uint8_t tag; // MsprofGeStepTag | |||
| uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6 | |||
| struct MsprofGeProfIdMapData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP; | |||
| uint32_t graphId; | |||
| uint32_t modelId; | |||
| uint32_t sessionId; | |||
| uint64_t timeStamp; | |||
| uint16_t mode; | |||
| uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24 | |||
| struct MsprofGeProfHostSchData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH; | |||
| uint32_t threadId; // record in start event | |||
| uint64_t element; | |||
| uint64_t event; | |||
| uint64_t startTime; // record in start event | |||
| uint64_t endTime; // record in end event | |||
| uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES]; | |||
| }; | |||
| /** | |||
| * @brief struct of data reported by RunTime | |||
| */ | |||
| #define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106 | |||
| #define MSPROF_RUNTIME_TASK_ID_NUM 10 | |||
| #define MSPROF_RUNTIME_API_NAME_LEN 64 | |||
| struct MsprofRuntimeProfApiData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API; | |||
| uint32_t threadId; | |||
| uint64_t entryTime; | |||
| uint64_t exitTime; | |||
| uint64_t dataSize; | |||
| uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN]; | |||
| uint32_t retCode; | |||
| uint32_t streamId; | |||
| uint32_t taskNum; | |||
| uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM]; | |||
| uint16_t memcpyDirection; | |||
| uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES]; | |||
| }; | |||
| #define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10 | |||
| #define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32 | |||
| struct MsprofRuntimeProfTrackData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK; | |||
| uint32_t threadId; | |||
| uint64_t timeStamp; | |||
| char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN]; | |||
| uint32_t taskId; | |||
| uint16_t streamId; | |||
| uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES]; | |||
| }; | |||
| /** | |||
| * @brief struct of data reported by AICPU | |||
| */ | |||
| #define MSPROF_AICPU_DATA_RESERVE_BYTES 9 | |||
| struct MsprofAicpuProfData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_AICPU_DATA_TAG; | |||
| uint16_t streamId; | |||
| uint16_t taskId; | |||
| uint64_t runStartTime; | |||
| uint64_t runStartTick; | |||
| uint64_t computeStartTime; | |||
| uint64_t memcpyStartTime; | |||
| uint64_t memcpyEndTime; | |||
| uint64_t runEndTime; | |||
| uint64_t runEndTick; | |||
| uint32_t threadId; | |||
| uint32_t deviceId; | |||
| uint64_t submitTick; | |||
| uint64_t scheduleTick; | |||
| uint64_t tickBeforeRun; | |||
| uint64_t tickAfterRun; | |||
| uint32_t kernelType; | |||
| uint32_t dispatchTime; | |||
| uint32_t totalTime; | |||
| uint16_t fftsThreadId; | |||
| uint8_t version; | |||
| uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES]; | |||
| }; | |||
| /** | |||
| * @brief struct of data reported by DP | |||
| */ | |||
| #define MSPROF_DP_DATA_RESERVE_BYTES 16 | |||
| #define MSPROF_DP_DATA_ACTION_LEN 16 | |||
| #define MSPROF_DP_DATA_SOURCE_LEN 64 | |||
| struct MsprofDpProfData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_DP_DATA_TAG; | |||
| uint32_t rsv; // Ensure 8-byte alignment | |||
| uint64_t timeStamp; | |||
| char action[MSPROF_DP_DATA_ACTION_LEN]; | |||
| char source[MSPROF_DP_DATA_SOURCE_LEN]; | |||
| uint64_t index; | |||
| uint64_t size; | |||
| uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES]; | |||
| }; | |||
| /** | |||
| * @brief struct of data reported by HCCL | |||
| */ | |||
| #pragma pack(4) | |||
| struct MsprofHcclProfNotify { | |||
| uint32_t taskID; | |||
| uint64_t notifyID; | |||
| uint32_t stage; | |||
| uint32_t remoteRank; | |||
| uint32_t transportType; | |||
| uint32_t role; // role {0: dst, 1:src} | |||
| double durationEstimated; | |||
| }; | |||
| struct MsprofHcclProfReduce { | |||
| uint32_t taskID; | |||
| uint64_t src; | |||
| uint64_t dst; | |||
| uint64_t size; | |||
| uint32_t op; // {0: sum, 1: mul, 2: max, 3: min} | |||
| uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64} | |||
| uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||
| uint32_t remoteRank; | |||
| uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL} | |||
| uint32_t role; // role {0: dst, 1:src} | |||
| double durationEstimated; | |||
| }; | |||
| struct MsprofHcclProfRDMA { | |||
| uint32_t taskID; | |||
| uint64_t src; | |||
| uint64_t dst; | |||
| uint64_t size; | |||
| uint64_t notifyID; | |||
| uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||
| uint32_t remoteRank; | |||
| uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} | |||
| uint32_t role; // role {0: dst, 1:src} | |||
| uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload} | |||
| double durationEstimated; | |||
| }; | |||
| struct MsprofHcclProfMemcpy { | |||
| uint32_t taskID; | |||
| uint64_t src; | |||
| uint64_t dst; | |||
| uint64_t size; | |||
| uint64_t notifyID; | |||
| uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||
| uint32_t remoteRank; | |||
| uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} | |||
| uint32_t role; // role {0: dst, 1:src} | |||
| double durationEstimated; | |||
| }; | |||
| struct MsprofHcclProfStageStep { | |||
| uint32_t rank; | |||
| uint32_t rankSize; | |||
| }; | |||
| struct MsprofHcclProfFlag { | |||
| uint64_t cclTag; | |||
| uint64_t groupName; | |||
| uint32_t localRank; | |||
| uint32_t workFlowMode; | |||
| }; | |||
| /** | |||
| * @name MsprofHcclProfData | |||
| * @brief struct of data reported by hccl | |||
| */ | |||
| struct MsprofHcclProfData { | |||
| uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
| uint16_t dataTag = MSPROF_HCCL_DATA_TAG; | |||
| uint32_t planeID; | |||
| uint32_t deviceID; | |||
| uint32_t streamID; | |||
| double ts; | |||
| char name[16]; | |||
| union { | |||
| MsprofHcclProfNotify notify; | |||
| MsprofHcclProfReduce reduce; | |||
| MsprofHcclProfStageStep stageStep; | |||
| MsprofHcclProfMemcpy forMemcpy; | |||
| MsprofHcclProfRDMA RDMA; | |||
| MsprofHcclProfFlag flag; | |||
| } args; | |||
| }; | |||
| #pragma pack() | |||
| /** | |||
| * @name MsprofStampInfo | |||
| * @brief struct of data reported by msproftx | |||
| */ | |||
| struct MsprofStampInfo { | |||
| uint16_t magicNumber; | |||
| uint16_t dataTag; | |||
| uint32_t processId; | |||
| uint32_t threadId; | |||
| uint32_t category; //marker category | |||
| uint32_t eventType; | |||
| int32_t payloadType; | |||
| union PayloadValue //payload info for marker | |||
| { | |||
| uint64_t ullValue; | |||
| int64_t llValue; | |||
| double dValue; | |||
| uint32_t uiValue[2]; | |||
| int32_t iValue[2]; | |||
| float fValue[2]; | |||
| } payload; | |||
| uint64_t startTime; | |||
| uint64_t endTime; | |||
| int32_t messageType; | |||
| char message[128]; | |||
| uint8_t reserve0[4]; | |||
| uint8_t reserve1[72]; | |||
| }; | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MSPROFILER_PROF_COMMON_H_ | |||